correction to hashtag handling
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m21s

This commit is contained in:
2026-03-19 01:04:17 -04:00
parent ec275dd858
commit 64ae9c1082
+15 -11
View File
@@ -18,16 +18,15 @@ function extractMaxId(linkHeader: string | null): string | null {
}
/**
 * Extracts hashtag names from Mastodon HTML content with original casing preserved.
 * Mastodon renders hashtags as: #<span>TagName</span> inside an anchor.
 *
 * @param html - Rendered post content to scan; an empty string yields no tags.
 * @returns Tag names in document order, exactly as written (duplicates are kept).
 */
function extractTagsFromHtml(html: string): string[] {
  const results: string[] = []
  if (!html) return results
  // Match: #<span>TagName</span>. The regex is created per call, so the
  // stateful `lastIndex` of a /g regex never carries over between invocations.
  const re = /#<span>([^<]+)<\/span>/gi
  let m: RegExpExecArray | null
  while ((m = re.exec(html)) !== null) {
    // Push each match exactly once, as written; comparisons that need to be
    // case-insensitive lowercase the value at the call site.
    results.push(m[1])
  }
  return results
}
@@ -36,7 +35,7 @@ async function fetchPage(tag: string, maxId?: string): Promise<TimelineResult> {
const instance = process.env.MASTODON_INSTANCE
if (!instance) throw new Error('MASTODON_INSTANCE is not configured')
let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}?limit=40`
let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}`
// Start the query string with "?" when the URL has none yet; "&" is only valid after an existing parameter.
if (maxId) url += `${url.includes('?') ? '&' : '?'}max_id=${encodeURIComponent(maxId)}`
const headers: HeadersInit = { Accept: 'application/json' }
@@ -123,15 +122,12 @@ export async function getPostsData(
postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS)
}
// Count co-occurring tags — merge post.tags with tags parsed from HTML content
// Count co-occurring tags from the API tags object (authoritative for membership)
const counts = new Map<string, number>()
const lowerTag = tag.toLowerCase()
for (const post of allPosts) {
const fromApi = (post.tags ?? []).map((t) => t.name.toLowerCase())
const fromContent = extractTagsFromHtml(post.content)
// Union of both sources
const allTagNames = [...new Set([...fromApi, ...fromContent])]
for (const name of allTagNames) {
for (const t of post.tags ?? []) {
const name = t.name.toLowerCase()
if (name !== lowerTag && name.length >= 2 && name.length <= 100) {
counts.set(name, (counts.get(name) ?? 0) + 1)
}
@@ -143,14 +139,22 @@ export async function getPostsData(
.slice(0, 10)
.map(([name]) => name)
// Derive the most common casing variant for the queried tag itself
// Derive the most common casing variant for the queried tag itself.
// Merges post.tags (e.g. "JavaScript") with HTML-rendered span variants for better coverage.
const casingCounts = new Map<string, number>()
for (const post of allPosts) {
// From API tags array
for (const t of post.tags ?? []) {
if (t.name.toLowerCase() === lowerTag) {
casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1)
}
}
// From HTML content (preserves original casing as typed by the user)
for (const variant of extractTagsFromHtml(post.content)) {
if (variant.toLowerCase() === lowerTag) {
casingCounts.set(variant, (casingCounts.get(variant) ?? 0) + 1)
}
}
}
let displayTag: string | undefined
if (casingCounts.size > 0) {