diff --git a/src/lib/mastodon.ts b/src/lib/mastodon.ts
index 757f6bf..77444eb 100644
--- a/src/lib/mastodon.ts
+++ b/src/lib/mastodon.ts
@@ -18,16 +18,15 @@ function extractMaxId(linkHeader: string | null): string | null {
 }
 
 /**
- * Extracts hashtag names from Mastodon HTML content as a supplement to post.tags.
+ * Extracts hashtag names from Mastodon HTML content with original casing preserved.
  * Mastodon renders hashtags as: #TagName inside an anchor.
  */
 function extractTagsFromHtml(html: string): string[] {
   const results: string[] = []
-  // Match: #TagName
   const re = /#([^<]+)<\/span>/gi
   let m: RegExpExecArray | null
   while ((m = re.exec(html)) !== null) {
-    results.push(m[1].toLowerCase())
+    results.push(m[1]) // preserve original casing
   }
   return results
 }
@@ -36,7 +35,7 @@ async function fetchPage(tag: string, maxId?: string): Promise {
   const instance = process.env.MASTODON_INSTANCE
   if (!instance) throw new Error('MASTODON_INSTANCE is not configured')
 
-  let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}?limit=40`
-  if (maxId) url += `&max_id=${maxId}`
+  let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}`
+  if (maxId) url += `?max_id=${maxId}`
 
   const headers: HeadersInit = { Accept: 'application/json' }
 
@@ -123,15 +122,12 @@ export async function getPostsData(
     postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS)
   }
 
-  // Count co-occurring tags — merge post.tags with tags parsed from HTML content
+  // Count co-occurring tags from the API tags object (authoritative for membership)
   const counts = new Map()
   const lowerTag = tag.toLowerCase()
   for (const post of allPosts) {
-    const fromApi = (post.tags ?? []).map((t) => t.name.toLowerCase())
-    const fromContent = extractTagsFromHtml(post.content)
-    // Union of both sources
-    const allTagNames = [...new Set([...fromApi, ...fromContent])]
-    for (const name of allTagNames) {
+    for (const t of post.tags ?? []) {
+      const name = t.name.toLowerCase()
       if (name !== lowerTag && name.length >= 2 && name.length <= 100) {
         counts.set(name, (counts.get(name) ?? 0) + 1)
       }
@@ -143,14 +139,22 @@ export async function getPostsData(
     .slice(0, 10)
     .map(([name]) => name)
 
-  // Derive the most common casing variant for the queried tag itself
+  // Derive the most common casing variant for the queried tag itself.
+  // Merges post.tags (e.g. "JavaScript") with HTML-rendered span variants for better coverage.
   const casingCounts = new Map()
   for (const post of allPosts) {
+    // From API tags array
     for (const t of post.tags ?? []) {
       if (t.name.toLowerCase() === lowerTag) {
         casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1)
       }
     }
+    // From HTML content (preserves original casing as typed by the user)
+    for (const variant of extractTagsFromHtml(post.content)) {
+      if (variant.toLowerCase() === lowerTag) {
+        casingCounts.set(variant, (casingCounts.get(variant) ?? 0) + 1)
+      }
+    }
   }
   let displayTag: string | undefined
   if (casingCounts.size > 0) {