correction to hashtag handling
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m21s

This commit is contained in:
2026-03-19 01:04:17 -04:00
parent ec275dd858
commit 64ae9c1082
+15 -11
View File
@@ -18,16 +18,15 @@ function extractMaxId(linkHeader: string | null): string | null {
} }
/** /**
* Extracts hashtag names from Mastodon HTML content as a supplement to post.tags. * Extracts hashtag names from Mastodon HTML content with original casing preserved.
* Mastodon renders hashtags as: #<span>TagName</span> inside an anchor. * Mastodon renders hashtags as: #<span>TagName</span> inside an anchor.
*/ */
function extractTagsFromHtml(html: string): string[] { function extractTagsFromHtml(html: string): string[] {
const results: string[] = [] const results: string[] = []
// Match: #<span>TagName</span>
const re = /#<span>([^<]+)<\/span>/gi const re = /#<span>([^<]+)<\/span>/gi
let m: RegExpExecArray | null let m: RegExpExecArray | null
while ((m = re.exec(html)) !== null) { while ((m = re.exec(html)) !== null) {
results.push(m[1].toLowerCase()) results.push(m[1]) // preserve original casing
} }
return results return results
} }
@@ -36,7 +35,7 @@ async function fetchPage(tag: string, maxId?: string): Promise<TimelineResult> {
const instance = process.env.MASTODON_INSTANCE const instance = process.env.MASTODON_INSTANCE
if (!instance) throw new Error('MASTODON_INSTANCE is not configured') if (!instance) throw new Error('MASTODON_INSTANCE is not configured')
let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}?limit=40` let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}`
if (maxId) url += `&max_id=${maxId}` if (maxId) url += `&max_id=${maxId}`
const headers: HeadersInit = { Accept: 'application/json' } const headers: HeadersInit = { Accept: 'application/json' }
@@ -123,15 +122,12 @@ export async function getPostsData(
postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS) postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS)
} }
// Count co-occurring tags — merge post.tags with tags parsed from HTML content // Count co-occurring tags from the API tags object (authoritative for membership)
const counts = new Map<string, number>() const counts = new Map<string, number>()
const lowerTag = tag.toLowerCase() const lowerTag = tag.toLowerCase()
for (const post of allPosts) { for (const post of allPosts) {
const fromApi = (post.tags ?? []).map((t) => t.name.toLowerCase()) for (const t of post.tags ?? []) {
const fromContent = extractTagsFromHtml(post.content) const name = t.name.toLowerCase()
// Union of both sources
const allTagNames = [...new Set([...fromApi, ...fromContent])]
for (const name of allTagNames) {
if (name !== lowerTag && name.length >= 2 && name.length <= 100) { if (name !== lowerTag && name.length >= 2 && name.length <= 100) {
counts.set(name, (counts.get(name) ?? 0) + 1) counts.set(name, (counts.get(name) ?? 0) + 1)
} }
@@ -143,14 +139,22 @@ export async function getPostsData(
.slice(0, 10) .slice(0, 10)
.map(([name]) => name) .map(([name]) => name)
// Derive the most common casing variant for the queried tag itself // Derive the most common casing variant for the queried tag itself.
// Merges post.tags (e.g. "JavaScript") with HTML-rendered span variants for better coverage.
const casingCounts = new Map<string, number>() const casingCounts = new Map<string, number>()
for (const post of allPosts) { for (const post of allPosts) {
// From API tags array
for (const t of post.tags ?? []) { for (const t of post.tags ?? []) {
if (t.name.toLowerCase() === lowerTag) { if (t.name.toLowerCase() === lowerTag) {
casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1) casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1)
} }
} }
// From HTML content (preserves original casing as typed by the user)
for (const variant of extractTagsFromHtml(post.content)) {
if (variant.toLowerCase() === lowerTag) {
casingCounts.set(variant, (casingCounts.get(variant) ?? 0) + 1)
}
}
} }
let displayTag: string | undefined let displayTag: string | undefined
if (casingCounts.size > 0) { if (casingCounts.size > 0) {