correction to hashtag handling
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m21s
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m21s
This commit is contained in:
+15
-11
@@ -18,16 +18,15 @@ function extractMaxId(linkHeader: string | null): string | null {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts hashtag names from Mastodon HTML content as a supplement to post.tags.
|
* Extracts hashtag names from Mastodon HTML content with original casing preserved.
|
||||||
* Mastodon renders hashtags as: #<span>TagName</span> inside an anchor.
|
* Mastodon renders hashtags as: #<span>TagName</span> inside an anchor.
|
||||||
*/
|
*/
|
||||||
function extractTagsFromHtml(html: string): string[] {
|
function extractTagsFromHtml(html: string): string[] {
|
||||||
const results: string[] = []
|
const results: string[] = []
|
||||||
// Match: #<span>TagName</span>
|
|
||||||
const re = /#<span>([^<]+)<\/span>/gi
|
const re = /#<span>([^<]+)<\/span>/gi
|
||||||
let m: RegExpExecArray | null
|
let m: RegExpExecArray | null
|
||||||
while ((m = re.exec(html)) !== null) {
|
while ((m = re.exec(html)) !== null) {
|
||||||
results.push(m[1].toLowerCase())
|
results.push(m[1]) // preserve original casing
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
@@ -36,7 +35,7 @@ async function fetchPage(tag: string, maxId?: string): Promise<TimelineResult> {
|
|||||||
const instance = process.env.MASTODON_INSTANCE
|
const instance = process.env.MASTODON_INSTANCE
|
||||||
if (!instance) throw new Error('MASTODON_INSTANCE is not configured')
|
if (!instance) throw new Error('MASTODON_INSTANCE is not configured')
|
||||||
|
|
||||||
let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}?limit=40`
|
let url = `${instance}/api/v1/timelines/tag/${encodeURIComponent(tag)}`
|
||||||
if (maxId) url += `&max_id=${maxId}`
|
if (maxId) url += `&max_id=${maxId}`
|
||||||
|
|
||||||
const headers: HeadersInit = { Accept: 'application/json' }
|
const headers: HeadersInit = { Accept: 'application/json' }
|
||||||
@@ -123,15 +122,12 @@ export async function getPostsData(
|
|||||||
postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS)
|
postsPerHour = allPosts.length / (coveredMs / ONE_HOUR_MS)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count co-occurring tags — merge post.tags with tags parsed from HTML content
|
// Count co-occurring tags from the API tags object (authoritative for membership)
|
||||||
const counts = new Map<string, number>()
|
const counts = new Map<string, number>()
|
||||||
const lowerTag = tag.toLowerCase()
|
const lowerTag = tag.toLowerCase()
|
||||||
for (const post of allPosts) {
|
for (const post of allPosts) {
|
||||||
const fromApi = (post.tags ?? []).map((t) => t.name.toLowerCase())
|
for (const t of post.tags ?? []) {
|
||||||
const fromContent = extractTagsFromHtml(post.content)
|
const name = t.name.toLowerCase()
|
||||||
// Union of both sources
|
|
||||||
const allTagNames = [...new Set([...fromApi, ...fromContent])]
|
|
||||||
for (const name of allTagNames) {
|
|
||||||
if (name !== lowerTag && name.length >= 2 && name.length <= 100) {
|
if (name !== lowerTag && name.length >= 2 && name.length <= 100) {
|
||||||
counts.set(name, (counts.get(name) ?? 0) + 1)
|
counts.set(name, (counts.get(name) ?? 0) + 1)
|
||||||
}
|
}
|
||||||
@@ -143,14 +139,22 @@ export async function getPostsData(
|
|||||||
.slice(0, 10)
|
.slice(0, 10)
|
||||||
.map(([name]) => name)
|
.map(([name]) => name)
|
||||||
|
|
||||||
// Derive the most common casing variant for the queried tag itself
|
// Derive the most common casing variant for the queried tag itself.
|
||||||
|
// Merges post.tags (e.g. "JavaScript") with HTML-rendered span variants for better coverage.
|
||||||
const casingCounts = new Map<string, number>()
|
const casingCounts = new Map<string, number>()
|
||||||
for (const post of allPosts) {
|
for (const post of allPosts) {
|
||||||
|
// From API tags array
|
||||||
for (const t of post.tags ?? []) {
|
for (const t of post.tags ?? []) {
|
||||||
if (t.name.toLowerCase() === lowerTag) {
|
if (t.name.toLowerCase() === lowerTag) {
|
||||||
casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1)
|
casingCounts.set(t.name, (casingCounts.get(t.name) ?? 0) + 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// From HTML content (preserves original casing as typed by the user)
|
||||||
|
for (const variant of extractTagsFromHtml(post.content)) {
|
||||||
|
if (variant.toLowerCase() === lowerTag) {
|
||||||
|
casingCounts.set(variant, (casingCounts.get(variant) ?? 0) + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let displayTag: string | undefined
|
let displayTag: string | undefined
|
||||||
if (casingCounts.size > 0) {
|
if (casingCounts.size > 0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user