redefine logic to try and get a stable price with unstable timeline
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m24s

This commit is contained in:
2026-03-21 14:19:11 -04:00
parent 997f1041f0
commit e067d3f5c7
+43 -14
View File
@@ -70,14 +70,27 @@ export async function getPostsPerHour(tag: string): Promise<number> {
* Returns posts-per-hour AND a sorted list of co-occurring tag names
* (lowercased, excluding the queried tag itself).
*
* Strategy:
* - Paginate until we have at least one post older than 1 hour (a complete picture),
* OR we exhaust the timeline, OR we hit MAX_PAGES_PER_HASHTAG.
* - Oldest post >= 1 hour old: count posts in the last hour directly (full window).
* - Hit the page cap (burst): more posts exist beyond what we fetched — extrapolate from
* the covered span (postsPerHour = count / coveredHours).
* - Timeline exhausted (sparse): these are all the posts that exist — use the raw count.
* Extrapolating would artificially inflate a tag with 3 posts clustered in 10 minutes.
* Pagination strategy:
* - Fetch pages until the oldest post in a batch falls before the 1-hour cutoff
* (the horizon), OR the timeline is exhausted, OR MAX_PAGES_PER_HASHTAG is reached.
* - When we first cross the horizon, keep fetching additional pages as long as each
* new page contributes at least one post within the cutoff. Only stop when a page
* adds nothing new to the in-window count — at that point the window is stable.
* This handles out-of-order federation: Mastodon timelines are ordered by post ID
* (local receive time), not created_at. A post authored at 10:45 on a remote server
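* may arrive at 11:05, get a recent ID and appear near the top of the stream — even
* though its created_at is older than that of the posts around it. A single such post
* can make a batch look like it crossed the horizon while later pages still hold
* in-window posts, so fetching continues until a page adds no in-window posts (or the
* timeline is exhausted, or MAX_PAGES_PER_HASHTAG is reached). In-window posts that sit
* behind a full page of older posts are not captured.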
* - After collecting all pages, sort by created_at and filter to the last hour for an
* accurate count regardless of any remaining ordering noise.
*
* PPH calculation:
* - Crossed horizon (direct): we have a full window — count posts with created_at >= cutoff.
* - Hit page cap without crossing (burst): more posts exist beyond what we fetched —
* extrapolate from the covered time span (count / coveredHours).
* - Timeline exhausted without crossing (sparse): all posts in the last hour are accounted
* for — use the raw count directly (no extrapolation).
*/
export async function getPostsData(
tag: string,
@@ -91,8 +104,15 @@ export async function getPostsData(
let allPosts: MastodonPost[] = []
let maxId: string | undefined
let hitPageCap = false
let crossedHorizon = false
for (let page = 0; page < maxPages; page++) {
// Once we've crossed the horizon, snapshot the in-window count before this fetch
// so we can detect whether the page contributed anything new.
const inWindowBefore = crossedHorizon
? allPosts.filter((p) => new Date(p.created_at).getTime() >= cutoff).length
: 0
const { posts, nextMaxId } = await fetchPage(tag, maxId, postLimit)
if (posts.length === 0) break
@@ -101,21 +121,30 @@ export async function getPostsData(
// End of timeline or no more pages
if (posts.length < postLimit || !nextMaxId) break
// If the oldest post in this batch is already beyond 1 hour, we have a full window
if (crossedHorizon) {
// Keep fetching while this page added new in-window posts; stop when count stabilises
const inWindowAfter = allPosts.filter((p) => new Date(p.created_at).getTime() >= cutoff).length
if (inWindowAfter === inWindowBefore) break
}
// Check if this batch first crosses the 1-hour horizon
const oldestInBatch = Math.min(...posts.map((p) => new Date(p.created_at).getTime()))
if (oldestInBatch < cutoff) break
if (oldestInBatch < cutoff) crossedHorizon = true
maxId = nextMaxId
// Mark if we completed the final allowed page without breaking
if (page === maxPages - 1) hitPageCap = true
// Only mark as hit-cap when we never found old enough data (true burst scenario)
if (page === maxPages - 1 && !crossedHorizon) hitPageCap = true
}
if (allPosts.length === 0) return { postsPerHour: 0, relatedTags: [], hasAnyPosts: false }
// Sort globally by created_at so the window filter is accurate regardless of federation order
allPosts.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
const times = allPosts.map((p) => new Date(p.created_at).getTime())
const newestMs = Math.max(...times)
const oldestMs = Math.min(...times)
const newestMs = times[0]
const oldestMs = times[times.length - 1]
let postsPerHour: number
if (oldestMs < cutoff) {