Redefine logic to try to get a stable price from an unstable timeline
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m24s
Build Images and Deploy / Update-PROD-Stack (push) Successful in 1m24s
This commit is contained in:
+43
-14
@@ -70,14 +70,27 @@ export async function getPostsPerHour(tag: string): Promise<number> {
|
|||||||
* Returns posts-per-hour AND a sorted list of co-occurring tag names
|
* Returns posts-per-hour AND a sorted list of co-occurring tag names
|
||||||
* (lowercased, excluding the queried tag itself).
|
* (lowercased, excluding the queried tag itself).
|
||||||
*
|
*
|
||||||
* Strategy:
|
* Pagination strategy:
|
||||||
* - Paginate until we have at least one post older than 1 hour (a complete picture),
|
* - Fetch pages until the oldest post in a batch falls before the 1-hour cutoff
|
||||||
* OR we exhaust the timeline, OR we hit MAX_PAGES_PER_HASHTAG.
|
* (the horizon), OR the timeline is exhausted, OR MAX_PAGES_PER_HASHTAG is reached.
|
||||||
* - Oldest post >= 1 hour old: count posts in the last hour directly (full window).
|
* - When we first cross the horizon, keep fetching additional pages as long as each
|
||||||
* - Hit the page cap (burst): more posts exist beyond what we fetched — extrapolate from
|
* new page contributes at least one post newer than the cutoff. Only stop when a page
|
||||||
* the covered span (postsPerHour = count / coveredHours).
|
* adds nothing new to the in-window count — at that point the window is stable.
|
||||||
* - Timeline exhausted (sparse): these are all the posts that exist — use the raw count.
|
* This handles out-of-order federation: Mastodon timelines are ordered by post ID
|
||||||
* Extrapolating would artificially inflate a tag with 3 posts clustered in 10 minutes.
|
* (local receive time), not created_at. A post authored at 10:45 on a remote server
|
||||||
|
* may arrive at 11:05, get a recent ID and appear near the top of the stream — but
|
||||||
|
* its created_at is older than its stream position suggests. Continuing until the count stabilises
|
||||||
|
* ensures all such late-arriving posts are captured regardless of how many pages
|
||||||
|
* they span.
|
||||||
|
* - After collecting all pages, sort by created_at and filter to the last hour for an
|
||||||
|
* accurate count regardless of any remaining ordering noise.
|
||||||
|
*
|
||||||
|
* PPH calculation:
|
||||||
|
* - Crossed horizon (direct): we have a full window — count posts with created_at >= cutoff.
|
||||||
|
* - Hit page cap without crossing (burst): more posts exist beyond what we fetched —
|
||||||
|
* extrapolate from the covered time span (count / coveredHours).
|
||||||
|
* - Timeline exhausted without crossing (sparse): all posts in the last hour are accounted
|
||||||
|
* for — use the raw count directly (no extrapolation).
|
||||||
*/
|
*/
|
||||||
export async function getPostsData(
|
export async function getPostsData(
|
||||||
tag: string,
|
tag: string,
|
||||||
@@ -91,8 +104,15 @@ export async function getPostsData(
|
|||||||
let allPosts: MastodonPost[] = []
|
let allPosts: MastodonPost[] = []
|
||||||
let maxId: string | undefined
|
let maxId: string | undefined
|
||||||
let hitPageCap = false
|
let hitPageCap = false
|
||||||
|
let crossedHorizon = false
|
||||||
|
|
||||||
for (let page = 0; page < maxPages; page++) {
|
for (let page = 0; page < maxPages; page++) {
|
||||||
|
// Once we've crossed the horizon, snapshot the in-window count before this fetch
|
||||||
|
// so we can detect whether the page contributed anything new.
|
||||||
|
const inWindowBefore = crossedHorizon
|
||||||
|
? allPosts.filter((p) => new Date(p.created_at).getTime() >= cutoff).length
|
||||||
|
: 0
|
||||||
|
|
||||||
const { posts, nextMaxId } = await fetchPage(tag, maxId, postLimit)
|
const { posts, nextMaxId } = await fetchPage(tag, maxId, postLimit)
|
||||||
|
|
||||||
if (posts.length === 0) break
|
if (posts.length === 0) break
|
||||||
@@ -101,21 +121,30 @@ export async function getPostsData(
|
|||||||
// End of timeline or no more pages
|
// End of timeline or no more pages
|
||||||
if (posts.length < postLimit || !nextMaxId) break
|
if (posts.length < postLimit || !nextMaxId) break
|
||||||
|
|
||||||
// If the oldest post in this batch is already beyond 1 hour, we have a full window
|
if (crossedHorizon) {
|
||||||
|
// Keep fetching while this page added new in-window posts; stop when count stabilises
|
||||||
|
const inWindowAfter = allPosts.filter((p) => new Date(p.created_at).getTime() >= cutoff).length
|
||||||
|
if (inWindowAfter === inWindowBefore) break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this batch first crosses the 1-hour horizon
|
||||||
const oldestInBatch = Math.min(...posts.map((p) => new Date(p.created_at).getTime()))
|
const oldestInBatch = Math.min(...posts.map((p) => new Date(p.created_at).getTime()))
|
||||||
if (oldestInBatch < cutoff) break
|
if (oldestInBatch < cutoff) crossedHorizon = true
|
||||||
|
|
||||||
maxId = nextMaxId
|
maxId = nextMaxId
|
||||||
|
|
||||||
// Mark if we completed the final allowed page without breaking
|
// Only mark as hit-cap when we never found old enough data (true burst scenario)
|
||||||
if (page === maxPages - 1) hitPageCap = true
|
if (page === maxPages - 1 && !crossedHorizon) hitPageCap = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allPosts.length === 0) return { postsPerHour: 0, relatedTags: [], hasAnyPosts: false }
|
if (allPosts.length === 0) return { postsPerHour: 0, relatedTags: [], hasAnyPosts: false }
|
||||||
|
|
||||||
|
// Sort globally by created_at so the window filter is accurate regardless of federation order
|
||||||
|
allPosts.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
|
||||||
|
|
||||||
const times = allPosts.map((p) => new Date(p.created_at).getTime())
|
const times = allPosts.map((p) => new Date(p.created_at).getTime())
|
||||||
const newestMs = Math.max(...times)
|
const newestMs = times[0]
|
||||||
const oldestMs = Math.min(...times)
|
const oldestMs = times[times.length - 1]
|
||||||
|
|
||||||
let postsPerHour: number
|
let postsPerHour: number
|
||||||
if (oldestMs < cutoff) {
|
if (oldestMs < cutoff) {
|
||||||
|
|||||||
Reference in New Issue
Block a user