package commons

import (
	"time"
)

// Domain describes one host that is processed in search of feeds.
//
// Status is one of:
//   - hold: pending review
//   - pass: approved
//   - skip: not processing
//   - dead: retired TLD
//
// CrawledAt doubles as a processing-state marker. The zero time
// (DomainStateUnchecked) means the domain still needs domain_check, the zero
// time plus one second (DomainStateChecked) means it needs feed_crawl, and a
// real timestamp means processing is done.
type Domain struct {
	Host       string    `json:"host"`
	Status     string    `json:"status"`
	CrawledAt  time.Time `json:"crawled_at"`
	FeedsFound int       `json:"feeds_found,omitempty"`
	LastError  string    `json:"last_error,omitempty"`
	TLD        string    `json:"tld,omitempty"`
	MissCount  int       `json:"miss_count,omitempty"`
}

// MissCountThreshold is how many consecutive errors may occur before the
// status is set to hold.
const MissCountThreshold = 100

// Sentinel timestamps that encode a domain's processing state.
var (
	// DomainStateUnchecked is 0001-01-01 00:00:00: the domain needs domain_check.
	DomainStateUnchecked = time.Time{}

	// DomainStateChecked is 0001-01-01 00:00:01: the domain needs feed_crawl.
	DomainStateChecked = time.Time{}.Add(time.Second)
)

// FullHost returns the domain's complete hostname (host + tld). It passes
// d.Host and d.TLD to the package-level FullHost function, which is declared
// elsewhere in the package.
func (d *Domain) FullHost() string {
	return FullHost(d.Host, d.TLD)
}

// Feed is a discovered RSS/Atom feed together with its metadata.
//
// NOTE(review): encoding/json's omitempty option never treats a struct value
// as empty, so the time.Time fields tagged omitempty below are still emitted
// when they hold the zero time.
type Feed struct {
	URL         string `json:"url"`
	Type        string `json:"type"`     // one of "rss", "atom", "json", "unknown"
	Category    string `json:"category"` // one of "main", "comments", "category", "author", "article", "podcast"
	Title       string `json:"title,omitempty"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
	SiteURL     string `json:"site_url,omitempty"` // website the feed belongs to

	// Timing.
	DiscoveredAt  time.Time `json:"discovered_at"`
	LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: time of the last check
	NextCheckAt   time.Time `json:"next_check_at,omitempty"`   // feed_check: time of the next check
	LastBuildDate time.Time `json:"last_build_date,omitempty"` // taken from the feed's lastBuildDate/updated

	// Cache headers used for conditional requests.
	ETag         string `json:"etag,omitempty"`
	LastModified string `json:"last_modified,omitempty"`

	// Health tracking.
	Status      string    `json:"status"` // one of "pass", "hold", "skip"
	LastError   string    `json:"last_error,omitempty"`
	LastErrorAt time.Time `json:"last_error_at,omitempty"`

	// Where the feed was discovered.
	SourceURL  string `json:"source_url,omitempty"`
	DomainHost string `json:"domain_host,omitempty"`
	DomainTLD  string `json:"domain_tld,omitempty"`

	// Content statistics.
	ItemCount      int       `json:"item_count,omitempty"` // item count seen by the last feed_check
	OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
	NewestItemDate time.Time `json:"newest_item_date,omitempty"`

	// Adaptive check interval.
	NoUpdate int `json:"no_update"` // consecutive checks that found no change

	// Publishing to PDS.
	PublishStatus  string `json:"publish_status"`            // one of "hold", "pass", "skip"
	PublishAccount string `json:"publish_account,omitempty"` // e.g., "news.ycombinator.com.1440.news"
}

// Enclosure is a media attachment (audio, video, image).
type Enclosure struct {
	URL    string `json:"url"`
	Type   string `json:"type"`   // MIME type, e.g. audio/mpeg or image/jpeg
	Length int64  `json:"length"` // size in bytes
}

// Item is a single entry/article belonging to a feed.
//
// NOTE(review): as with Feed, omitempty has no effect on the time.Time fields
// when they are marshalled by encoding/json.
type Item struct {
	FeedURL      string    `json:"feed_url"`
	GUID         string    `json:"guid,omitempty"`
	Title        string    `json:"title,omitempty"`
	Link         string    `json:"link,omitempty"`
	Description  string    `json:"description,omitempty"`
	Content      string    `json:"content,omitempty"`
	Author       string    `json:"author,omitempty"`
	PubDate      time.Time `json:"pub_date,omitempty"`
	DiscoveredAt time.Time `json:"discovered_at"`
	UpdatedAt    time.Time `json:"updated_at,omitempty"`

	// Media attachments.
	Enclosure *Enclosure `json:"enclosure,omitempty"`  // primary enclosure (podcast audio, etc.)
	ImageURLs []string   `json:"image_urls,omitempty"` // image URLs extracted from the content
	Tags      []string   `json:"tags,omitempty"`       // category/tag strings supplied by the feed

	// Publishing to PDS.
	PublishedAt  time.Time `json:"published_at,omitempty"`
	PublishedUri string    `json:"published_uri,omitempty"`
}

// ShortURL maps a short code to the URL it stands for.
type ShortURL struct {
	Code        string    `json:"code"`
	OriginalURL string    `json:"original_url"`
	ItemGUID    string    `json:"item_guid,omitempty"`
	FeedURL     string    `json:"feed_url,omitempty"`
	CreatedAt   time.Time `json:"created_at"`
	ClickCount  int       `json:"click_count"`
}

// Click records one click event on a short URL.
type Click struct {
	ID        int64     `json:"id"`
	ShortCode string    `json:"short_code"`
	ClickedAt time.Time `json:"clicked_at"`
	Referrer  string    `json:"referrer,omitempty"`
	UserAgent string    `json:"user_agent,omitempty"`
	IPHash    string    `json:"ip_hash,omitempty"`
	Country   string    `json:"country,omitempty"`
}

// DashboardStats bundles every statistic shown on the dashboard.
type DashboardStats struct {
	// Domain counts.
	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed counts.
	TotalFeeds   int `json:"total_feeds"`
	AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
	PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"`

	// Processing rates (per minute).
	DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
	DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
	FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
	FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute

	// Timing.
	UpdatedAt time.Time `json:"updated_at"`
}

// TLDStat carries the statistics kept for a single TLD.
type TLDStat struct {
	TLD   string `json:"tld"`
	Count int    `json:"count"`
}

// DomainStat carries the statistics kept for a single domain.
type DomainStat struct {
	Host       string `json:"host"`
	FeedsFound int    `json:"feeds_found"`
}

// FeedInfo carries the basic feed metadata used for profile setup. Its fields
// have no JSON tags.
type FeedInfo struct {
	Title       string
	Description string
	SiteURL     string
	SourceHost  string
}