Contains: - db.go: Database connection wrapper with helper methods - models.go: Domain, Feed, Item, ShortURL, Click structs - util.go: URL normalization, TLD functions, search helpers - handle.go: AT Protocol handle derivation from feed URLs Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
179 lines
6.5 KiB
Go
179 lines
6.5 KiB
Go
package shared
|
|
|
|
import (
|
|
"time"
|
|
)
|
|
|
|
// Domain represents a host to process for feeds.
//
// Status is one of "hold" (pending review), "pass" (approved),
// "skip" (not processing) or "dead" (retired TLD).
//
// CrawledAt doubles as a processing-state marker: the zero time means the
// domain still needs domain_check, the zero time plus one second means it
// needs feed_crawl, and any real timestamp means processing is done. The two
// marker values are available as DomainStateUnchecked and DomainStateChecked.
//
// Host and TLD are stored separately; FullHost combines them.
type Domain struct {
	Host       string    `json:"host"`
	Status     string    `json:"status"`
	CrawledAt  time.Time `json:"crawled_at"`
	FeedsFound int       `json:"feeds_found,omitempty"`
	LastError  string    `json:"last_error,omitempty"`
	TLD        string    `json:"tld,omitempty"`
	MissCount  int       `json:"miss_count,omitempty"`
}
|
|
|
|
// MissCountThreshold is the number of consecutive errors before setting
// status to hold.
const MissCountThreshold = 100
|
|
|
|
// Sentinel values for domain processing state.
//
// NOTE(review): when comparing a stored timestamp against these values,
// prefer Time.Equal over ==, since == also compares the Location.
var (
	// DomainStateUnchecked is the zero time (0001-01-01 00:00:00) and marks
	// a domain that still needs domain_check.
	DomainStateUnchecked = time.Time{}

	// DomainStateChecked is one second after the zero time
	// (0001-01-01 00:00:01) and marks a domain that needs feed_crawl.
	DomainStateChecked = time.Time{}.Add(time.Second)
)
|
|
|
|
// FullHost returns the complete hostname (host + tld)
|
|
func (d *Domain) FullHost() string {
|
|
return FullHost(d.Host, d.TLD)
|
|
}
|
|
|
|
// Feed represents a discovered RSS/Atom feed with metadata.
//
// NOTE(review): several time.Time fields carry `omitempty`. With the standard
// encoding/json package that option never omits a struct value, so zero
// timestamps are still emitted; the tags are left unchanged to keep the wire
// format stable.
type Feed struct {
	URL         string `json:"url"`
	Type        string `json:"type"`     // "rss", "atom", "json", or "unknown"
	Category    string `json:"category"` // "main", "comments", "category", "author", "article", "podcast"
	Title       string `json:"title,omitempty"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
	SiteURL     string `json:"site_url,omitempty"` // Website the feed belongs to

	// Timing
	DiscoveredAt  time.Time `json:"discovered_at"`
	LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: when last checked
	NextCheckAt   time.Time `json:"next_check_at,omitempty"`   // feed_check: when to check next
	LastBuildDate time.Time `json:"last_build_date,omitempty"` // From the feed's lastBuildDate/updated

	// Cache headers for conditional requests
	ETag         string `json:"etag,omitempty"`
	LastModified string `json:"last_modified,omitempty"`

	// Health tracking
	Status      string    `json:"status"` // "pass", "hold", "skip"
	LastError   string    `json:"last_error,omitempty"`
	LastErrorAt time.Time `json:"last_error_at,omitempty"`

	// Discovery source
	SourceURL  string `json:"source_url,omitempty"`
	DomainHost string `json:"domain_host,omitempty"`
	DomainTLD  string `json:"domain_tld,omitempty"`

	// Content stats
	ItemCount      int       `json:"item_count,omitempty"` // Number of items in last feed_check
	OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
	NewestItemDate time.Time `json:"newest_item_date,omitempty"`

	// Adaptive check interval
	NoUpdate int `json:"no_update"` // Consecutive checks with no change

	// Publishing to PDS
	PublishStatus  string `json:"publish_status"`            // "hold", "pass", "skip"
	PublishAccount string `json:"publish_account,omitempty"` // e.g., "news.ycombinator.com.1440.news"
}
|
|
|
|
// Enclosure represents a media attachment (audio, video, image).
type Enclosure struct {
	URL    string `json:"url"`
	Type   string `json:"type"`   // MIME type (audio/mpeg, image/jpeg, etc.)
	Length int64  `json:"length"` // Size in bytes
}
|
|
|
|
// Item represents an individual entry/article from a feed
|
|
type Item struct {
|
|
FeedURL string `json:"feed_url"`
|
|
GUID string `json:"guid,omitempty"`
|
|
Title string `json:"title,omitempty"`
|
|
Link string `json:"link,omitempty"`
|
|
Description string `json:"description,omitempty"`
|
|
Content string `json:"content,omitempty"`
|
|
Author string `json:"author,omitempty"`
|
|
PubDate time.Time `json:"pub_date,omitempty"`
|
|
DiscoveredAt time.Time `json:"discovered_at"`
|
|
UpdatedAt time.Time `json:"updated_at,omitempty"`
|
|
|
|
// Media attachments
|
|
Enclosure *Enclosure `json:"enclosure,omitempty"` // Primary enclosure (podcast audio, etc.)
|
|
ImageURLs []string `json:"image_urls,omitempty"` // Image URLs extracted from content
|
|
Tags []string `json:"tags,omitempty"` // Category/tag strings from feed
|
|
|
|
// Publishing to PDS
|
|
PublishedAt time.Time `json:"published_at,omitempty"`
|
|
PublishedUri string `json:"published_uri,omitempty"`
|
|
}
|
|
|
|
// ShortURL represents a shortened URL mapping from Code to OriginalURL.
// ItemGUID and FeedURL are optional and omitted from JSON when empty.
type ShortURL struct {
	Code        string    `json:"code"`
	OriginalURL string    `json:"original_url"`
	ItemGUID    string    `json:"item_guid,omitempty"`
	FeedURL     string    `json:"feed_url,omitempty"`
	CreatedAt   time.Time `json:"created_at"`
	ClickCount  int       `json:"click_count"`
}
|
|
|
|
// Click represents a click event on a short URL. ShortCode names the short
// URL that was clicked; the remaining string fields are optional and omitted
// from JSON when empty.
type Click struct {
	ID        int64     `json:"id"`
	ShortCode string    `json:"short_code"`
	ClickedAt time.Time `json:"clicked_at"`
	Referrer  string    `json:"referrer,omitempty"`
	UserAgent string    `json:"user_agent,omitempty"`
	IPHash    string    `json:"ip_hash,omitempty"`
	Country   string    `json:"country,omitempty"`
}
|
|
|
|
// DashboardStats holds all statistics for the dashboard: per-status domain
// counts, feed counts by status and type, processing counters/rates, and the
// time the snapshot was last updated.
type DashboardStats struct {
	// Domain stats
	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed stats
	TotalFeeds   int `json:"total_feeds"`
	AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
	PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"`

	// Processing rates (per minute)
	DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
	DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
	FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
	FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute

	// Timing
	UpdatedAt time.Time `json:"updated_at"`
}
|
|
|
|
// TLDStat holds TLD statistics: a TLD paired with a count.
type TLDStat struct {
	TLD   string `json:"tld"`
	Count int    `json:"count"`
}
|
|
|
|
// DomainStat holds domain statistics: a host paired with the number of feeds
// found for it.
type DomainStat struct {
	Host       string `json:"host"`
	FeedsFound int    `json:"feeds_found"`
}
|
|
|
|
// FeedInfo holds basic feed metadata for profile setup. The fields carry no
// JSON tags, so any JSON encoding uses the Go field names as keys.
type FeedInfo struct {
	Title       string
	Description string
	SiteURL     string
	SourceHost  string
}
|