Files
commons/models.go
primal 1ff14ee957 Initial commit: shared Go module for 1440.news services
Contains:
- db.go: Database connection wrapper with helper methods
- models.go: Domain, Feed, Item, ShortURL, Click structs
- util.go: URL normalization, TLD functions, search helpers
- handle.go: AT Protocol handle derivation from feed URLs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 11:19:04 -05:00

179 lines
6.5 KiB
Go

package shared
import (
"time"
)
// Domain is a single host that is processed for feeds.
//
// Status takes one of four values:
//   - "hold": pending review
//   - "pass": approved
//   - "skip": not processing
//   - "dead": retired TLD
//
// CrawledAt doubles as a processing-state marker: the zero time means the
// domain still needs domain_check, the zero time plus one second means it
// needs feed_crawl, and any real timestamp means the work is done.
type Domain struct {
	// Host is the host portion; FullHost combines it with TLD.
	Host string `json:"host"`
	// Status is "hold", "pass", "skip" or "dead".
	Status string `json:"status"`
	// CrawledAt is always serialized because its zero value is meaningful.
	CrawledAt time.Time `json:"crawled_at"`
	// FeedsFound is the number of feeds found for this domain.
	FeedsFound int `json:"feeds_found,omitempty"`
	// LastError is the most recent error text, if any.
	LastError string `json:"last_error,omitempty"`
	// TLD is the top-level domain stored separately from Host.
	TLD string `json:"tld,omitempty"`
	// MissCount is presumably the consecutive-error counter compared against
	// MissCountThreshold — confirm against the crawler code.
	MissCount int `json:"miss_count,omitempty"`
}
// MissCountThreshold is the number of consecutive errors tolerated before the
// status is set to "hold".
const MissCountThreshold = 100
// Sentinel timestamps that encode a domain's processing state.
//
// Both are ordinary time.Time values; compare against them with
// time.Time.Equal rather than ==.
var (
	// DomainStateUnchecked is the zero time (0001-01-01 00:00:00): the domain
	// still needs domain_check.
	DomainStateUnchecked time.Time

	// DomainStateChecked is one second past the zero time
	// (0001-01-01 00:00:01): the domain still needs feed_crawl.
	DomainStateChecked = DomainStateUnchecked.Add(time.Second)
)
// FullHost returns the domain's complete hostname (host + tld), as produced
// by the package-level FullHost helper from the Host and TLD fields.
func (d *Domain) FullHost() string {
	host, tld := d.Host, d.TLD
	return FullHost(host, tld)
}
// Feed represents a discovered RSS/Atom feed with metadata.
//
// Optional timestamps use the `omitzero` JSON option instead of `omitempty`:
// encoding/json never treats a struct value such as time.Time as "empty", so
// `omitempty` left unset timestamps in the output as "0001-01-01T00:00:00Z".
// `omitzero` requires Go 1.24 or newer; older toolchains ignore the option and
// keep emitting the zero timestamp.
type Feed struct {
	URL string `json:"url"`
	// Type is "rss", "atom", "json", or "unknown".
	Type string `json:"type"`
	// Category is "main", "comments", "category", "author", "article", or
	// "podcast".
	Category    string `json:"category"`
	Title       string `json:"title,omitempty"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
	// SiteURL is the website the feed belongs to.
	SiteURL string `json:"site_url,omitempty"`

	// Timing

	DiscoveredAt time.Time `json:"discovered_at"`
	// LastCheckedAt is when feed_check last checked the feed.
	LastCheckedAt time.Time `json:"last_checked_at,omitzero"`
	// NextCheckAt is when feed_check should next check the feed.
	NextCheckAt time.Time `json:"next_check_at,omitzero"`
	// LastBuildDate comes from the feed's lastBuildDate/updated.
	LastBuildDate time.Time `json:"last_build_date,omitzero"`

	// Cache headers for conditional requests

	ETag         string `json:"etag,omitempty"`
	LastModified string `json:"last_modified,omitempty"`

	// Health tracking

	// Status is "pass", "hold", or "skip".
	Status      string    `json:"status"`
	LastError   string    `json:"last_error,omitempty"`
	LastErrorAt time.Time `json:"last_error_at,omitzero"`

	// Discovery source

	SourceURL  string `json:"source_url,omitempty"`
	DomainHost string `json:"domain_host,omitempty"`
	DomainTLD  string `json:"domain_tld,omitempty"`

	// Content stats

	// ItemCount is the number of items in the last feed_check.
	ItemCount      int       `json:"item_count,omitempty"`
	OldestItemDate time.Time `json:"oldest_item_date,omitzero"`
	NewestItemDate time.Time `json:"newest_item_date,omitzero"`

	// Adaptive check interval

	// NoUpdate counts consecutive checks with no change.
	NoUpdate int `json:"no_update"`

	// Publishing to PDS

	// PublishStatus is "hold", "pass", or "skip".
	PublishStatus string `json:"publish_status"`
	// PublishAccount is e.g. "news.ycombinator.com.1440.news".
	PublishAccount string `json:"publish_account,omitempty"`
}
// Enclosure describes a media attachment (audio, video, image) carried by a
// feed item.
type Enclosure struct {
	// URL is the location of the media file.
	URL string `json:"url"`
	// Type is the MIME type (audio/mpeg, image/jpeg, etc.).
	Type string `json:"type"`
	// Length is the size in bytes.
	Length int64 `json:"length"`
}
// Item represents an individual entry/article from a feed.
//
// Optional timestamps use the `omitzero` JSON option instead of `omitempty`:
// encoding/json never treats a struct value such as time.Time as "empty", so
// `omitempty` left unset timestamps in the output as "0001-01-01T00:00:00Z".
// `omitzero` requires Go 1.24 or newer; older toolchains ignore the option and
// keep emitting the zero timestamp.
type Item struct {
	FeedURL      string    `json:"feed_url"`
	GUID         string    `json:"guid,omitempty"`
	Title        string    `json:"title,omitempty"`
	Link         string    `json:"link,omitempty"`
	Description  string    `json:"description,omitempty"`
	Content      string    `json:"content,omitempty"`
	Author       string    `json:"author,omitempty"`
	PubDate      time.Time `json:"pub_date,omitzero"`
	DiscoveredAt time.Time `json:"discovered_at"`
	UpdatedAt    time.Time `json:"updated_at,omitzero"`

	// Media attachments

	// Enclosure is the primary enclosure (podcast audio, etc.); nil when the
	// item has none.
	Enclosure *Enclosure `json:"enclosure,omitempty"`
	// ImageURLs holds image URLs extracted from content.
	ImageURLs []string `json:"image_urls,omitempty"`
	// Tags holds category/tag strings from the feed.
	Tags []string `json:"tags,omitempty"`

	// Publishing to PDS

	PublishedAt time.Time `json:"published_at,omitzero"`
	// PublishedUri keeps its original spelling (not PublishedURI) so existing
	// callers continue to compile.
	PublishedUri string `json:"published_uri,omitempty"`
}
// ShortURL maps a short code to the original URL it stands for.
type ShortURL struct {
	// Code is the short identifier.
	Code string `json:"code"`
	// OriginalURL is the URL the code maps to.
	OriginalURL string `json:"original_url"`
	// ItemGUID and FeedURL optionally tie the short URL to a feed item.
	ItemGUID string `json:"item_guid,omitempty"`
	FeedURL  string `json:"feed_url,omitempty"`
	// CreatedAt is when the mapping was created.
	CreatedAt time.Time `json:"created_at"`
	// ClickCount is always serialized, including when it is zero.
	ClickCount int `json:"click_count"`
}
// Click records one click event on a short URL.
type Click struct {
	ID int64 `json:"id"`
	// ShortCode identifies the short URL that was clicked.
	ShortCode string `json:"short_code"`
	// ClickedAt is when the click happened.
	ClickedAt time.Time `json:"clicked_at"`

	// Optional request details; each is omitted from JSON when empty.
	Referrer  string `json:"referrer,omitempty"`
	UserAgent string `json:"user_agent,omitempty"`
	IPHash    string `json:"ip_hash,omitempty"`
	Country   string `json:"country,omitempty"`
}
// DashboardStats aggregates every statistic shown on the dashboard.
type DashboardStats struct {
	// Domain counts, in total and per status.

	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed counts, in total, per status and per feed type.

	TotalFeeds int `json:"total_feeds"`
	// AliveFeeds counts feeds with status='pass' (healthy feeds).
	AliveFeeds int `json:"alive_feeds"`
	// PublishFeeds counts feeds with publish_status='pass' (approved for
	// publishing).
	PublishFeeds int `json:"publish_feeds"`
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"`

	// Processing rates (per minute).

	// DomainsCrawled is the feed_crawl count.
	// NOTE(review): this is a count typed int32, unlike the int rate fields
	// beside it — confirm whether the narrower type is intentional.
	DomainsCrawled int32 `json:"domains_crawled"`
	// DomainCheckRate is domain_check operations per minute.
	DomainCheckRate int `json:"domain_check_rate"`
	// FeedCrawlRate is feed_crawl operations per minute.
	FeedCrawlRate int `json:"feed_crawl_rate"`
	// FeedCheckRate is feed_check operations per minute.
	FeedCheckRate int `json:"feed_check_rate"`

	// Timing

	UpdatedAt time.Time `json:"updated_at"`
}
// TLDStat pairs a TLD with a count for statistics output.
type TLDStat struct {
	// TLD is the top-level domain the count refers to.
	TLD string `json:"tld"`
	// Count is the tally reported for that TLD.
	Count int `json:"count"`
}
// DomainStat pairs a host with its feeds-found figure for statistics output.
type DomainStat struct {
	// Host is the host the figure refers to.
	Host string `json:"host"`
	// FeedsFound is always serialized, including when it is zero.
	FeedsFound int `json:"feeds_found"`
}
// FeedInfo carries the basic feed metadata used for profile setup.
//
// The fields have no JSON tags, so encoding/json uses the Go field names as
// keys.
type FeedInfo struct {
	// Title is the feed title.
	Title string
	// Description is the feed description.
	Description string
	// SiteURL is the site URL recorded for the feed.
	SiteURL string
	// SourceHost is the host recorded as the feed's source.
	SourceHost string
}