Initial commit: shared Go module for 1440.news services
Contains:
- db.go: Database connection wrapper with helper methods
- models.go: Domain, Feed, Item, ShortURL, Click structs
- util.go: URL normalization, TLD functions, search helpers
- handle.go: AT Protocol handle derivation from feed URLs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,178 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Domain represents a host to process for feeds.
//
// Status is one of:
//   - "hold": pending review
//   - "pass": approved
//   - "skip": not processing
//   - "dead": retired TLD
//
// CrawledAt doubles as a processing-state marker:
//   - zero time: needs domain_check
//   - zero time + 1 second: needs feed_crawl
//   - any real time: done
//
// CrawledAt carries no omitempty option, so it is always present in the
// JSON encoding, including when it holds one of the marker values above.
type Domain struct {
	Host       string    `json:"host"`
	Status     string    `json:"status"`
	CrawledAt  time.Time `json:"crawled_at"`
	FeedsFound int       `json:"feeds_found,omitempty"`
	LastError  string    `json:"last_error,omitempty"`
	TLD        string    `json:"tld,omitempty"`
	MissCount  int       `json:"miss_count,omitempty"`
}
|
||||
|
||||
// MissCountThreshold is the number of consecutive errors before setting
// status to hold.
const MissCountThreshold = 100
|
||||
|
||||
// Sentinel values for domain processing state.
var (
	// DomainStateUnchecked is the zero time (0001-01-01 00:00:00) and marks
	// a domain that still needs domain_check.
	DomainStateUnchecked = time.Time{}

	// DomainStateChecked is one second after the zero time
	// (0001-01-01 00:00:01) and marks a domain that needs feed_crawl.
	DomainStateChecked = time.Time{}.Add(time.Second)
)
|
||||
|
||||
// FullHost returns the complete hostname (host + tld)
|
||||
func (d *Domain) FullHost() string {
|
||||
return FullHost(d.Host, d.TLD)
|
||||
}
|
||||
|
||||
// Feed represents a discovered RSS/Atom feed with metadata.
//
// NOTE(review): encoding/json never treats a struct value as empty, so the
// omitempty option on the time.Time fields below has no effect; a zero time
// is still emitted as "0001-01-01T00:00:00Z". The tags are kept as-is so the
// wire format does not change.
type Feed struct {
	URL         string `json:"url"`
	Type        string `json:"type"`     // "rss", "atom", "json", or "unknown"
	Category    string `json:"category"` // "main", "comments", "category", "author", "article", "podcast"
	Title       string `json:"title,omitempty"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
	SiteURL     string `json:"site_url,omitempty"` // The website the feed belongs to

	// Timing
	DiscoveredAt  time.Time `json:"discovered_at"`
	LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: when last checked
	NextCheckAt   time.Time `json:"next_check_at,omitempty"`   // feed_check: when to next check
	LastBuildDate time.Time `json:"last_build_date,omitempty"` // From feed's lastBuildDate/updated

	// Cache headers for conditional requests
	ETag         string `json:"etag,omitempty"`
	LastModified string `json:"last_modified,omitempty"`

	// Health tracking
	Status      string    `json:"status"` // "pass", "hold", "skip"
	LastError   string    `json:"last_error,omitempty"`
	LastErrorAt time.Time `json:"last_error_at,omitempty"`

	// Discovery source
	SourceURL  string `json:"source_url,omitempty"`
	DomainHost string `json:"domain_host,omitempty"`
	DomainTLD  string `json:"domain_tld,omitempty"`

	// Content stats
	ItemCount      int       `json:"item_count,omitempty"` // Number of items in last feed_check
	OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
	NewestItemDate time.Time `json:"newest_item_date,omitempty"`

	// Adaptive check interval
	NoUpdate int `json:"no_update"` // Consecutive checks with no change

	// Publishing to PDS
	PublishStatus  string `json:"publish_status"`            // "hold", "pass", "skip"
	PublishAccount string `json:"publish_account,omitempty"` // e.g., "news.ycombinator.com.1440.news"
}
|
||||
|
||||
// Enclosure represents a media attachment (audio, video, image).
type Enclosure struct {
	URL    string `json:"url"`
	Type   string `json:"type"`   // MIME type (audio/mpeg, image/jpeg, etc.)
	Length int64  `json:"length"` // Size in bytes
}

// Item represents an individual entry/article from a feed.
//
// NOTE(review): encoding/json never treats a struct value as empty, so the
// omitempty option on the time.Time fields below has no effect; a zero time
// is still emitted as "0001-01-01T00:00:00Z". Enclosure is a pointer, so a
// nil Enclosure is omitted as expected.
type Item struct {
	FeedURL      string    `json:"feed_url"`
	GUID         string    `json:"guid,omitempty"`
	Title        string    `json:"title,omitempty"`
	Link         string    `json:"link,omitempty"`
	Description  string    `json:"description,omitempty"`
	Content      string    `json:"content,omitempty"`
	Author       string    `json:"author,omitempty"`
	PubDate      time.Time `json:"pub_date,omitempty"`
	DiscoveredAt time.Time `json:"discovered_at"`
	UpdatedAt    time.Time `json:"updated_at,omitempty"`

	// Media attachments
	Enclosure *Enclosure `json:"enclosure,omitempty"`  // Primary enclosure (podcast audio, etc.)
	ImageURLs []string   `json:"image_urls,omitempty"` // Image URLs extracted from content
	Tags      []string   `json:"tags,omitempty"`       // Category/tag strings from feed

	// Publishing to PDS
	PublishedAt  time.Time `json:"published_at,omitempty"`
	PublishedUri string    `json:"published_uri,omitempty"`
}
|
||||
|
||||
// ShortURL represents a shortened URL mapping.
type ShortURL struct {
	Code        string    `json:"code"`
	OriginalURL string    `json:"original_url"`
	ItemGUID    string    `json:"item_guid,omitempty"`
	FeedURL     string    `json:"feed_url,omitempty"`
	CreatedAt   time.Time `json:"created_at"`
	ClickCount  int       `json:"click_count"`
}
|
||||
|
||||
// Click represents a click event on a short URL.
type Click struct {
	ID        int64     `json:"id"`
	ShortCode string    `json:"short_code"`
	ClickedAt time.Time `json:"clicked_at"`
	Referrer  string    `json:"referrer,omitempty"`
	UserAgent string    `json:"user_agent,omitempty"`
	IPHash    string    `json:"ip_hash,omitempty"`
	Country   string    `json:"country,omitempty"`
}
|
||||
|
||||
// DashboardStats holds all statistics for the dashboard.
//
// Every field is always present in the JSON encoding; none uses omitempty.
type DashboardStats struct {
	// Domain stats
	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed stats
	TotalFeeds   int `json:"total_feeds"`
	AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
	PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"`

	// Processing rates (per minute)
	// NOTE(review): DomainsCrawled is the only int32 counter in this struct;
	// confirm whether the narrower type is intentional.
	DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
	DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
	FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
	FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute

	// Timing
	UpdatedAt time.Time `json:"updated_at"`
}
|
||||
|
||||
// TLDStat holds TLD statistics: a TLD paired with a count.
type TLDStat struct {
	TLD   string `json:"tld"`
	Count int    `json:"count"`
}
|
||||
|
||||
// DomainStat holds domain statistics: a host paired with its feeds-found count.
type DomainStat struct {
	Host       string `json:"host"`
	FeedsFound int    `json:"feeds_found"`
}
|
||||
|
||||
// FeedInfo holds basic feed metadata for profile setup.
//
// The fields carry no JSON tags, so encoding/json would use the Go field
// names verbatim if a value were ever marshalled.
type FeedInfo struct {
	Title       string
	Description string
	SiteURL     string
	SourceHost  string
}
|
||||
Reference in New Issue
Block a user