diff --git a/db.go b/db.go index 8fee8b3..8226bf0 100644 --- a/db.go +++ b/db.go @@ -45,7 +45,6 @@ CREATE TABLE IF NOT EXISTS feeds ( description TEXT, language TEXT, site_url TEXT, - source_url TEXT, discovered_at TIMESTAMP NOT NULL DEFAULT NOW(), last_checked_at TIMESTAMP, -- feed_check: when last checked for new items @@ -270,8 +269,9 @@ func OpenDatabase(connString string) (*DB, error) { if colExists { pool.Exec(ctx, "ALTER TABLE feeds RENAME COLUMN last_crawled_at TO last_checked_at") } - // Drop legacy next_check_at column if it exists (no longer used) + // Drop legacy columns if they exist (no longer used) pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS next_check_at") + pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS source_url") // Create index for feed check scheduling pool.Exec(ctx, "DROP INDEX IF EXISTS idx_feeds_to_check") pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_to_check ON feeds(last_checked_at NULLS FIRST, miss_count) WHERE status IN ('PUBLISH', 'STANDBY')") diff --git a/feed.go b/feed.go index 9363fee..216be9c 100644 --- a/feed.go +++ b/feed.go @@ -148,8 +148,7 @@ type Feed struct { LastError string `json:"last_error,omitempty"` LastErrorAt time.Time `json:"last_error_at,omitempty"` - // Discovery source - SourceURL string `json:"source_url,omitempty"` + // Domain info DomainHost string `json:"domain_host,omitempty"` DomainTLD string `json:"domain_tld,omitempty"` @@ -170,10 +169,10 @@ func (c *Crawler) saveFeed(feed *Feed) error { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22) + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) ON CONFLICT(url) DO UPDATE SET type = EXCLUDED.type, category = EXCLUDED.category, @@ -197,7 +196,7 @@ func (c *Crawler) saveFeed(feed *Feed) error { feed.DiscoveredAt, NullableTime(feed.LastCheckedAt), NullableTime(feed.LastBuildDate), NullableString(feed.ETag), NullableString(feed.LastModified), feed.Status, NullableString(feed.LastError), NullableTime(feed.LastErrorAt), - NullableString(feed.SourceURL), NullableString(feed.DomainHost), NullableString(feed.DomainTLD), + NullableString(feed.DomainHost), NullableString(feed.DomainTLD), feed.ItemCount, NullableTime(feed.OldestItemDate), NullableTime(feed.NewestItemDate), feed.MissCount, ) @@ -209,7 +208,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) { feed := &Feed{} var category, title, description, language, siteURL *string var lastCheckedAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time - var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string + var etag, lastModified, lastError, domainHost, domainTLD *string var status *string var itemCount, missCount *int @@ -218,7 +217,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count FROM feeds WHERE url = $1 @@ -227,7 +226,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) { &feed.DiscoveredAt, &lastCheckedAt, &lastBuildDate, &etag, &lastModified, &status, &lastError, &lastErrorAt, - &sourceURL, &domainHost, &domainTLD, + &domainHost, &domainTLD, &itemCount, &oldestItemDate, &newestItemDate, &missCount, ) @@ -259,7 +258,6 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) { } feed.LastError = StringValue(lastError) feed.LastErrorAt = TimeValue(lastErrorAt) - feed.SourceURL = StringValue(sourceURL) feed.DomainHost = StringValue(domainHost) feed.DomainTLD = StringValue(domainTLD) if itemCount != nil { @@ -296,7 +294,7 @@ func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count FROM feeds @@ -321,7 +319,7 @@ func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count FROM feeds WHERE domain_host = $1 @@ -342,7 +340,7 @@ func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count FROM feeds @@ -365,7 +363,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) { feed := &Feed{} var feedType, category, title, description, language, siteURL *string var lastCheckedAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time - var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string + var etag, lastModified, lastError, domainHost, domainTLD *string var itemCount, missCount *int var status *string @@ -374,7 +372,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) { &feed.DiscoveredAt, &lastCheckedAt, &lastBuildDate, &etag, &lastModified, &status, &lastError, &lastErrorAt, - &sourceURL, &domainHost, &domainTLD, + &domainHost, &domainTLD, &itemCount, &oldestItemDate, &newestItemDate, &missCount, ); err != nil { @@ -402,7 +400,6 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) { } feed.LastError = StringValue(lastError) feed.LastErrorAt = TimeValue(lastErrorAt) - feed.SourceURL = StringValue(sourceURL) feed.DomainHost = StringValue(domainHost) feed.DomainTLD = StringValue(domainTLD) if itemCount != nil { @@ -434,7 +431,7 @@ func (c *Crawler) GetFeedsByStatus(status string, limit int) ([]*Feed, error) { discovered_at, last_checked_at, last_build_date, etag, last_modified, status, last_error, last_error_at, - source_url, domain_host, domain_tld, + domain_host, domain_tld, item_count, oldest_item_date, newest_item_date, miss_count FROM feeds