Migrate to normalized FK schema (domain_host, domain_tld)
Replace the source_host column with a proper FK to the domains table, using the composite key (domain_host, domain_tld). This enables JOIN queries instead of string concatenation for domain lookups.

Changes:
- Update the Feed struct: SourceHost/TLD → DomainHost/DomainTLD
- Update all SQL queries to use the domain_host/domain_tld columns
- Add column aliases (as source_host) for API backwards compatibility
- Update the trigram index from source_host to domain_host
- Add a getDomainHost() helper for extracting the host from a domain

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -116,8 +116,8 @@ type Feed struct {
|
||||
|
||||
// Discovery source
|
||||
SourceURL string `json:"source_url,omitempty"`
|
||||
SourceHost string `json:"source_host,omitempty"`
|
||||
TLD string `json:"tld,omitempty"`
|
||||
DomainHost string `json:"domain_host,omitempty"`
|
||||
DomainTLD string `json:"domain_tld,omitempty"`
|
||||
|
||||
// Content stats
|
||||
ItemCount int `json:"item_count,omitempty"` // Number of items in last feed_check
|
||||
@@ -139,7 +139,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
// Auto-pass feeds from our own domain
|
||||
publishStatus := feed.PublishStatus
|
||||
if publishStatus == "" {
|
||||
if strings.HasSuffix(feed.SourceHost, "1440.news") || feed.SourceHost == "1440.news" {
|
||||
if strings.HasSuffix(feed.DomainHost, "1440.news") || feed.DomainHost == "1440.news" {
|
||||
publishStatus = "pass"
|
||||
} else if feed.Language == "" || (feed.Language != "en" && !strings.HasPrefix(feed.Language, "en-")) {
|
||||
publishStatus = "skip"
|
||||
@@ -156,7 +156,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -188,7 +188,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
feed.DiscoveredAt, NullableTime(feed.LastCheckedAt), NullableTime(feed.NextCheckAt), NullableTime(feed.LastBuildDate),
|
||||
NullableString(feed.ETag), NullableString(feed.LastModified),
|
||||
feed.Status, NullableString(feed.LastError), NullableTime(feed.LastErrorAt),
|
||||
NullableString(feed.SourceURL), NullableString(feed.SourceHost), NullableString(feed.TLD),
|
||||
NullableString(feed.SourceURL), NullableString(feed.DomainHost), NullableString(feed.DomainTLD),
|
||||
feed.ItemCount, NullableTime(feed.OldestItemDate), NullableTime(feed.NewestItemDate),
|
||||
feed.NoUpdate,
|
||||
publishStatus, NullableString(feed.PublishAccount),
|
||||
@@ -201,7 +201,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
feed := &Feed{}
|
||||
var category, title, description, language, siteURL *string
|
||||
var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
|
||||
var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string
|
||||
var publishStatus, publishAccount *string
|
||||
var itemCount, noUpdate *int
|
||||
|
||||
@@ -210,7 +210,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -220,7 +220,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
&feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
|
||||
&etag, &lastModified,
|
||||
&feed.Status, &lastError, &lastErrorAt,
|
||||
&sourceURL, &sourceHost, &tld,
|
||||
&sourceURL, &domainHost, &domainTLD,
|
||||
&itemCount, &oldestItemDate, &newestItemDate,
|
||||
&noUpdate,
|
||||
&publishStatus, &publishAccount,
|
||||
@@ -251,8 +251,8 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
feed.LastError = StringValue(lastError)
|
||||
feed.LastErrorAt = TimeValue(lastErrorAt)
|
||||
feed.SourceURL = StringValue(sourceURL)
|
||||
feed.SourceHost = StringValue(sourceHost)
|
||||
feed.TLD = StringValue(tld)
|
||||
feed.DomainHost = StringValue(domainHost)
|
||||
feed.DomainTLD = StringValue(domainTLD)
|
||||
if itemCount != nil {
|
||||
feed.ItemCount = *itemCount
|
||||
}
|
||||
@@ -285,7 +285,7 @@ func (c *Crawler) GetAllFeeds() ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -309,7 +309,7 @@ func (c *Crawler) GetFeedCount() (int, error) {
|
||||
// GetFeedCountByHost returns the number of feeds for a specific host
|
||||
func (c *Crawler) GetFeedCountByHost(host string) (int, error) {
|
||||
var count int
|
||||
err := c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE source_host = $1", host).Scan(&count)
|
||||
err := c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE domain_host = $1", host).Scan(&count)
|
||||
return count, err
|
||||
}
|
||||
|
||||
@@ -320,7 +320,7 @@ func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -344,11 +344,11 @@ func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
FROM feeds WHERE source_host = $1
|
||||
FROM feeds WHERE domain_host = $1
|
||||
`, host)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -366,7 +366,7 @@ func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -390,7 +390,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
feed := &Feed{}
|
||||
var feedType, category, title, description, language, siteURL *string
|
||||
var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
|
||||
var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string
|
||||
var itemCount, noUpdate *int
|
||||
var status *string
|
||||
var publishStatus, publishAccount *string
|
||||
@@ -400,7 +400,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
&feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
|
||||
&etag, &lastModified,
|
||||
&status, &lastError, &lastErrorAt,
|
||||
&sourceURL, &sourceHost, &tld,
|
||||
&sourceURL, &domainHost, &domainTLD,
|
||||
&itemCount, &oldestItemDate, &newestItemDate,
|
||||
&noUpdate,
|
||||
&publishStatus, &publishAccount,
|
||||
@@ -428,8 +428,8 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
feed.LastError = StringValue(lastError)
|
||||
feed.LastErrorAt = TimeValue(lastErrorAt)
|
||||
feed.SourceURL = StringValue(sourceURL)
|
||||
feed.SourceHost = StringValue(sourceHost)
|
||||
feed.TLD = StringValue(tld)
|
||||
feed.DomainHost = StringValue(domainHost)
|
||||
feed.DomainTLD = StringValue(domainTLD)
|
||||
if itemCount != nil {
|
||||
feed.ItemCount = *itemCount
|
||||
}
|
||||
@@ -474,7 +474,7 @@ func (c *Crawler) GetFeedsByPublishStatus(status string) ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
@@ -496,7 +496,7 @@ func (c *Crawler) GetPublishCandidates(limit int) ([]*Feed, error) {
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
source_url, domain_host, domain_tld,
|
||||
item_count, oldest_item_date, newest_item_date,
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
|
||||
Reference in New Issue
Block a user