Migrate to normalized FK schema (domain_host, domain_tld)

Replace source_host column with proper FK to domains table using
composite key (domain_host, domain_tld). This enables JOIN queries
instead of string concatenation for domain lookups.
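
As a rough illustration (not code from this commit), a domain lookup becomes a JOIN on the composite key rather than a LIKE against a concatenated string. A minimal sketch, assuming a domains table keyed on (domain_host, domain_tld) and a database/sql handle; everything other than the two key columns is illustrative:

package sketch

import "database/sql"

// lookupFeedURLs sketches the JOIN the FK enables; the domains table layout
// and this helper are assumptions, only (domain_host, domain_tld) comes
// from the commit.
func lookupFeedURLs(db *sql.DB, host, tld string) ([]string, error) {
	rows, err := db.Query(`
		SELECT f.url
		FROM feeds f
		JOIN domains d
		  ON d.domain_host = f.domain_host
		 AND d.domain_tld = f.domain_tld
		WHERE d.domain_host = $1 AND d.domain_tld = $2
	`, host, tld)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var urls []string
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	return urls, rows.Err()
}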

Changes:
- Update Feed struct: SourceHost/TLD → DomainHost/DomainTLD
- Update all SQL queries to use domain_host/domain_tld columns
- Add column aliases (as source_host) for API backwards compatibility
- Update trigram index from source_host to domain_host
- Add getDomainHost() helper for extracting host from domain (struct rename and helper sketched after this list)

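A sketch of the struct rename and the helper. Only the DomainHost/DomainTLD names and getDomainHost() itself come from this commit; the remaining fields and the last-dot split are assumptions (the real helper may parse differently):

package sketch

import "strings"

// Feed shows the renamed fields: SourceHost/TLD become DomainHost/DomainTLD.
// Other fields are omitted or illustrative.
type Feed struct {
	URL        string
	SourceURL  string
	DomainHost string // was SourceHost
	DomainTLD  string // was TLD
}

// getDomainHost extracts the host portion of a full domain, e.g.
// "blog.example" from "blog.example.com". Splitting on the last dot is an
// assumption; the actual helper may handle multi-label TLDs differently.
func getDomainHost(domain string) string {
	if i := strings.LastIndex(domain, "."); i >= 0 {
		return domain[:i]
	}
	return domain
}
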
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
primal
2026-02-01 22:36:25 -05:00
parent e7f6be2203
commit 7ec4207173
12 changed files with 193 additions and 214 deletions
@@ -138,17 +138,17 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		return url, sf, true
 	}
-	// Search feeds by source_host (LIKE search for domain matching)
+	// Search feeds by domain_host (LIKE search for domain matching)
 	// Use LOWER() to leverage trigram index
 	lowerPattern := "%" + strings.ToLower(query) + "%"
 	hostRows, err := c.db.Query(`
 		SELECT url, type, category, title, description, language, site_url,
 			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error, last_error_at,
-			source_url, source_host, tld,
+			source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 			item_count, oldest_item_date, newest_item_date, no_update
 		FROM feeds
-		WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1
+		WHERE LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1
 		LIMIT $2
 	`, lowerPattern, limit)
 	if err == nil {
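
The index change itself is not visible in this hunk; the following is only a guess at how a trigram index matching the rewritten LOWER(domain_host || '.' || domain_tld) LIKE predicate could be declared. It assumes the pg_trgm extension, and the index name and DDL are not from this commit:

package sketch

import (
	"database/sql"
	"log"
)

// ensureDomainTrgmIndex sketches an expression trigram index so the LIKE
// search above can stay index-assisted; purely illustrative DDL.
func ensureDomainTrgmIndex(db *sql.DB) {
	_, err := db.Exec(`
		CREATE INDEX IF NOT EXISTS idx_feeds_domain_trgm
		ON feeds USING gin (LOWER(domain_host || '.' || domain_tld) gin_trgm_ops)
	`)
	if err != nil {
		log.Printf("create trigram index: %v", err)
	}
}
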
@@ -168,7 +168,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		SELECT url, type, category, title, description, language, site_url,
 			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error, last_error_at,
-			source_url, source_host, tld,
+			source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 			item_count, oldest_item_date, newest_item_date, no_update
 		FROM feeds
 		WHERE search_vector @@ to_tsquery('english', $1)
@@ -243,7 +243,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		SELECT type, category, title, description, language, site_url,
 			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error, last_error_at,
-			source_url, source_host, tld,
+			source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 			item_count, oldest_item_date, newest_item_date, no_update
 		FROM feeds WHERE url = $1
 	`, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl,
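
Because every SELECT aliases the concatenation back to source_host (and domain_tld back to tld), callers that read those column names keep getting the same values as before. A small sketch of that in isolation, with the function and variable names assumed:

package sketch

import "database/sql"

// legacySourceHost shows the backwards-compatible alias on its own:
// readers of "source_host" still receive "host.tld" as a single string.
func legacySourceHost(db *sql.DB, feedURL string) (string, error) {
	var sourceHost string
	err := db.QueryRow(`
		SELECT domain_host || '.' || domain_tld AS source_host
		FROM feeds
		WHERE url = $1
	`, feedURL).Scan(&sourceHost)
	return sourceHost, err
}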