Migrate to normalized FK schema (domain_host, domain_tld)

Replace the source_host column with a proper foreign key to the domains
table, using the composite key (domain_host, domain_tld). This enables
JOIN queries on the key columns instead of string concatenation for
domain lookups.
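
For illustration, the shape of the lookup before and after this change
(both forms appear verbatim in the diffs below):

    -- before: join by string concatenation
    INNER JOIN feeds f ON f.source_host = (d.host || '.' || d.tld)

    -- after: join on the composite key
    INNER JOIN feeds f ON f.domain_host = d.host AND f.domain_tld = d.tld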

Changes:
- Update Feed struct: SourceHost/TLD → DomainHost/DomainTLD
- Update all SQL queries to use domain_host/domain_tld columns
- Add column aliases (as source_host) for API backwards compatibility
- Update trigram index from source_host to domain_host
- Add getDomainHost() helper for extracting host from domain

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: primal
Date:   2026-02-01 22:36:25 -05:00
parent e7f6be2203
commit 7ec4207173
12 changed files with 193 additions and 214 deletions
+1 -1
@@ -22,7 +22,7 @@ FROM ubuntu:latest
 WORKDIR /app
 # Install runtime dependencies
-RUN apt-get update && apt-get install -y ca-certificates tzdata && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y ca-certificates tzdata curl wget && rm -rf /var/lib/apt/lists/*
 # Copy binary from builder
 COPY --from=builder /app/1440.news .
+78 -66
@@ -42,7 +42,7 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 case "url":
 // Search feed URL paths (after domain)
 return `
-SELECT tld, COUNT(DISTINCT source_host) as domain_count
+SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
 FROM feeds
 WHERE tld IS NOT NULL AND LOWER(url) LIKE $1
 GROUP BY tld
@@ -52,7 +52,7 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 case "title":
 // Search feed titles
 return `
-SELECT tld, COUNT(DISTINCT source_host) as domain_count
+SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
 FROM feeds
 WHERE tld IS NOT NULL AND LOWER(title) LIKE $1
 GROUP BY tld
@@ -62,7 +62,7 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 case "description":
 // Search feed descriptions
 return `
-SELECT tld, COUNT(DISTINCT source_host) as domain_count
+SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
 FROM feeds
 WHERE tld IS NOT NULL AND LOWER(description) LIKE $1
 GROUP BY tld
@@ -72,7 +72,7 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 case "item":
 // Search item titles
 return `
-SELECT f.tld, COUNT(DISTINCT f.source_host) as domain_count
+SELECT f.tld, COUNT(DISTINCT f.domain_host || '.' || f.domain_tld) as domain_count
 FROM feeds f
 INNER JOIN items i ON i.feed_url = f.url
 WHERE f.tld IS NOT NULL AND LOWER(i.title) LIKE $1
@@ -85,7 +85,7 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 // Also include exact domain match if pattern looks like a domain
 if sq.DomainHost != "" && sq.DomainTLD != "" {
 return `
-SELECT tld, COUNT(DISTINCT source_host) as domain_count FROM (
+SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM (
 -- Domains matching host pattern
 SELECT tld::text as tld, host || '.' || tld as source_host
 FROM domains WHERE LOWER(host) LIKE $1
@@ -95,32 +95,32 @@ func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
 FROM domains WHERE LOWER(host) = $2 AND tld::text = $3
 UNION
 -- Feeds matching URL
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(url) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1
 UNION
 -- Feeds matching title
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(title) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1
 UNION
 -- Feeds matching description
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(description) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1
 ) combined
 GROUP BY tld
 ORDER BY tld ASC
 `, []interface{}{pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
 }
 return `
-SELECT tld, COUNT(DISTINCT source_host) as domain_count FROM (
+SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM (
 -- Domains matching host
 SELECT tld::text as tld, host || '.' || tld as source_host
 FROM domains WHERE LOWER(host) LIKE $1
 UNION
 -- Feeds matching URL
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(url) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1
 UNION
 -- Feeds matching title
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(title) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1
 UNION
 -- Feeds matching description
-SELECT tld, source_host FROM feeds WHERE tld IS NOT NULL AND LOWER(description) LIKE $1
+SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1
 ) combined
 GROUP BY tld
 ORDER BY tld ASC
@@ -335,7 +335,7 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 query := `
 SELECT DISTINCT d.host, d.tld, d.status, d.last_error, d.feeds_found
 FROM domains d
-INNER JOIN feeds f ON f.source_host = (d.host || '.' || d.tld)
+INNER JOIN feeds f ON f.domain_host = d.host AND f.domain_tld = d.tld
 WHERE 1=1`
 args := []interface{}{}
 argNum := 1
@@ -471,11 +471,11 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
 FROM domains d
 INNER JOIN (
-SELECT source_host, COUNT(*) as feed_count
+SELECT domain_host, domain_tld, COUNT(*) as feed_count
 FROM feeds
 WHERE item_count > 0
-GROUP BY source_host
-) f ON (d.host || '.' || d.tld) = f.source_host
+GROUP BY domain_host, domain_tld
+) f ON d.host = f.domain_host AND d.tld = f.domain_tld
 WHERE d.tld = $1 AND d.status = $2
 ORDER BY d.host ASC
 LIMIT $3 OFFSET $4
@@ -486,11 +486,11 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
 FROM domains d
 INNER JOIN (
-SELECT source_host, COUNT(*) as feed_count
+SELECT domain_host, domain_tld, COUNT(*) as feed_count
 FROM feeds
 WHERE item_count > 0
-GROUP BY source_host
-) f ON (d.host || '.' || d.tld) = f.source_host
+GROUP BY domain_host, domain_tld
+) f ON d.host = f.domain_host AND d.tld = f.domain_tld
 WHERE d.status != 'skip' AND d.tld = $1
 ORDER BY d.host ASC
 LIMIT $2 OFFSET $3
@@ -501,11 +501,11 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
 FROM domains d
 INNER JOIN (
-SELECT source_host, COUNT(*) as feed_count
+SELECT domain_host, domain_tld, COUNT(*) as feed_count
 FROM feeds
 WHERE item_count > 0
-GROUP BY source_host
-) f ON (d.host || '.' || d.tld) = f.source_host
+GROUP BY domain_host, domain_tld
+) f ON d.host = f.domain_host AND d.tld = f.domain_tld
 WHERE d.status != 'skip' AND LOWER(d.host) LIKE $1
 ORDER BY d.tld ASC, d.host ASC
 LIMIT $2 OFFSET $3
@@ -515,11 +515,11 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
 FROM domains d
 INNER JOIN (
-SELECT source_host, COUNT(*) as feed_count
+SELECT domain_host, domain_tld, COUNT(*) as feed_count
 FROM feeds
 WHERE item_count > 0
-GROUP BY source_host
-) f ON (d.host || '.' || d.tld) = f.source_host
+GROUP BY domain_host, domain_tld
+) f ON d.host = f.domain_host AND d.tld = f.domain_tld
 WHERE d.status = $1
 ORDER BY d.tld ASC, d.host ASC
 LIMIT $2 OFFSET $3
@@ -530,11 +530,11 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
 FROM domains d
 INNER JOIN (
-SELECT source_host, COUNT(*) as feed_count
+SELECT domain_host, domain_tld, COUNT(*) as feed_count
 FROM feeds
 WHERE item_count > 0
-GROUP BY source_host
-) f ON (d.host || '.' || d.tld) = f.source_host
+GROUP BY domain_host, domain_tld
+) f ON d.host = f.domain_host AND d.tld = f.domain_tld
 WHERE d.status != 'skip'
 ORDER BY d.tld ASC, d.host ASC
 LIMIT $1 OFFSET $2
@@ -683,10 +683,10 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 // Apply the same feed filters used for domain selection
 if len(hosts) > 0 {
 feedQuery := `
-SELECT f.source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language,
+SELECT f.domain_host || '.' || f.domain_tld as source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language,
 (SELECT COUNT(*) FROM items WHERE feed_url = f.url) as item_count
 FROM feeds f
-WHERE f.source_host = ANY($1)`
+WHERE f.domain_host || '.' || f.domain_tld = ANY($1)`
 feedArgs := []interface{}{hosts}
 feedArgNum := 2
@@ -740,7 +740,7 @@ func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
 }
 }
-feedQuery += " ORDER BY f.source_host, f.url"
+feedQuery += " ORDER BY f.domain_host, f.domain_tld, f.url"
 feedRows, err := c.db.Query(feedQuery, feedArgs...)
 if err == nil {
@@ -856,13 +856,17 @@ func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
 fmt.Sscanf(o, "%d", &offset)
 }
+// Parse host into domain_host and domain_tld
+domainHost := stripTLD(host)
+domainTLD := getTLD(host)
 rows, err := c.db.Query(`
 SELECT url, title, type, status, last_error, item_count, publish_status, language
 FROM feeds
-WHERE source_host = $1
+WHERE domain_host = $1 AND domain_tld = $2
 ORDER BY url ASC
-LIMIT $2 OFFSET $3
-`, host, limit, offset)
+LIMIT $3 OFFSET $4
+`, domainHost, domainTLD, limit, offset)
 if err != nil {
 http.Error(w, err.Error(), http.StatusInternalServerError)
 return
@@ -1233,7 +1237,7 @@ func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
 // If feed filter is specified, query from feeds table instead
 if len(statusList) > 0 || len(typeList) > 0 || feedMode == "exclude" {
 // Build query to get TLDs from feeds
-query := `SELECT tld, COUNT(DISTINCT source_host) as domain_count FROM feeds WHERE tld IS NOT NULL`
+query := `SELECT domain_tld as tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM feeds WHERE domain_tld IS NOT NULL`
 args := []interface{}{}
 argNum := 1
@@ -1310,14 +1314,14 @@ func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
 case "domain":
 // Search domain names
 if exactMatch && tldFilter != "" {
-// d:npr.org -> exact match (source_host = 'npr.org')
-query += fmt.Sprintf(" AND LOWER(source_host) = $%d", argNum)
+// d:npr.org -> exact match
+query += fmt.Sprintf(" AND LOWER(domain_host || '.' || domain_tld) = $%d", argNum)
 args = append(args, strings.ToLower(sq.Pattern))
 } else if tldFilter != "" {
-query += fmt.Sprintf(" AND tld = $%d AND LOWER(source_host) LIKE $%d", argNum, argNum+1)
+query += fmt.Sprintf(" AND domain_tld = $%d AND LOWER(domain_host || '.' || domain_tld) LIKE $%d", argNum, argNum+1)
 args = append(args, tldFilter, hostSearchPattern)
 } else {
-query += fmt.Sprintf(" AND LOWER(source_host) LIKE $%d", argNum)
+query += fmt.Sprintf(" AND LOWER(domain_host || '.' || domain_tld) LIKE $%d", argNum)
 args = append(args, hostSearchPattern)
 }
 case "url":
@@ -1338,16 +1342,16 @@ func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
 if sq.DomainHost != "" && sq.DomainTLD != "" {
 fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD)
 query += fmt.Sprintf(` AND (
-LOWER(source_host) LIKE $%d OR
+LOWER(domain_host || '.' || domain_tld) LIKE $%d OR
 LOWER(url) LIKE $%d OR
 LOWER(title) LIKE $%d OR
 LOWER(description) LIKE $%d OR
-LOWER(source_host) = $%d
+LOWER(domain_host || '.' || domain_tld) = $%d
 )`, argNum, argNum, argNum, argNum, argNum+1)
 args = append(args, searchPattern, fullDomain)
 } else {
 query += fmt.Sprintf(` AND (
-LOWER(source_host) LIKE $%d OR
+LOWER(domain_host || '.' || domain_tld) LIKE $%d OR
 LOWER(url) LIKE $%d OR
 LOWER(title) LIKE $%d OR
 LOWER(description) LIKE $%d
@@ -1356,7 +1360,7 @@ func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
 }
 }
 }
-query += " GROUP BY tld ORDER BY tld ASC"
+query += " GROUP BY domain_tld ORDER BY domain_tld ASC"
 rows, err = c.db.Query(query, args...)
 } else if search != "" {
 // Parse search prefix for type-specific searching
@@ -1441,20 +1445,20 @@ func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
 // d:npr.org -> exact match for host "npr" in specified TLD
 domainWhere = "tld = $1 AND lower(host) = $2"
 domainArgs = []interface{}{tld, strings.ToLower(hostPart)}
-feedWhere = "tld = $1 AND lower(source_host) = $2"
+feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) = $2"
 feedArgs = []interface{}{tld, strings.ToLower(sq.Pattern)}
 } else {
 // d:npr -> pattern match in specified TLD
 domainWhere = "tld = $1 AND lower(host) LIKE $2"
 domainArgs = []interface{}{tld, searchPattern}
-feedWhere = "tld = $1 AND lower(source_host) LIKE $2"
+feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2"
 feedArgs = []interface{}{tld, searchPattern}
 }
 } else {
 // Other search types - pattern match
 domainWhere = "tld = $1 AND lower(host) LIKE $2"
 domainArgs = []interface{}{tld, searchPattern}
-feedWhere = "tld = $1 AND lower(source_host) LIKE $2"
+feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2"
 feedArgs = []interface{}{tld, searchPattern}
 }
 stats["search"] = search
@@ -1462,7 +1466,7 @@ func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
 // Filter by TLD only
 domainWhere = "tld = $1"
 domainArgs = []interface{}{tld}
-feedWhere = "tld = $1"
+feedWhere = "domain_tld = $1"
 feedArgs = []interface{}{tld}
 }
@@ -1614,36 +1618,36 @@ func (c *Crawler) handleAPISearchStats(w http.ResponseWriter, r *http.Request) {
 // d:npr.org -> exact match
 domainWhere = "tld = $1 AND LOWER(host) = $2"
 domainArgs = []interface{}{tldFilter, strings.ToLower(hostPart)}
-feedWhere = "LOWER(source_host) = $1"
+feedWhere = "LOWER(domain_host || '.' || domain_tld) = $1"
 feedArgs = []interface{}{strings.ToLower(sq.Pattern)}
 } else if tldFilter != "" {
 domainWhere = "tld = $1 AND LOWER(host) LIKE $2"
 domainArgs = []interface{}{tldFilter, searchPattern}
-feedWhere = "tld = $1 AND LOWER(source_host) LIKE $2"
+feedWhere = "domain_tld = $1 AND LOWER(domain_host || '.' || domain_tld) LIKE $2"
 feedArgs = []interface{}{tldFilter, searchPattern}
 } else {
 domainWhere = "LOWER(host) LIKE $1"
 domainArgs = []interface{}{searchPattern}
-feedWhere = "LOWER(source_host) LIKE $1"
+feedWhere = "LOWER(domain_host || '.' || domain_tld) LIKE $1"
 feedArgs = []interface{}{searchPattern}
 }
 case "url":
-domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.source_host = (host || '.' || tld) AND LOWER(f.url) LIKE $1)"
+domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.url) LIKE $1)"
 domainArgs = []interface{}{searchPattern}
 feedWhere = "LOWER(url) LIKE $1"
 feedArgs = []interface{}{searchPattern}
 case "title":
-domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.source_host = (host || '.' || tld) AND LOWER(f.title) LIKE $1)"
+domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.title) LIKE $1)"
 domainArgs = []interface{}{searchPattern}
 feedWhere = "LOWER(title) LIKE $1"
 feedArgs = []interface{}{searchPattern}
 case "description":
-domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.source_host = (host || '.' || tld) AND LOWER(f.description) LIKE $1)"
+domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.description) LIKE $1)"
 domainArgs = []interface{}{searchPattern}
 feedWhere = "LOWER(description) LIKE $1"
 feedArgs = []interface{}{searchPattern}
 case "item":
-domainWhere = "EXISTS (SELECT 1 FROM feeds f INNER JOIN items i ON i.feed_url = f.url WHERE f.source_host = (host || '.' || tld) AND LOWER(i.title) LIKE $1)"
+domainWhere = "EXISTS (SELECT 1 FROM feeds f INNER JOIN items i ON i.feed_url = f.url WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(i.title) LIKE $1)"
 domainArgs = []interface{}{searchPattern}
 feedWhere = "EXISTS (SELECT 1 FROM items i WHERE i.feed_url = url AND LOWER(i.title) LIKE $1)"
 feedArgs = []interface{}{searchPattern}
@@ -1654,26 +1658,26 @@ func (c *Crawler) handleAPISearchStats(w http.ResponseWriter, r *http.Request) {
 domainWhere = `(
 LOWER(host) LIKE $1 OR
 (LOWER(host) = $2 AND tld::text = $3) OR
-EXISTS (SELECT 1 FROM feeds f WHERE f.source_host = (host || '.' || tld) AND (
+EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND (
 LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1
 ))
 )`
 domainArgs = []interface{}{searchPattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
 fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD)
 feedWhere = `(
-LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1 OR LOWER(source_host) = $2
+LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1 OR LOWER(domain_host || '.' || domain_tld) = $2
 )`
 feedArgs = []interface{}{searchPattern, fullDomain}
 } else {
 domainWhere = `(
 LOWER(host) LIKE $1 OR
-EXISTS (SELECT 1 FROM feeds f WHERE f.source_host = (host || '.' || tld) AND (
+EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND (
 LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1
 ))
 )`
 domainArgs = []interface{}{searchPattern}
 feedWhere = `(
-LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1
+LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1
 )`
 feedArgs = []interface{}{searchPattern}
 }
@@ -1834,11 +1838,13 @@ func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
 // getDomainDIDs returns all unique publish_account DIDs for a domain's feeds
 func (c *Crawler) getDomainDIDs(host string) []string {
+domainHost := stripTLD(host)
+domainTLD := getTLD(host)
 var dids []string
 rows, err := c.db.Query(`
 SELECT DISTINCT publish_account FROM feeds
-WHERE source_host = $1 AND publish_account IS NOT NULL AND publish_account != ''
-`, host)
+WHERE domain_host = $1 AND domain_tld = $2 AND publish_account IS NOT NULL AND publish_account != ''
+`, domainHost, domainTLD)
 if err == nil {
 defer rows.Close()
 for rows.Next() {
@@ -1871,10 +1877,12 @@ func (c *Crawler) skipDomain(host string) DomainActionResult {
 }
 // Mark feeds as skipped (but don't delete)
+domainHost := stripTLD(host)
+domainTLD := getTLD(host)
 feedsAffected, err := c.db.Exec(`
 UPDATE feeds SET status = 'skip', publish_status = 'skip'
-WHERE source_host = $1
-`, host)
+WHERE domain_host = $1 AND domain_tld = $2
+`, domainHost, domainTLD)
 if err != nil {
 result.Error = fmt.Sprintf("failed to update feeds: %v", err)
 return result
@@ -1942,8 +1950,10 @@ func (c *Crawler) dropDomain(host string) DomainActionResult {
 }
 // Get feed URLs for this domain (needed to delete items)
+domainHost := stripTLD(host)
+domainTLD := getTLD(host)
 var feedURLs []string
-feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE source_host = $1`, host)
+feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD)
 if err == nil {
 defer feedRows.Close()
 for feedRows.Next() {
@@ -1963,7 +1973,7 @@ func (c *Crawler) dropDomain(host string) DomainActionResult {
 }
 // Delete all feeds from this domain
-feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE source_host = $1`, host)
+feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD)
 if err != nil {
 result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
 return result
@@ -2031,10 +2041,12 @@ func (c *Crawler) restoreDomain(host string) DomainActionResult {
 }
 // Restore feeds to pass status
+domainHost := stripTLD(host)
+domainTLD := getTLD(host)
 feedsAffected, err := c.db.Exec(`
 UPDATE feeds SET status = 'pass', publish_status = 'pass'
-WHERE source_host = $1
-`, host)
+WHERE domain_host = $1 AND domain_tld = $2
+`, domainHost, domainTLD)
 if err != nil {
 result.Error = fmt.Sprintf("failed to update feeds: %v", err)
 return result
+11 -8
@@ -154,7 +154,7 @@ func (c *Crawler) handleAPIFeedsByStatus(w http.ResponseWriter, r *http.Request)
 }
 rows, err := c.db.Query(`
-SELECT url, title, type, source_host, tld, status, last_error, item_count
+SELECT url, title, type, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count
 FROM feeds
 WHERE status = $1
 ORDER BY url ASC
@@ -218,7 +218,7 @@ func (c *Crawler) handleAPIFeeds(w http.ResponseWriter, r *http.Request) {
 var err error
 if publishStatus != "" {
 rows, err = c.db.Query(`
-SELECT url, title, type, source_host, tld, status, last_error, item_count, publish_status, language
+SELECT url, title, type, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language
 FROM feeds
 WHERE publish_status = $1
 ORDER BY url ASC
@@ -226,7 +226,7 @@ func (c *Crawler) handleAPIFeeds(w http.ResponseWriter, r *http.Request) {
 `, publishStatus, limit, offset)
 } else {
 rows, err = c.db.Query(`
-SELECT url, title, type, source_host, tld, status, last_error, item_count, publish_status, language
+SELECT url, title, type, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language
 FROM feeds
 ORDER BY url ASC
 LIMIT $1 OFFSET $2
@@ -279,19 +279,22 @@ func (c *Crawler) filterFeeds(w http.ResponseWriter, tld, domain, status string,
 var args []interface{}
 argNum := 1
 query := `
-SELECT url, title, type, category, source_host, tld, status, last_error, item_count, language
+SELECT url, title, type, category, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, language
 FROM feeds
 WHERE 1=1`
 if tld != "" {
-query += fmt.Sprintf(" AND tld = $%d", argNum)
+query += fmt.Sprintf(" AND domain_tld = $%d", argNum)
 args = append(args, tld)
 argNum++
 }
 if domain != "" {
-query += fmt.Sprintf(" AND source_host = $%d", argNum)
-args = append(args, domain)
-argNum++
+// Parse domain into host and tld parts
+domainHost := stripTLD(domain)
+domainTLD := getTLD(domain)
+query += fmt.Sprintf(" AND domain_host = $%d AND domain_tld = $%d", argNum, argNum+1)
+args = append(args, domainHost, domainTLD)
+argNum += 2
 }
 if status != "" {
 query += fmt.Sprintf(" AND status = $%d", argNum)
+8 -6
@@ -150,7 +150,7 @@ func (c *Crawler) handleAPIPublishDenied(w http.ResponseWriter, r *http.Request)
 result = append(result, FeedDeniedInfo{
 URL: f.URL,
 Title: f.Title,
-SourceHost: f.SourceHost,
+SourceHost: fullHost(f.DomainHost, f.DomainTLD),
 })
 }
@@ -193,7 +193,7 @@ func (c *Crawler) handleAPIPublishCandidates(w http.ResponseWriter, r *http.Requ
 URL: f.URL,
 Title: f.Title,
 Category: f.Category,
-SourceHost: f.SourceHost,
+SourceHost: fullHost(f.DomainHost, f.DomainTLD),
 ItemCount: f.ItemCount,
 DerivedHandle: DeriveHandleFromFeed(f.URL),
 })
@@ -346,9 +346,10 @@ func (c *Crawler) ensureFeedAccountExists(feedURL, account string) (bool, error)
 // Set up profile
 feed, _ := c.getFeed(feedURL)
 if feed != nil {
+sourceHost := fullHost(feed.DomainHost, feed.DomainTLD)
 displayName := feed.Title
 if displayName == "" {
-displayName = feed.SourceHost
+displayName = sourceHost
 }
 description := feed.Description
 if description == "" {
@@ -366,7 +367,7 @@ func (c *Crawler) ensureFeedAccountExists(feedURL, account string) (bool, error)
 // Try to fetch favicon
 var avatar *BlobRef
-faviconData, mimeType, err := FetchFaviconBytes(feed.SourceHost)
+faviconData, mimeType, err := FetchFaviconBytes(sourceHost)
 if err == nil && len(faviconData) > 0 {
 avatar, _ = publisher.UploadBlob(session, faviconData, mimeType)
 }
@@ -819,15 +820,16 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 fmt.Printf("Created account: %s (%s)\n", session.Handle, session.DID)
 // Set up profile with feed title and favicon
+sourceHost := fullHost(feed.DomainHost, feed.DomainTLD)
 displayName := feed.Title
 if displayName == "" {
-displayName = feed.SourceHost
+displayName = sourceHost
 }
 description := feed.Description
 // Try to fetch favicon for avatar
 var avatar *BlobRef
-faviconData, mimeType, err := FetchFaviconBytes(feed.SourceHost)
+faviconData, mimeType, err := FetchFaviconBytes(sourceHost)
 if err == nil && len(faviconData) > 0 {
 avatar, err = publisher.UploadBlob(session, faviconData, mimeType)
 if err != nil {
+5 -5
@@ -138,17 +138,17 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 return url, sf, true
 }
-// Search feeds by source_host (LIKE search for domain matching)
+// Search feeds by domain_host (LIKE search for domain matching)
 // Use LOWER() to leverage trigram index
 lowerPattern := "%" + strings.ToLower(query) + "%"
 hostRows, err := c.db.Query(`
 SELECT url, type, category, title, description, language, site_url,
 discovered_at, last_checked_at, next_check_at, last_build_date,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 item_count, oldest_item_date, newest_item_date, no_update
 FROM feeds
-WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1
+WHERE LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1
 LIMIT $2
 `, lowerPattern, limit)
 if err == nil {
@@ -168,7 +168,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 SELECT url, type, category, title, description, language, site_url,
 discovered_at, last_checked_at, next_check_at, last_build_date,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 item_count, oldest_item_date, newest_item_date, no_update
 FROM feeds
 WHERE search_vector @@ to_tsquery('english', $1)
@@ -243,7 +243,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 SELECT type, category, title, description, language, site_url,
 discovered_at, last_checked_at, next_check_at, last_build_date,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld,
 item_count, oldest_item_date, newest_item_date, no_update
 FROM feeds WHERE url = $1
 `, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl,
+25 -5
@@ -2,6 +2,7 @@ package main
 import (
 "context"
+"crypto/tls"
 "encoding/json"
 "fmt"
 "io"
@@ -42,16 +43,35 @@ func NewCrawler(connString string) (*Crawler, error) {
 return nil, fmt.Errorf("failed to open database: %v", err)
 }
+// Custom transport with longer timeouts (HTTP/2 disabled for compatibility)
+transport := &http.Transport{
+TLSClientConfig: &tls.Config{
+MinVersion: tls.VersionTLS12,
+NextProtos: []string{"http/1.1"}, // Force HTTP/1.1 for compatibility
+},
+DialContext: (&net.Dialer{
+Timeout: 30 * time.Second,
+KeepAlive: 30 * time.Second,
+}).DialContext,
+ForceAttemptHTTP2: false,
+MaxIdleConns: 100,
+IdleConnTimeout: 90 * time.Second,
+TLSHandshakeTimeout: 30 * time.Second,
+ExpectContinueTimeout: 1 * time.Second,
+ResponseHeaderTimeout: 60 * time.Second,
+}
 return &Crawler{
 MaxDepth: 10,
 MaxPagesPerHost: 10,
-Timeout: 10 * time.Second,
-UserAgent: "FeedCrawler/1.0",
+Timeout: 60 * time.Second,
+UserAgent: "Mozilla/5.0 (compatible; FeedCrawler/1.0; +https://1440.news)",
 startTime: time.Now(),
 db: db,
 shutdownCh: make(chan struct{}),
 client: &http.Client{
-Timeout: 10 * time.Second,
+Timeout: 60 * time.Second,
+Transport: transport,
 CheckRedirect: func(req *http.Request, via []*http.Request) error {
 if len(via) >= 10 {
 return fmt.Errorf("stopped after 10 redirects")
@@ -347,7 +367,7 @@ type FeedInfo struct {
 func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
 var title, description, siteURL, sourceHost *string
 err := c.db.QueryRow(`
-SELECT title, description, site_url, source_host FROM feeds WHERE url = $1
+SELECT title, description, site_url, domain_host || '.' || domain_tld as source_host FROM feeds WHERE url = $1
 `, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
 if err != nil {
 return nil
@@ -363,7 +383,7 @@ func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
 // RefreshAllProfiles updates profiles for all existing accounts with feed URLs
 func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
 rows, err := c.db.Query(`
-SELECT url, title, description, site_url, source_host, publish_account
+SELECT url, title, description, site_url, domain_host || '.' || domain_tld as source_host, publish_account
 FROM feeds
 WHERE publish_account IS NOT NULL AND publish_account <> ''
 `)
+3 -3
@@ -92,9 +92,9 @@ func (c *Crawler) UpdateStats() {
 func (c *Crawler) fetchAllDomainsFromDB() []DomainStat {
 rows, err := c.db.Query(`
-SELECT tld, source_host, COUNT(*) as cnt FROM feeds
-GROUP BY tld, source_host
-ORDER BY tld, source_host
+SELECT domain_tld as tld, domain_host || '.' || domain_tld as source_host, COUNT(*) as cnt FROM feeds
+GROUP BY domain_tld, domain_host
+ORDER BY domain_tld, domain_host
 `)
 if err != nil {
 fmt.Printf("fetchAllDomainsFromDB error: %v\n", err)
+28 -92
@@ -36,14 +36,17 @@ CREATE INDEX IF NOT EXISTS idx_domains_host_trgm ON domains USING GIN(host gin_t
 CREATE TABLE IF NOT EXISTS feeds (
 url TEXT PRIMARY KEY,
+domain_host TEXT NOT NULL,
+domain_tld tld_enum NOT NULL,
 type TEXT,
 category TEXT DEFAULT 'main',
 title TEXT,
 description TEXT,
 language TEXT,
 site_url TEXT,
+source_url TEXT,
-discovered_at TIMESTAMP NOT NULL,
+discovered_at TIMESTAMP NOT NULL DEFAULT NOW(),
 last_checked_at TIMESTAMP, -- feed_check: when last checked for new items
 next_check_at TIMESTAMP, -- feed_check: when to next check
 last_build_date TIMESTAMP,
@@ -51,134 +54,67 @@ CREATE TABLE IF NOT EXISTS feeds (
 etag TEXT,
 last_modified TEXT,
-status TEXT DEFAULT 'pass' CHECK(status IN ('hold', 'pass', 'skip')),
+status TEXT NOT NULL DEFAULT 'pass',
 last_error TEXT,
 last_error_at TIMESTAMP,
-source_url TEXT,
-source_host TEXT,
-tld TEXT,
-item_count INTEGER,
+item_count INTEGER NOT NULL DEFAULT 0,
 oldest_item_date TIMESTAMP,
 newest_item_date TIMESTAMP,
-no_update INTEGER DEFAULT 0,
+no_update INTEGER NOT NULL DEFAULT 0,
 -- Publishing to PDS
-publish_status TEXT DEFAULT 'hold' CHECK(publish_status IN ('hold', 'pass', 'skip')),
+publish_status TEXT NOT NULL DEFAULT 'hold',
 publish_account TEXT,
--- Full-text search vector
-search_vector tsvector GENERATED ALWAYS AS (
-setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
-setweight(to_tsvector('english', coalesce(description, '')), 'B') ||
-setweight(to_tsvector('english', coalesce(url, '')), 'C')
-) STORED
+FOREIGN KEY (domain_host, domain_tld) REFERENCES domains(host, tld)
 );
-CREATE INDEX IF NOT EXISTS idx_feeds_source_host ON feeds(source_host);
-CREATE INDEX IF NOT EXISTS idx_feeds_publish_status ON feeds(publish_status);
-CREATE INDEX IF NOT EXISTS idx_feeds_source_host_url ON feeds(source_host, url);
-CREATE INDEX IF NOT EXISTS idx_feeds_tld ON feeds(tld);
-CREATE INDEX IF NOT EXISTS idx_feeds_tld_source_host ON feeds(tld, source_host);
-CREATE INDEX IF NOT EXISTS idx_feeds_source_host_trgm ON feeds USING GIN(source_host gin_trgm_ops);
-CREATE INDEX IF NOT EXISTS idx_feeds_type ON feeds(type);
-CREATE INDEX IF NOT EXISTS idx_feeds_category ON feeds(category);
-CREATE INDEX IF NOT EXISTS idx_feeds_status ON feeds(status);
-CREATE INDEX IF NOT EXISTS idx_feeds_discovered_at ON feeds(discovered_at);
-CREATE INDEX IF NOT EXISTS idx_feeds_title ON feeds(title);
-CREATE INDEX IF NOT EXISTS idx_feeds_search ON feeds USING GIN(search_vector);
--- idx_feeds_to_check created in migrations after column rename
+-- Indexes will be added as needed based on query patterns
 CREATE TABLE IF NOT EXISTS items (
-id BIGSERIAL PRIMARY KEY,
-feed_url TEXT NOT NULL,
-guid TEXT,
+guid TEXT NOT NULL,
+feed_url TEXT NOT NULL REFERENCES feeds(url) ON DELETE CASCADE,
 title TEXT,
 link TEXT,
 description TEXT,
 content TEXT,
 author TEXT,
 pub_date TIMESTAMP,
-discovered_at TIMESTAMP NOT NULL,
+discovered_at TIMESTAMP NOT NULL DEFAULT NOW(),
 updated_at TIMESTAMP,
 -- Media attachments
 enclosure_url TEXT,
 enclosure_type TEXT,
 enclosure_length BIGINT,
-image_urls TEXT, -- JSON array of image URLs
-tags TEXT, -- JSON array of category/tag strings
+image_urls JSONB,
+tags JSONB,
 -- Publishing to PDS
 published_at TIMESTAMP,
 published_uri TEXT,
--- Full-text search vector
-search_vector tsvector GENERATED ALWAYS AS (
-setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
-setweight(to_tsvector('english', coalesce(description, '')), 'B') ||
-setweight(to_tsvector('english', coalesce(content, '')), 'C') ||
-setweight(to_tsvector('english', coalesce(author, '')), 'D')
-) STORED,
-UNIQUE(feed_url, guid)
+PRIMARY KEY (guid, feed_url)
 );
-CREATE INDEX IF NOT EXISTS idx_items_feed_url ON items(feed_url);
-CREATE INDEX IF NOT EXISTS idx_items_pub_date ON items(pub_date DESC);
-CREATE INDEX IF NOT EXISTS idx_items_link ON items(link);
-CREATE INDEX IF NOT EXISTS idx_items_feed_url_pub_date ON items(feed_url, pub_date DESC);
-CREATE INDEX IF NOT EXISTS idx_items_unpublished ON items(feed_url, published_at) WHERE published_at IS NULL;
-CREATE INDEX IF NOT EXISTS idx_items_search ON items USING GIN(search_vector);
+-- Indexes will be added as needed based on query patterns
--- URL Shortener tables
-CREATE TABLE IF NOT EXISTS short_urls (
-code TEXT PRIMARY KEY,
-original_url TEXT NOT NULL,
-item_id BIGINT REFERENCES items(id),
-feed_url TEXT,
-created_at TIMESTAMP NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'),
-click_count INTEGER DEFAULT 0
-);
-CREATE INDEX IF NOT EXISTS idx_short_urls_original ON short_urls(original_url);
-CREATE INDEX IF NOT EXISTS idx_short_urls_item_id ON short_urls(item_id);
-CREATE INDEX IF NOT EXISTS idx_short_urls_feed_url ON short_urls(feed_url);
-CREATE TABLE IF NOT EXISTS clicks (
-id BIGSERIAL PRIMARY KEY,
-short_code TEXT NOT NULL REFERENCES short_urls(code),
-clicked_at TIMESTAMP NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'),
-referrer TEXT,
-user_agent TEXT,
-ip_hash TEXT,
-country TEXT
-);
-CREATE INDEX IF NOT EXISTS idx_clicks_short_code ON clicks(short_code);
-CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON clicks(clicked_at DESC);
--- OAuth sessions (persisted for login persistence across deploys)
-CREATE TABLE IF NOT EXISTS oauth_sessions (
+-- OAuth sessions
+CREATE TABLE IF NOT EXISTS sessions (
 id TEXT PRIMARY KEY,
 did TEXT NOT NULL,
 handle TEXT NOT NULL,
-created_at TIMESTAMP NOT NULL,
-expires_at TIMESTAMP NOT NULL,
-access_token TEXT,
+access_token TEXT NOT NULL,
 refresh_token TEXT,
-token_expiry TIMESTAMP,
-dpop_private_jwk TEXT,
-dpop_authserver_nonce TEXT,
-dpop_pds_nonce TEXT,
-pds_url TEXT,
-authserver_iss TEXT
+token_type TEXT NOT NULL DEFAULT 'DPoP',
+expires_at TIMESTAMP NOT NULL,
+created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+dpop_nonce TEXT,
+dpop_private_jwk TEXT
 );
-CREATE INDEX IF NOT EXISTS idx_oauth_sessions_expires_at ON oauth_sessions(expires_at);
 -- Trigger to normalize feed URLs on insert/update (strips https://, http://, www.)
 CREATE OR REPLACE FUNCTION normalize_feed_url()
 RETURNS TRIGGER AS $$
@@ -212,8 +148,8 @@ func OpenDatabase(connString string) (*DB, error) {
 // Build from individual env vars
 host := getEnvOrDefault("DB_HOST", "atproto-postgres")
 port := getEnvOrDefault("DB_PORT", "5432")
-user := getEnvOrDefault("DB_USER", "news_1440")
-dbname := getEnvOrDefault("DB_NAME", "news_1440")
+user := getEnvOrDefault("DB_USER", "dba_1440_news")
+dbname := getEnvOrDefault("DB_NAME", "db_1440_news")
 // Support Docker secrets (password file) or direct password
 password := os.Getenv("DB_PASSWORD")
@@ -271,7 +207,7 @@ func OpenDatabase(connString string) (*DB, error) {
 // Indexes must match LOWER() used in queries
 pool.Exec(ctx, "CREATE EXTENSION IF NOT EXISTS pg_trgm")
 pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_domains_host_trgm ON domains USING gin (LOWER(host) gin_trgm_ops)")
-pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_source_host_trgm ON feeds USING gin (LOWER(source_host) gin_trgm_ops)")
+pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_domain_host_trgm ON feeds USING gin (LOWER(domain_host) gin_trgm_ops)")
 // Migration: rename feed columns for consistent terminology
 // last_crawled_at -> last_checked_at (feed_check = checking feeds for new items)
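
The schema above is the fresh-install path; this commit does not show the
backfill for databases that still have source_host populated. A minimal
sketch of what such a backfill could look like, assuming (as the
getDomainHost/getTLD helpers do) that the TLD is the last dot-separated
label, e.g. "bbc.co.uk" -> ("bbc.co", "uk"). The statements themselves are
hypothetical; only the column and type names come from this commit:

    -- Hypothetical backfill sketch (not part of this commit)
    ALTER TABLE feeds ADD COLUMN IF NOT EXISTS domain_host TEXT;
    ALTER TABLE feeds ADD COLUMN IF NOT EXISTS domain_tld tld_enum;
    UPDATE feeds
    SET domain_host = regexp_replace(source_host, '\.[^.]+$', ''),   -- drop last label
        domain_tld  = substring(source_host from '[^.]+$')::tld_enum -- keep last label
    WHERE source_host IS NOT NULL;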
+22 -22
@@ -116,8 +116,8 @@ type Feed struct {
 // Discovery source
 SourceURL string `json:"source_url,omitempty"`
-SourceHost string `json:"source_host,omitempty"`
-TLD string `json:"tld,omitempty"`
+DomainHost string `json:"domain_host,omitempty"`
+DomainTLD string `json:"domain_tld,omitempty"`
 // Content stats
 ItemCount int `json:"item_count,omitempty"` // Number of items in last feed_check
@@ -139,7 +139,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
 // Auto-pass feeds from our own domain
 publishStatus := feed.PublishStatus
 if publishStatus == "" {
-if strings.HasSuffix(feed.SourceHost, "1440.news") || feed.SourceHost == "1440.news" {
+if strings.HasSuffix(feed.DomainHost, "1440.news") || feed.DomainHost == "1440.news" {
 publishStatus = "pass"
 } else if feed.Language == "" || (feed.Language != "en" && !strings.HasPrefix(feed.Language, "en-")) {
 publishStatus = "skip"
@@ -156,7 +156,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -188,7 +188,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
 feed.DiscoveredAt, NullableTime(feed.LastCheckedAt), NullableTime(feed.NextCheckAt), NullableTime(feed.LastBuildDate),
 NullableString(feed.ETag), NullableString(feed.LastModified),
 feed.Status, NullableString(feed.LastError), NullableTime(feed.LastErrorAt),
-NullableString(feed.SourceURL), NullableString(feed.SourceHost), NullableString(feed.TLD),
+NullableString(feed.SourceURL), NullableString(feed.DomainHost), NullableString(feed.DomainTLD),
 feed.ItemCount, NullableTime(feed.OldestItemDate), NullableTime(feed.NewestItemDate),
 feed.NoUpdate,
 publishStatus, NullableString(feed.PublishAccount),
@@ -201,7 +201,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
 feed := &Feed{}
 var category, title, description, language, siteURL *string
 var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
-var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
+var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string
 var publishStatus, publishAccount *string
 var itemCount, noUpdate *int
@@ -210,7 +210,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -220,7 +220,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
 &feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
 &etag, &lastModified,
 &feed.Status, &lastError, &lastErrorAt,
-&sourceURL, &sourceHost, &tld,
+&sourceURL, &domainHost, &domainTLD,
 &itemCount, &oldestItemDate, &newestItemDate,
 &noUpdate,
 &publishStatus, &publishAccount,
@@ -251,8 +251,8 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
 feed.LastError = StringValue(lastError)
 feed.LastErrorAt = TimeValue(lastErrorAt)
 feed.SourceURL = StringValue(sourceURL)
-feed.SourceHost = StringValue(sourceHost)
-feed.TLD = StringValue(tld)
+feed.DomainHost = StringValue(domainHost)
+feed.DomainTLD = StringValue(domainTLD)
 if itemCount != nil {
 feed.ItemCount = *itemCount
 }
@@ -285,7 +285,7 @@ func (c *Crawler) GetAllFeeds() ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -309,7 +309,7 @@ func (c *Crawler) GetFeedCount() (int, error) {
 // GetFeedCountByHost returns the number of feeds for a specific host
 func (c *Crawler) GetFeedCountByHost(host string) (int, error) {
 var count int
-err := c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE source_host = $1", host).Scan(&count)
+err := c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE domain_host = $1", host).Scan(&count)
 return count, err
 }
@@ -320,7 +320,7 @@ func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -344,11 +344,11 @@ func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
-FROM feeds WHERE source_host = $1
+FROM feeds WHERE domain_host = $1
 `, host)
 if err != nil {
 return nil, err
@@ -366,7 +366,7 @@ func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -390,7 +390,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
 feed := &Feed{}
 var feedType, category, title, description, language, siteURL *string
 var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
-var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
+var etag, lastModified, lastError, sourceURL, domainHost, domainTLD *string
 var itemCount, noUpdate *int
 var status *string
 var publishStatus, publishAccount *string
@@ -400,7 +400,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
 &feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
 &etag, &lastModified,
 &status, &lastError, &lastErrorAt,
-&sourceURL, &sourceHost, &tld,
+&sourceURL, &domainHost, &domainTLD,
 &itemCount, &oldestItemDate, &newestItemDate,
 &noUpdate,
 &publishStatus, &publishAccount,
@@ -428,8 +428,8 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
 feed.LastError = StringValue(lastError)
 feed.LastErrorAt = TimeValue(lastErrorAt)
 feed.SourceURL = StringValue(sourceURL)
-feed.SourceHost = StringValue(sourceHost)
-feed.TLD = StringValue(tld)
+feed.DomainHost = StringValue(domainHost)
+feed.DomainTLD = StringValue(domainTLD)
 if itemCount != nil {
 feed.ItemCount = *itemCount
 }
@@ -474,7 +474,7 @@ func (c *Crawler) GetFeedsByPublishStatus(status string) ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
@@ -496,7 +496,7 @@ func (c *Crawler) GetPublishCandidates(limit int) ([]*Feed, error) {
 discovered_at, last_checked_at, next_check_at, last_build_date,
 etag, last_modified,
 status, last_error, last_error_at,
-source_url, source_host, tld,
+source_url, domain_host, domain_tld,
 item_count, oldest_item_date, newest_item_date,
 no_update,
 publish_status, publish_account
+4 -4
@@ -33,8 +33,8 @@ func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Hea
 DiscoveredAt: now,
 LastCheckedAt: now,
 Status: "pass",
-SourceHost: sourceHost,
-TLD: getTLD(sourceHost),
+DomainHost: getDomainHost(sourceHost),
+DomainTLD: getTLD(sourceHost),
 ETag: headers.Get("ETag"),
 LastModified: headers.Get("Last-Modified"),
 }
@@ -90,8 +90,8 @@ func (c *Crawler) addFeed(feedURL, feedType, sourceHost, sourceURL string) {
 DiscoveredAt: now,
 Status: "pass",
 SourceURL: normalizeURL(sourceURL),
-SourceHost: sourceHost,
-TLD: getTLD(sourceHost),
+DomainHost: getDomainHost(sourceHost),
+DomainTLD: getTLD(sourceHost),
 NextCheckAt: now, // Should be crawled immediately
 }
+2 -2
@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
 <title>1440.news Feed Crawler</title>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
-<link rel="stylesheet" href="/static/dashboard.css?v=1769990750">
-<script src="/static/dashboard.js?v=1769990750"></script>
+<link rel="stylesheet" href="/static/dashboard.css?v=1769995130">
+<script src="/static/dashboard.js?v=1769995130"></script>
 </head>
 <body>
 <div id="topSection">
+6
@@ -61,6 +61,12 @@ func stripTLD(host string) string {
 return host
 }
+// getDomainHost extracts the host part from a full domain (without TLD)
+// e.g., "npr.org" -> "npr", "bbc.co.uk" -> "bbc.co"
+func getDomainHost(domain string) string {
+return stripTLD(domain)
+}
 // fullHost reconstructs the full hostname from host and tld
 // e.g., ("example", "com") -> "example.com"
 func fullHost(host, tld string) string {
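
getDomainHost above is a thin alias over stripTLD; together with getTLD and
fullHost, a hostname should split and reassemble losslessly. A quick
round-trip sketch (a hypothetical in-package test, assuming the
split-on-last-dot semantics the comments describe; imports "testing"):

    func TestDomainHostRoundTrip(t *testing.T) {
        for _, host := range []string{"npr.org", "bbc.co.uk"} {
            // Split the hostname into (host, tld) and rebuild it.
            h, tld := getDomainHost(host), getTLD(host)
            if got := fullHost(h, tld); got != host {
                t.Errorf("round-trip failed for %q: got %q", host, got)
            }
        }
    }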