diff --git a/api_domains.go b/api_domains.go
deleted file mode 100644
index da1c0f8..0000000
--- a/api_domains.go
+++ /dev/null
@@ -1,2068 +0,0 @@
-package main
-
-import (
-	"bufio"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"os"
-	"strings"
-
-	"github.com/jackc/pgx/v5"
-)
-
-// buildTLDSearchQuery builds a query to get TLDs based on search type
-// Returns (query, args) for the database query
-func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) {
-	pattern := "%" + strings.ToLower(sq.Pattern) + "%"
-
-	switch sq.Type {
-	case "domain":
-		// Check if pattern includes TLD (e.g., d:npr.org -> exact match)
-		hostPart, tldFilter := parseSearchTerm(sq.Pattern)
-		if tldFilter != "" {
-			// Exact match - return just the matching TLD
-			return `
-				SELECT tld::text as tld, COUNT(*) as domain_count
-				FROM domains
-				WHERE tld = $1 AND LOWER(host) = $2
-				GROUP BY tld
-				ORDER BY tld ASC
-			`, []interface{}{tldFilter, strings.ToLower(hostPart)}
-		}
-		// Pattern match - search all TLDs
-		return `
-			SELECT tld::text as tld, COUNT(*) as domain_count
-			FROM domains
-			WHERE LOWER(host) LIKE $1
-			GROUP BY tld
-			ORDER BY tld ASC
-		`, []interface{}{pattern}
-
-	case "url":
-		// Search feed URL paths (after domain)
-		return `
-			SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
-			FROM feeds
-			WHERE tld IS NOT NULL AND LOWER(url) LIKE $1
-			GROUP BY tld
-			ORDER BY tld ASC
-		`, []interface{}{pattern}
-
-	case "title":
-		// Search feed titles
-		return `
-			SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
-			FROM feeds
-			WHERE tld IS NOT NULL AND LOWER(title) LIKE $1
-			GROUP BY tld
-			ORDER BY tld ASC
-		`, []interface{}{pattern}
-
-	case "description":
-		// Search feed descriptions
-		return `
-			SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count
-			FROM feeds
-			WHERE tld IS NOT NULL AND LOWER(description) LIKE $1
-			GROUP BY tld
-			ORDER BY tld ASC
-		`, []interface{}{pattern}
-
-	case "item":
-		// Search item titles
-		return `
-			SELECT f.tld, COUNT(DISTINCT f.domain_host || '.' || f.domain_tld) as domain_count
-			FROM feeds f
-			INNER JOIN items i ON i.feed_url = f.url
-			WHERE f.tld IS NOT NULL AND LOWER(i.title) LIKE $1
-			GROUP BY f.tld
-			ORDER BY f.tld ASC
-		`, []interface{}{pattern}
-
-	default:
-		// "all" - search domains and feeds (NOT items - use i: prefix for item search)
-		// Also include exact domain match if pattern looks like a domain
-		if sq.DomainHost != "" && sq.DomainTLD != "" {
-			return `
-				SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM (
-					-- Domains matching host pattern
-					SELECT tld::text as tld, host || '.' || tld as source_host
-					FROM domains WHERE LOWER(host) LIKE $1
-					UNION
-					-- Exact domain match
-					SELECT tld::text as tld, host || '.' || tld as source_host
-					FROM domains WHERE LOWER(host) = $2 AND tld::text = $3
-					UNION
-					-- Feeds matching URL
-					SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1
-					UNION
-					-- Feeds matching title
-					SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1
-					UNION
-					-- Feeds matching description
-					SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1
-				) combined
-				GROUP BY tld
-				ORDER BY tld ASC
-			`, []interface{}{pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
-		}
-		return `
-			SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM (
-				-- Domains matching host
-				SELECT tld::text as tld, host || '.' || tld as source_host
-				FROM domains WHERE LOWER(host) LIKE $1
-				UNION
-				-- Feeds matching URL
-				SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1
-				UNION
-				-- Feeds matching title
-				SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1
-				UNION
-				-- Feeds matching description
-				SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1
-			) combined
-			GROUP BY tld
-			ORDER BY tld ASC
-		`, []interface{}{pattern}
-	}
-}
-
-// buildDomainSearchQuery builds a query to get domains based on search type
-// Returns (whereClause, args, argNum) to append to the base query
-func buildDomainSearchQuery(sq SearchQuery, tldFilter string, argNum int) (string, []interface{}, int) {
-	pattern := "%" + strings.ToLower(sq.Pattern) + "%"
-	var where string
-	var args []interface{}
-
-	switch sq.Type {
-	case "domain":
-		if sq.ExactMatch && tldFilter != "" {
-			// d:npr.org -> exact match
-			where = fmt.Sprintf(" AND d.tld = $%d AND LOWER(d.host) = $%d", argNum, argNum+1)
-			args = []interface{}{tldFilter, strings.ToLower(sq.Pattern)}
-			argNum += 2
-		} else if tldFilter != "" {
-			where = fmt.Sprintf(" AND d.tld = $%d AND LOWER(d.host) LIKE $%d", argNum, argNum+1)
-			args = []interface{}{tldFilter, pattern}
-			argNum += 2
-		} else {
-			where = fmt.Sprintf(" AND LOWER(d.host) LIKE $%d", argNum)
-			args = []interface{}{pattern}
-			argNum++
-		}
-
-	case "url":
-		where = fmt.Sprintf(" AND LOWER(f.url) LIKE $%d", argNum)
-		args = []interface{}{pattern}
-		argNum++
-		if tldFilter != "" {
-			where += fmt.Sprintf(" AND d.tld = $%d", argNum)
-			args = append(args, tldFilter)
-			argNum++
-		}
-
-	case "title":
-		where = fmt.Sprintf(" AND LOWER(f.title) LIKE $%d", argNum)
-		args = []interface{}{pattern}
-		argNum++
-		if tldFilter != "" {
-			where += fmt.Sprintf(" AND d.tld = $%d", argNum)
-			args = append(args, tldFilter)
-			argNum++
-		}
-
-	case "description":
-		where = fmt.Sprintf(" AND LOWER(f.description) LIKE $%d", argNum)
-		args = []interface{}{pattern}
-		argNum++
-		if tldFilter != "" {
-			where += fmt.Sprintf(" AND d.tld = $%d", argNum)
-			args = append(args, tldFilter)
-			argNum++
-		}
-
-	case "item":
-		// Need to join items - handled separately
-		where = fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = f.url AND LOWER(i.title) LIKE $%d)", argNum)
-		args = []interface{}{pattern}
-		argNum++
-		if tldFilter != "" {
-			where += fmt.Sprintf(" AND d.tld = $%d", argNum)
-			args = append(args, tldFilter)
-			argNum++
-		}
-
-	default:
-		// "all" - search everything, also include exact domain match if pattern looks like a domain
-		if tldFilter != "" {
-			if sq.DomainHost != "" && sq.DomainTLD != "" {
-				where = fmt.Sprintf(` AND d.tld = $%d AND (
-					LOWER(d.host) LIKE $%d OR
-					LOWER(f.url) LIKE $%d OR
-					LOWER(f.title) LIKE $%d OR
-					LOWER(f.description) LIKE $%d OR
-					(LOWER(d.host) = $%d AND d.tld::text = $%d)
-				)`, argNum, argNum+1, argNum+1, argNum+1, argNum+1, argNum+2, argNum+3)
-				args = []interface{}{tldFilter, pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
-				argNum += 4
-			} else {
-				where = fmt.Sprintf(` AND d.tld = $%d AND (
-					LOWER(d.host) LIKE $%d OR
-					LOWER(f.url) LIKE $%d OR
-					LOWER(f.title) LIKE $%d OR
-					LOWER(f.description) LIKE $%d
-				)`, argNum, argNum+1, argNum+1, argNum+1, argNum+1)
-				args = []interface{}{tldFilter, pattern}
-				argNum += 2
-			}
-		} else {
-			if sq.DomainHost != "" && sq.DomainTLD != "" {
-				where = fmt.Sprintf(` AND (
-					LOWER(d.host) LIKE $%d OR
-					LOWER(f.url) LIKE $%d OR
-					LOWER(f.title) LIKE $%d OR
-					LOWER(f.description) LIKE $%d OR
-					(LOWER(d.host) = $%d AND d.tld::text = $%d)
-				)`, argNum, argNum, argNum, argNum, argNum+1, argNum+2)
-				args = []interface{}{pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
-				argNum += 3
-			} else {
-				where = fmt.Sprintf(` AND (
-					LOWER(d.host) LIKE $%d OR
-					LOWER(f.url) LIKE $%d OR
-					LOWER(f.title) LIKE $%d OR
-					LOWER(f.description) LIKE $%d
-				)`, argNum, argNum, argNum, argNum)
-				args = []interface{}{pattern}
-				argNum++
-			}
-		}
-	}
-
-	return where, args, argNum
-}
-
-func (c *Crawler) handleAPIAllDomains(w http.ResponseWriter, r *http.Request) {
-	offset := 0
-	limit := 100
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 100 {
-			limit = 100
-		}
-	}
-
-	// Serve from cache (updated once per minute in background)
-	c.statsMu.RLock()
-	cached := c.cachedAllDomains
-	c.statsMu.RUnlock()
-
-	var domains []DomainStat
-	if cached != nil && offset < len(cached) {
-		end := offset + limit
-		if end > len(cached) {
-			end = len(cached)
-		}
-		domains = cached[offset:end]
-	}
-	if domains == nil {
-		domains = []DomainStat{}
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(domains)
-}
-
-// handleAPIDomains lists domains with optional status filter, including their feeds
-func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
-	status := r.URL.Query().Get("status")
-	hasFeeds := r.URL.Query().Get("has_feeds") == "true"
-	search := r.URL.Query().Get("search")
-	tldFilter := r.URL.Query().Get("tld")
-	feedMode := r.URL.Query().Get("feedMode")         // include or exclude
-	feedStatuses := r.URL.Query().Get("feedStatuses") // comma-separated
-	feedTypes := r.URL.Query().Get("feedTypes")       // comma-separated
-	limit := 100
-	offset := 0
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 500 {
-			limit = 500
-		}
-	}
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-
-	// Parse comma-separated values
-	var statusList, typeList []string
-	if feedStatuses != "" {
-		statusList = strings.Split(feedStatuses, ",")
-	}
-	if feedTypes != "" {
-		typeList = strings.Split(feedTypes, ",")
-	}
-
-	// Parse search prefix for type-specific searching
-	var searchQuery SearchQuery
-	if search != "" {
-		searchQuery = parseSearchPrefix(search)
-		// Only extract TLD for domain searches (d:npr.org -> exact match for npr.org)
-		// All other searches use the literal pattern
-		if searchQuery.Type == "domain" {
-			hostPart, detectedTLD := parseSearchTerm(searchQuery.Pattern)
-			if detectedTLD != "" {
-				searchQuery.Pattern = hostPart
-				searchQuery.ExactMatch = true // d:npr.org matches exactly npr.org
-				if tldFilter == "" {
-					tldFilter = detectedTLD
-				}
-			}
-		}
-	}
-
-	// First get domains
-	var rows pgx.Rows
-	var err error
-
-	// If feed filter is specified, query domains that have matching feeds
-	if len(statusList) > 0 || len(typeList) > 0 || feedMode != "" {
-		// Build dynamic query to get domains with matching feeds
-		query := `
-			SELECT DISTINCT d.host, d.tld, d.status, d.last_error, d.feeds_found
-			FROM domains d
-			INNER JOIN feeds f ON f.domain_host = d.host AND f.domain_tld = d.tld
-			WHERE 1=1`
-		args := []interface{}{}
-		argNum := 1
-
-		if tldFilter != "" {
-			query += fmt.Sprintf(" AND d.tld = $%d", argNum)
-			args = append(args, tldFilter)
-			argNum++
-		}
-		if status != "" {
-			query += fmt.Sprintf(" AND d.status = $%d", argNum)
-			args = append(args, status)
-			argNum++
-		}
-
-		// Handle status filters (publish_status for pass/skip/hold/dead)
-		if len(statusList) > 0 {
-			if feedMode == "exclude" {
-				query += fmt.Sprintf(" AND (f.publish_status IS NULL OR f.publish_status NOT IN (SELECT unnest($%d::text[])))", argNum)
-			} else {
-				query += fmt.Sprintf(" AND f.publish_status IN (SELECT unnest($%d::text[]))", argNum)
-			}
-			args = append(args, statusList)
-			argNum++
-		}
-
-		// Handle type filters (including special "empty" type)
-		if len(typeList) > 0 {
-			hasEmpty := false
-			var regularTypes []string
-			for _, t := range typeList {
-				if t == "empty" {
-					hasEmpty = true
-				} else {
-					regularTypes = append(regularTypes, t)
-				}
-			}
-
-			if feedMode == "exclude" {
-				// Exclude mode
-				if len(regularTypes) > 0 && hasEmpty {
-					query += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[]))) AND f.item_count > 0", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if len(regularTypes) > 0 {
-					query += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[])))", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if hasEmpty {
-					query += " AND f.item_count > 0"
-				}
-			} else {
-				// Include mode
-				if len(regularTypes) > 0 && hasEmpty {
-					query += fmt.Sprintf(" AND (f.type IN (SELECT unnest($%d::text[])) OR f.item_count IS NULL OR f.item_count = 0)", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if len(regularTypes) > 0 {
-					query += fmt.Sprintf(" AND f.type IN (SELECT unnest($%d::text[]))", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if hasEmpty {
-					query += " AND (f.item_count IS NULL OR f.item_count = 0)"
-				}
-			}
-		}
-
-		if search != "" && searchQuery.Pattern != "" {
-			searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%"
-			switch searchQuery.Type {
-			case "domain":
-				if searchQuery.ExactMatch {
-					// d:npr.org -> exact match for host "npr" (tld already filtered above)
-					query += fmt.Sprintf(" AND LOWER(d.host) = $%d", argNum)
-					args = append(args, strings.ToLower(searchQuery.Pattern))
-				} else {
-					// d:npr -> pattern match
-					query += fmt.Sprintf(" AND LOWER(d.host) LIKE $%d", argNum)
-					args = append(args, searchPattern)
-				}
-				argNum++
-			case "url":
-				query += fmt.Sprintf(" AND LOWER(f.url) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-				argNum++
-			case "title":
-				query += fmt.Sprintf(" AND LOWER(f.title) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-				argNum++
-			case "description":
-				query += fmt.Sprintf(" AND LOWER(f.description) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-				argNum++
-			case "item":
-				query += fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = f.url AND LOWER(i.title) LIKE $%d)", argNum)
-				args = append(args, searchPattern)
-				argNum++
-			default:
-				// "all" - search domains and feeds (NOT items - use i: prefix for item search)
-				// Also include exact domain match if pattern looks like a domain
-				if searchQuery.DomainHost != "" && searchQuery.DomainTLD != "" {
-					query += fmt.Sprintf(` AND (
-						LOWER(d.host) LIKE $%d OR
-						LOWER(f.url) LIKE $%d OR
-						LOWER(f.title) LIKE $%d OR
-						LOWER(f.description) LIKE $%d OR
-						(LOWER(d.host) = $%d AND d.tld::text = $%d)
-					)`, argNum, argNum, argNum, argNum, argNum+1, argNum+2)
-					args = append(args, searchPattern, strings.ToLower(searchQuery.DomainHost), strings.ToLower(searchQuery.DomainTLD))
-					argNum += 3
-				} else {
-					query += fmt.Sprintf(` AND (
-						LOWER(d.host) LIKE $%d OR
-						LOWER(f.url) LIKE $%d OR
-						LOWER(f.title) LIKE $%d OR
-						LOWER(f.description) LIKE $%d
-					)`, argNum, argNum, argNum, argNum)
-					args = append(args, searchPattern)
-					argNum++
-				}
-			}
-		}
-		query += fmt.Sprintf(" ORDER BY d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
-		args = append(args, limit, offset)
-
-		rows, err = c.db.Query(query, args...)
-	} else if hasFeeds {
-		// Only domains with feeds
-		searchPattern := "%" + strings.ToLower(search) + "%"
-		if tldFilter != "" && status != "" {
-			// Filter by specific TLD and status
-			rows, err = c.db.Query(`
-				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
-				FROM domains d
-				INNER JOIN (
-					SELECT domain_host, domain_tld, COUNT(*) as feed_count
-					FROM feeds
-					WHERE item_count > 0
-					GROUP BY domain_host, domain_tld
-				) f ON d.host = f.domain_host AND d.tld = f.domain_tld
-				WHERE d.tld = $1 AND d.status = $2
-				ORDER BY d.host ASC
-				LIMIT $3 OFFSET $4
-			`, tldFilter, status, limit, offset)
-		} else if tldFilter != "" {
-			// Filter by specific TLD only (exclude 'skip' by default)
-			rows, err = c.db.Query(`
-				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
-				FROM domains d
-				INNER JOIN (
-					SELECT domain_host, domain_tld, COUNT(*) as feed_count
-					FROM feeds
-					WHERE item_count > 0
-					GROUP BY domain_host, domain_tld
-				) f ON d.host = f.domain_host AND d.tld = f.domain_tld
-				WHERE d.status != 'skip' AND d.tld = $1
-				ORDER BY d.host ASC
-				LIMIT $2 OFFSET $3
-			`, tldFilter, limit, offset)
-		} else if search != "" {
-			// Search in domain host only (uses trigram index)
-			rows, err = c.db.Query(`
-				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
-				FROM domains d
-				INNER JOIN (
-					SELECT domain_host, domain_tld, COUNT(*) as feed_count
-					FROM feeds
-					WHERE item_count > 0
-					GROUP BY domain_host, domain_tld
-				) f ON d.host = f.domain_host AND d.tld = f.domain_tld
-				WHERE d.status != 'skip' AND LOWER(d.host) LIKE $1
-				ORDER BY d.tld ASC, d.host ASC
-				LIMIT $2 OFFSET $3
-			`, searchPattern, limit, offset)
-		} else if status != "" {
-			rows, err = c.db.Query(`
-				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
-				FROM domains d
-				INNER JOIN (
-					SELECT domain_host, domain_tld, COUNT(*) as feed_count
-					FROM feeds
-					WHERE item_count > 0
-					GROUP BY domain_host, domain_tld
-				) f ON d.host = f.domain_host AND d.tld = f.domain_tld
-				WHERE d.status = $1
-				ORDER BY d.tld ASC, d.host ASC
-				LIMIT $2 OFFSET $3
-			`, status, limit, offset)
-		} else {
-			// Default: exclude 'skip' status domains
-			rows, err = c.db.Query(`
-				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
-				FROM domains d
-				INNER JOIN (
-					SELECT domain_host, domain_tld, COUNT(*) as feed_count
-					FROM feeds
-					WHERE item_count > 0
-					GROUP BY domain_host, domain_tld
-				) f ON d.host = f.domain_host AND d.tld = f.domain_tld
-				WHERE d.status != 'skip'
-				ORDER BY d.tld ASC, d.host ASC
-				LIMIT $1 OFFSET $2
-			`, limit, offset)
-		}
-	} else if tldFilter != "" && search != "" && status != "" {
-		// Filter by TLD, status, and search
-		if searchQuery.ExactMatch {
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND status = $2 AND LOWER(host) = $3
-				ORDER BY host ASC
-				LIMIT $4 OFFSET $5
-			`, tldFilter, status, strings.ToLower(searchQuery.Pattern), limit, offset)
-		} else if searchQuery.DomainHost != "" && strings.ToLower(searchQuery.DomainTLD) == strings.ToLower(tldFilter) {
-			// Domain-like search with matching TLD - search for exact host
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND status = $2 AND LOWER(host) = $3
-				ORDER BY host ASC
-				LIMIT $4 OFFSET $5
-			`, tldFilter, status, strings.ToLower(searchQuery.DomainHost), limit, offset)
-		} else {
-			searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%"
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND status = $2 AND LOWER(host) LIKE $3
-				ORDER BY host ASC
-				LIMIT $4 OFFSET $5
-			`, tldFilter, status, searchPattern, limit, offset)
-		}
-	} else if tldFilter != "" && search != "" {
-		// Filter by TLD and search
-		// If search looks like a domain with matching TLD, use DomainHost for exact/pattern match
-		if searchQuery.ExactMatch {
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND LOWER(host) = $2
-				ORDER BY host ASC
-				LIMIT $3 OFFSET $4
-			`, tldFilter, strings.ToLower(searchQuery.Pattern), limit, offset)
-		} else if searchQuery.DomainHost != "" && strings.ToLower(searchQuery.DomainTLD) == strings.ToLower(tldFilter) {
-			// Domain-like search with matching TLD - search for exact host or pattern
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND LOWER(host) = $2
-				ORDER BY host ASC
-				LIMIT $3 OFFSET $4
-			`, tldFilter, strings.ToLower(searchQuery.DomainHost), limit, offset)
-		} else {
-			searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%"
-			rows, err = c.db.Query(`
-				SELECT host, tld, status, last_error, feeds_found
-				FROM domains
-				WHERE tld = $1 AND LOWER(host) LIKE $2
-				ORDER BY host ASC
-				LIMIT $3 OFFSET $4
-			`, tldFilter, searchPattern, limit, offset)
-		}
-	} else if tldFilter != "" && status != "" {
-		// Filter by TLD and status
-		rows, err = c.db.Query(`
-			SELECT host, tld, status, last_error, feeds_found
-			FROM domains
-			WHERE tld = $1 AND status = $2
-			ORDER BY host ASC
-			LIMIT $3 OFFSET $4
-		`, tldFilter, status, limit, offset)
-	} else if tldFilter != "" {
-		// Filter by TLD only (show all statuses)
-		rows, err = c.db.Query(`
-			SELECT host, tld, status, last_error, feeds_found
-			FROM domains
-			WHERE tld = $1
-			ORDER BY host ASC
-			LIMIT $2 OFFSET $3
-		`, tldFilter, limit, offset)
-	} else if status != "" {
-		rows, err = c.db.Query(`
-			SELECT host, tld, status, last_error, feeds_found
-			FROM domains
-			WHERE status = $1
-			ORDER BY tld ASC, host ASC
-			LIMIT $2 OFFSET $3
-		`, status, limit, offset)
-	} else {
-		// Default: exclude 'skip' status domains
-		rows, err = c.db.Query(`
-			SELECT host, tld, status, last_error, feeds_found
-			FROM domains
-			WHERE status != 'skip'
-			ORDER BY tld ASC, host ASC
-			LIMIT $1 OFFSET $2
-		`, limit, offset)
-	}
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type FeedInfo struct {
-		URL           string `json:"url"`
-		Title         string `json:"title,omitempty"`
-		Type          string `json:"type,omitempty"`
-		Status        string `json:"status,omitempty"`
-		PublishStatus string `json:"publish_status,omitempty"`
-		Language      string `json:"language,omitempty"`
-		ItemCount     int    `json:"item_count,omitempty"`
-	}
-
-	type DomainInfo struct {
-		Host      string     `json:"host"`
-		TLD       string     `json:"tld"`
-		Status    string     `json:"status"`
-		LastError string     `json:"last_error,omitempty"`
-		FeedCount int        `json:"feed_count"`
-		Feeds     []FeedInfo `json:"feeds,omitempty"`
-	}
-
-	var domains []DomainInfo
-	var hosts []string
-	for rows.Next() {
-		var d DomainInfo
-		var tld, lastError *string
-		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
-			continue
-		}
-		d.TLD = StringValue(tld)
-		d.LastError = StringValue(lastError)
-		domains = append(domains, d)
-		// Build full domain for feed lookup (source_host = host.tld)
-		fullDomain := d.Host
-		if d.TLD != "" {
-			fullDomain = d.Host + "." + d.TLD
-		}
-		hosts = append(hosts, fullDomain)
-	}
-
-	// Now get feeds for these domains (with actual item count from items table)
-	// Apply the same feed filters used for domain selection
-	if len(hosts) > 0 {
-		feedQuery := `
-			SELECT f.domain_host || '.' || f.domain_tld as source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language,
-				(SELECT COUNT(*) FROM items WHERE feed_url = f.url) as item_count
-			FROM feeds f
-			WHERE f.domain_host || '.' || f.domain_tld = ANY($1)`
-		feedArgs := []interface{}{hosts}
-		feedArgNum := 2
-
-		// Apply feed status filters (publish_status for pass/skip/hold/dead)
-		if len(statusList) > 0 {
-			if feedMode == "exclude" {
-				feedQuery += fmt.Sprintf(" AND (f.publish_status IS NULL OR f.publish_status NOT IN (SELECT unnest($%d::text[])))", feedArgNum)
-			} else {
-				feedQuery += fmt.Sprintf(" AND f.publish_status IN (SELECT unnest($%d::text[]))", feedArgNum)
-			}
-			feedArgs = append(feedArgs, statusList)
-			feedArgNum++
-		}
-
-		// Apply feed type filters (including special "empty" type)
-		if len(typeList) > 0 {
-			hasEmpty := false
-			var regularTypes []string
-			for _, t := range typeList {
-				if t == "empty" {
-					hasEmpty = true
-				} else {
-					regularTypes = append(regularTypes, t)
-				}
-			}
-
-			if feedMode == "exclude" {
-				if len(regularTypes) > 0 && hasEmpty {
-					feedQuery += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[]))) AND f.item_count > 0", feedArgNum)
-					feedArgs = append(feedArgs, regularTypes)
-					feedArgNum++
-				} else if len(regularTypes) > 0 {
-					feedQuery += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[])))", feedArgNum)
-					feedArgs = append(feedArgs, regularTypes)
-					feedArgNum++
-				} else if hasEmpty {
-					feedQuery += " AND f.item_count > 0"
-				}
-			} else {
-				if len(regularTypes) > 0 && hasEmpty {
-					feedQuery += fmt.Sprintf(" AND (f.type IN (SELECT unnest($%d::text[])) OR f.item_count IS NULL OR f.item_count = 0)", feedArgNum)
-					feedArgs = append(feedArgs, regularTypes)
-					feedArgNum++
-				} else if len(regularTypes) > 0 {
-					feedQuery += fmt.Sprintf(" AND f.type IN (SELECT unnest($%d::text[]))", feedArgNum)
-					feedArgs = append(feedArgs, regularTypes)
-					feedArgNum++
-				} else if hasEmpty {
-					feedQuery += " AND (f.item_count IS NULL OR f.item_count = 0)"
-				}
-			}
-		}
-
-		feedQuery += " ORDER BY f.domain_host, f.domain_tld, f.url"
-
-		feedRows, err := c.db.Query(feedQuery, feedArgs...)
-		if err == nil {
-			defer feedRows.Close()
-			feedsByHost := make(map[string][]FeedInfo)
-			for feedRows.Next() {
-				var host string
-				var f FeedInfo
-				var title, feedType, status, publishStatus, language *string
-				var itemCount *int
-				if err := feedRows.Scan(&host, &f.URL, &title, &feedType, &status, &publishStatus, &language, &itemCount); err != nil {
-					continue
-				}
-				f.Title = StringValue(title)
-				f.Type = StringValue(feedType)
-				f.Status = StringValue(status)
-				f.PublishStatus = StringValue(publishStatus)
-				f.Language = StringValue(language)
-				if itemCount != nil {
-					f.ItemCount = *itemCount
-				}
-				feedsByHost[host] = append(feedsByHost[host], f)
-			}
-			// Attach feeds to domains (feedsByHost is keyed by full domain)
-			for i := range domains {
-				fullHost := domains[i].Host
-				if domains[i].TLD != "" {
-					fullHost = domains[i].Host + "." + domains[i].TLD
-				}
-				if feeds, ok := feedsByHost[fullHost]; ok {
-					domains[i].Feeds = feeds
-				}
-			}
-		}
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(domains)
-}
-
-func (c *Crawler) handleAPIDomainsByStatus(w http.ResponseWriter, r *http.Request) {
-	status := r.URL.Query().Get("status")
-	if status == "" {
-		http.Error(w, "status parameter required", http.StatusBadRequest)
-		return
-	}
-
-	limit := 100
-	offset := 0
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 500 {
-			limit = 500
-		}
-	}
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-
-	rows, err := c.db.Query(`
-		SELECT host, tld, status, last_error, feeds_found
-		FROM domains
-		WHERE status = $1
-		ORDER BY tld ASC, host ASC
-		LIMIT $2 OFFSET $3
-	`, status, limit, offset)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type DomainInfo struct {
-		Host      string `json:"host"`
-		TLD       string `json:"tld"`
-		Status    string `json:"status"`
-		LastError string `json:"last_error,omitempty"`
-		FeedCount int    `json:"feed_count"`
-	}
-
-	var domains []DomainInfo
-	for rows.Next() {
-		var d DomainInfo
-		var tld, lastError *string
-		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
-			continue
-		}
-		d.TLD = StringValue(tld)
-		d.LastError = StringValue(lastError)
-		domains = append(domains, d)
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(domains)
-}
-
-func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	limit := 100
-	offset := 0
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 500 {
-			limit = 500
-		}
-	}
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-
-	// Parse host into domain_host and domain_tld
-	domainHost := stripTLD(host)
-	domainTLD := getTLD(host)
-
-	rows, err := c.db.Query(`
-		SELECT url, title, type, status, last_error, item_count, publish_status, language
-		FROM feeds
-		WHERE domain_host = $1 AND domain_tld = $2
-		ORDER BY url ASC
-		LIMIT $3 OFFSET $4
-	`, domainHost, domainTLD, limit, offset)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type FeedInfo struct {
-		URL           string `json:"url"`
-		Title         string `json:"title"`
-		Type          string `json:"type"`
-		Status        string `json:"status,omitempty"`
-		LastError     string `json:"last_error,omitempty"`
-		ItemCount     int    `json:"item_count,omitempty"`
-		PublishStatus string `json:"publish_status,omitempty"`
-		Language      string `json:"language,omitempty"`
-	}
-
-	var feeds []FeedInfo
-	for rows.Next() {
-		var f FeedInfo
-		var title, status, lastError, publishStatus, language *string
-		var itemCount *int
-		if err := rows.Scan(&f.URL, &title, &f.Type, &status, &lastError, &itemCount, &publishStatus, &language); err != nil {
-			continue
-		}
-		f.Title = StringValue(title)
-		f.Status = StringValue(status)
-		f.LastError = StringValue(lastError)
-		f.PublishStatus = StringValue(publishStatus)
-		f.Language = StringValue(language)
-		if itemCount != nil {
-			f.ItemCount = *itemCount
-		}
-		feeds = append(feeds, f)
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(feeds)
-}
-
-// handleAPISetDomainStatus sets the status for a domain
-// status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for 'drop')
-func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	status := r.URL.Query().Get("status")
-
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-	if status != "hold" && status != "pass" && status != "skip" {
-		http.Error(w, "status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest)
-		return
-	}
-
-	host = normalizeHost(host)
-
-	// Setting to 'skip' triggers takedown (hide content but preserve data)
-	if status == "skip" {
-		result := c.skipDomain(host)
-		if result.Error != "" {
-			http.Error(w, result.Error, http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		json.NewEncoder(w).Encode(result)
-		return
-	}
-
-	// When setting to pass, clear any last_error
-	var err error
-	strippedHost := stripTLD(host)
-	tld := getTLD(host)
-	if status == "pass" {
-		_, err = c.db.Exec(`
-			UPDATE domains SET status = $1, last_error = NULL
-			WHERE host = $2 AND tld = $3
-		`, status, strippedHost, tld)
-	} else {
-		_, err = c.db.Exec(`
-			UPDATE domains SET status = $1
-			WHERE host = $2 AND tld = $3
-		`, status, strippedHost, tld)
-	}
-
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(map[string]string{
-		"host":   host,
-		"status": status,
-	})
-}
-
-func (c *Crawler) handleAPIRevisitDomain(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	_, err := c.db.Exec(`
-		UPDATE domains SET status = 'pass', crawled_at = '0001-01-01 00:00:00', last_error = NULL
-		WHERE host = $1 AND tld = $2
-	`, stripTLD(host), getTLD(host))
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(map[string]string{"status": "queued", "host": host})
-}
-
-// handleAPIPriorityCrawl immediately crawls a domain (adds it if not exists)
-func (c *Crawler) handleAPIPriorityCrawl(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	host = normalizeHost(host)
-
-	// Add domain if it doesn't exist, or reset to pass for crawling
-	_, err := c.db.Exec(`
-		INSERT INTO domains (host, status, tld)
-		VALUES ($1, 'pass', $2)
-		ON CONFLICT(host, tld) DO UPDATE SET status = 'pass', crawled_at = '0001-01-01 00:00:00', last_error = NULL
-	`, stripTLD(host), getTLD(host))
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-
-	// Crawl synchronously
-	fmt.Printf("Priority crawl: %s\n", host)
-	feedsFound, crawlErr := c.feedCrawl(host)
-
-	errStr := ""
-	if crawlErr != nil {
-		errStr = crawlErr.Error()
-	}
-
-	// Mark as crawled
-	c.markDomainCrawled(stripTLD(host), getTLD(host), feedsFound, errStr)
-
-	// Get the feeds we found
-	feeds, _ := c.GetFeedsByHost(host)
-
-	type FeedSummary struct {
-		URL      string `json:"url"`
-		Title    string `json:"title"`
-		Type     string `json:"type"`
-		Category string `json:"category"`
-		Status   string `json:"status"`
-	}
-	var feedSummaries []FeedSummary
-	for _, f := range feeds {
-		feedSummaries = append(feedSummaries, FeedSummary{
-			URL:      f.URL,
-			Title:    f.Title,
-			Type:     f.Type,
-			Category: f.Category,
-			Status:   f.Status,
-		})
-	}
-
-	result := map[string]interface{}{
-		"host":        host,
-		"feeds_found": feedsFound,
-		"feeds":       feedSummaries,
-	}
-	if crawlErr != nil {
-		result["error"] = crawlErr.Error()
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(result)
-}
-
-// handleAPIFilter handles flexible filtering with stackable parameters
-func (c *Crawler) handleAPIFilter(w http.ResponseWriter, r *http.Request) {
-	tld := r.URL.Query().Get("tld")
-	domain := r.URL.Query().Get("domain")
-	feedStatus := r.URL.Query().Get("feedStatus")
-	domainStatus := r.URL.Query().Get("domainStatus")
-	languages := r.URL.Query().Get("languages") // comma-separated list
-	show := r.URL.Query().Get("show")           // "feeds" or "domains"
-	sort := r.URL.Query().Get("sort")           // "alpha" or "feeds"
-
-	limit := 100
-	offset := 0
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 500 {
-			limit = 500
-		}
-	}
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-
-	// Parse languages into slice
-	var langList []string
-	if languages != "" {
-		for _, lang := range strings.Split(languages, ",") {
-			lang = strings.TrimSpace(lang)
-			if lang != "" {
-				langList = append(langList, lang)
-			}
-		}
-	}
-
-	// Determine what to show based on filters
-	if show == "" {
-		if feedStatus != "" || domain != "" || len(langList) > 0 {
-			show = "feeds"
-		} else {
-			show = "domains"
-		}
-	}
-
-	if show == "feeds" {
-		c.filterFeeds(w, tld, domain, feedStatus, langList, limit, offset)
-	} else {
-		c.filterDomains(w, tld, domainStatus, sort, limit, offset)
-	}
-}
-
-func (c *Crawler) filterDomains(w http.ResponseWriter, tld, status, sort string, limit, offset int) {
-	var args []interface{}
-	argNum := 1
-	query := `
-		SELECT host, tld, status, last_error, feeds_found
-		FROM domains
-		WHERE 1=1`
-
-	if tld != "" {
-		query += fmt.Sprintf(" AND tld = $%d", argNum)
-		args = append(args, tld)
-		argNum++
-	}
-	if status != "" {
-		query += fmt.Sprintf(" AND status = $%d", argNum)
-		args = append(args, status)
-		argNum++
-	}
-
-	// Sort by feed count descending or alphabetically
-	if sort == "feeds" {
-		query += fmt.Sprintf(" ORDER BY feeds_found DESC, host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
-	} else {
-		query += fmt.Sprintf(" ORDER BY tld ASC, host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
-	}
-	args = append(args, limit, offset)
-
-	rows, err := c.db.Query(query, args...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type DomainInfo struct {
-		Host      string `json:"host"`
-		TLD       string `json:"tld"`
-		Status    string `json:"status"`
-		LastError string `json:"last_error,omitempty"`
-		FeedCount int    `json:"feed_count"`
-	}
-
-	var domains []DomainInfo
-	for rows.Next() {
-		var d DomainInfo
-		var tldVal, lastError *string
-		if err := rows.Scan(&d.Host, &tldVal, &d.Status, &lastError, &d.FeedCount); err != nil {
-			continue
-		}
-		d.TLD = StringValue(tldVal)
-		d.LastError = StringValue(lastError)
-		domains = append(domains, d)
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(map[string]interface{}{
-		"type": "domains",
-		"data": domains,
-	})
-}
-
-func (c *Crawler) handleAPITLDDomains(w http.ResponseWriter, r *http.Request) {
-	tld := r.URL.Query().Get("tld")
-	if tld == "" {
-		http.Error(w, "tld parameter required", http.StatusBadRequest)
-		return
-	}
-
-	limit := 100
-	offset := 0
-	if l := r.URL.Query().Get("limit"); l != "" {
-		fmt.Sscanf(l, "%d", &limit)
-		if limit > 500 {
-			limit = 500
-		}
-	}
-	if o := r.URL.Query().Get("offset"); o != "" {
-		fmt.Sscanf(o, "%d", &offset)
-	}
-
-	rows, err := c.db.Query(`
-		SELECT host, status, last_error, feeds_found
-		FROM domains
-		WHERE tld = $1
-		ORDER BY host ASC
-		LIMIT $2 OFFSET $3
-	`, tld, limit, offset)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type DomainInfo struct {
-		Host      string `json:"host"`
-		Status    string `json:"status"`
-		LastError string `json:"last_error,omitempty"`
-		FeedCount int    `json:"feed_count"`
-	}
-
-	var domains []DomainInfo
-	for rows.Next() {
-		var d DomainInfo
-		var lastError *string
-		if err := rows.Scan(&d.Host, &d.Status, &lastError, &d.FeedCount); err != nil {
-			continue
-		}
-		d.LastError = StringValue(lastError)
-		domains = append(domains, d)
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(domains)
-}
-
-func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
-	status := r.URL.Query().Get("status")             // domain status: pass, skip, hold, dead
-	feedMode := r.URL.Query().Get("feedMode")         // include or exclude
-	feedStatuses := r.URL.Query().Get("feedStatuses") // comma-separated: pass,skip,hold,dead
-	feedTypes := r.URL.Query().Get("feedTypes")       // comma-separated: rss,atom,json,unknown,empty
-	search := r.URL.Query().Get("search")             // search query
-
-	// Parse comma-separated values
-	var statusList, typeList []string
-	if feedStatuses != "" {
-		statusList = strings.Split(feedStatuses, ",")
-	}
-	if feedTypes != "" {
-		typeList = strings.Split(feedTypes, ",")
-	}
-
-	var rows pgx.Rows
-	var err error
-
-	// If feed filter is specified, query from feeds table instead
-	if len(statusList) > 0 || len(typeList) > 0 || feedMode == "exclude" {
-		// Build query to get TLDs from feeds
-		query := `SELECT domain_tld as tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM feeds WHERE domain_tld IS NOT NULL`
-		args := []interface{}{}
-		argNum := 1
-
-		// Handle status filters (publish_status for pass/skip/hold/dead)
-		if len(statusList) > 0 {
-			if feedMode == "exclude" {
-				query += fmt.Sprintf(" AND (publish_status IS NULL OR publish_status NOT IN (SELECT unnest($%d::text[])))", argNum)
-			} else {
-				query += fmt.Sprintf(" AND publish_status IN (SELECT unnest($%d::text[]))", argNum)
-			}
-			args = append(args, statusList)
-			argNum++
-		}
-
-		// Handle type filters (including special "empty" type)
-		if len(typeList) > 0 {
-			hasEmpty := false
-			var regularTypes []string
-			for _, t := range typeList {
-				if t == "empty" {
-					hasEmpty = true
-				} else {
-					regularTypes = append(regularTypes, t)
-				}
-			}
-
-			if feedMode == "exclude" {
-				// Exclude mode: exclude these types
-				if len(regularTypes) > 0 && hasEmpty {
-					query += fmt.Sprintf(" AND type NOT IN (SELECT unnest($%d::text[])) AND item_count > 0", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if len(regularTypes) > 0 {
-					query += fmt.Sprintf(" AND (type IS NULL OR type NOT IN (SELECT unnest($%d::text[])))", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if hasEmpty {
-					query += " AND item_count > 0"
-				}
-			} else {
-				// Include mode: include these types
-				if len(regularTypes) > 0 && hasEmpty {
-					query += fmt.Sprintf(" AND (type IN (SELECT unnest($%d::text[])) OR item_count IS NULL OR item_count = 0)", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if len(regularTypes) > 0 {
-					query += fmt.Sprintf(" AND type IN (SELECT unnest($%d::text[]))", argNum)
-					args = append(args, regularTypes)
-					argNum++
-				} else if hasEmpty {
-					query += " AND (item_count IS NULL OR item_count = 0)"
-				}
-			}
-		}
-
-		if search != "" {
-			sq := parseSearchPrefix(search)
-			searchPattern := "%" + strings.ToLower(sq.Pattern) + "%"
-
-			// Only extract TLD for domain searches (d:npr.org -> exact match for npr.org)
-			var tldFilter string
-			var exactMatch bool
-			hostSearchPattern := searchPattern
-			if sq.Type == "domain" {
-				hostPattern, detectedTLD := parseSearchTerm(sq.Pattern)
-				if detectedTLD != "" {
-					tldFilter = detectedTLD
-					exactMatch = true
-					hostSearchPattern = "%" + strings.ToLower(hostPattern) + "%"
-				}
-			}
-
-			switch sq.Type {
-			case "domain":
-				// Search domain names
-				if exactMatch && tldFilter != "" {
-					// d:npr.org -> exact match
-					query += fmt.Sprintf(" AND LOWER(domain_host || '.' || domain_tld) = $%d", argNum)
-					args = append(args, strings.ToLower(sq.Pattern))
-				} else if tldFilter != "" {
-					query += fmt.Sprintf(" AND domain_tld = $%d AND LOWER(domain_host || '.' || domain_tld) LIKE $%d", argNum, argNum+1)
-					args = append(args, tldFilter, hostSearchPattern)
-				} else {
-					query += fmt.Sprintf(" AND LOWER(domain_host || '.' || domain_tld) LIKE $%d", argNum)
-					args = append(args, hostSearchPattern)
-				}
-			case "url":
-				query += fmt.Sprintf(" AND LOWER(url) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-			case "title":
-				query += fmt.Sprintf(" AND LOWER(title) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-			case "description":
-				query += fmt.Sprintf(" AND LOWER(description) LIKE $%d", argNum)
-				args = append(args, searchPattern)
-			case "item":
-				query += fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = feeds.url AND LOWER(i.title) LIKE $%d)", argNum)
-				args = append(args, searchPattern)
-			default:
-				// "all" - search domains and feeds (NOT items - use i: prefix for item search)
-				// Also include exact domain match if pattern looks like a domain
-				if sq.DomainHost != "" && sq.DomainTLD != "" {
-					fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD)
-					query += fmt.Sprintf(` AND (
-						LOWER(domain_host || '.' || domain_tld) LIKE $%d OR
-						LOWER(url) LIKE $%d OR
-						LOWER(title) LIKE $%d OR
-						LOWER(description) LIKE $%d OR
-						LOWER(domain_host || '.' || domain_tld) = $%d
-					)`, argNum, argNum, argNum, argNum, argNum+1)
-					args = append(args, searchPattern, fullDomain)
-				} else {
-					query += fmt.Sprintf(` AND (
-						LOWER(domain_host || '.' || domain_tld) LIKE $%d OR
-						LOWER(url) LIKE $%d OR
-						LOWER(title) LIKE $%d OR
-						LOWER(description) LIKE $%d
-					)`, argNum, argNum, argNum, argNum)
-					args = append(args, searchPattern)
-				}
-			}
-		}
-		query += " GROUP BY domain_tld ORDER BY domain_tld ASC"
-		rows, err = c.db.Query(query, args...)
-	} else if search != "" {
-		// Parse search prefix for type-specific searching
-		sq := parseSearchPrefix(search)
-
-		// Use the helper to build the TLD search query
-		query, args := buildTLDSearchQuery(sq)
-		rows, err = c.db.Query(query, args...)
-	} else if status != "" {
-		// TLDs filtered by domain status
-		rows, err = c.db.Query(`
-			SELECT tld::text as tld, COUNT(*) as domain_count
-			FROM domains
-			WHERE tld IS NOT NULL AND status = $1
-			GROUP BY tld
-			HAVING COUNT(*) > 0
-			ORDER BY tld ASC
-		`, status)
-	} else {
-		// All TLDs from enum with domain counts
-		rows, err = c.db.Query(`
-			SELECT e.enumlabel as tld, COALESCE(d.cnt, 0) as domain_count
-			FROM pg_enum e
-			LEFT JOIN (
-				SELECT tld::text as tld, COUNT(*) as cnt
-				FROM domains
-				GROUP BY tld
-			) d ON e.enumlabel = d.tld
-			WHERE e.enumtypid = 'tld_enum'::regtype
-			ORDER BY e.enumlabel ASC
-		`)
-	}
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	defer rows.Close()
-
-	type TLDInfo struct {
-		TLD         string `json:"tld"`
-		DomainCount int    `json:"domain_count"`
-	}
-
-	var tlds []TLDInfo
-	for rows.Next() {
-		var t TLDInfo
-		if err := rows.Scan(&t.TLD, &t.DomainCount); err != nil {
-			continue
-		}
-		tlds = append(tlds, t)
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(tlds)
-}
-
-func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
-	tld := r.URL.Query().Get("tld")
-	if tld == "" {
-		http.Error(w, "tld parameter required", http.StatusBadRequest)
-		return
-	}
-	search := r.URL.Query().Get("search")
-
-	stats := map[string]interface{}{
-		"tld": tld,
-	}
-
-	// Build WHERE clause based on whether search is provided
-	var domainWhere, feedWhere string
-	var domainArgs, feedArgs []interface{}
-
-	if search != "" {
-		// Parse search prefix for type-specific searching
-		sq := parseSearchPrefix(search)
-		searchPattern := "%" + strings.ToLower(sq.Pattern) + "%"
-
-		// For domain searches, check for exact match
-		if sq.Type == "domain" {
-			hostPart, detectedTLD := parseSearchTerm(sq.Pattern)
-			if detectedTLD != "" {
-				// d:npr.org -> exact match for host "npr" in specified TLD
-				domainWhere = "tld = $1 AND lower(host) = $2"
-				domainArgs = []interface{}{tld, strings.ToLower(hostPart)}
-				feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) = $2"
-				feedArgs = []interface{}{tld, strings.ToLower(sq.Pattern)}
-			} else {
-				// d:npr -> pattern match in specified TLD
-				domainWhere = "tld = $1 AND lower(host) LIKE $2"
-				domainArgs = []interface{}{tld, searchPattern}
-				feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2"
-				feedArgs = []interface{}{tld, searchPattern}
-			}
-		} else {
-			// Other search types - pattern match
-			domainWhere = "tld = $1 AND lower(host) LIKE $2"
-			domainArgs = []interface{}{tld, searchPattern}
-			feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2"
-			feedArgs = []interface{}{tld, searchPattern}
-		}
-		stats["search"] = search
-	} else {
-		// Filter by TLD only
-		domainWhere = "tld = $1"
-		domainArgs = []interface{}{tld}
-		feedWhere = "domain_tld = $1"
-		feedArgs = []interface{}{tld}
-	}
-
-	// Domain stats by status
-	var totalDomains, passDomains, skipDomains, holdDomains, deadDomains int
-	err := c.db.QueryRow(`SELECT COUNT(*) FROM domains WHERE `+domainWhere, domainArgs...).Scan(&totalDomains)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	stats["total_domains"] = totalDomains
-
-	rows, err := c.db.Query(`SELECT status, COUNT(*) FROM domains WHERE `+domainWhere+` GROUP BY status`, domainArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for rows.Next() {
-		var status string
-		var count int
-		if err := rows.Scan(&status, &count); err != nil {
-			continue
-		}
-		switch status {
-		case "pass":
-			passDomains = count
-		case "skip":
-			skipDomains = count
-		case "hold":
-			holdDomains = count
-		case "dead":
-			deadDomains = count
-		}
-	}
-	rows.Close()
-	stats["pass_domains"] = passDomains
-	stats["skip_domains"] = skipDomains
-	stats["hold_domains"] = holdDomains
-	stats["dead_domains"] = deadDomains
-
-	// Feed stats
-	var totalFeeds, passFeeds, skipFeeds, holdFeeds, deadFeeds, emptyFeeds int
-	var rssFeeds, atomFeeds, jsonFeeds, unknownFeeds int
-
-	err = c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE `+feedWhere, feedArgs...).Scan(&totalFeeds)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	stats["total_feeds"] = totalFeeds
-
-	// Feed status counts
-	statusRows, err := c.db.Query(`SELECT COALESCE(status, 'hold'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY status`, feedArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for statusRows.Next() {
-		var status string
-		var count int
-		if err := statusRows.Scan(&status, &count); err != nil {
-			continue
-		}
-		switch status {
-		case "pass":
-			passFeeds = count
-		case "skip":
-			skipFeeds = count
-		case "hold":
-			holdFeeds = count
-		case "dead":
-			deadFeeds = count
-		}
-	}
-	statusRows.Close()
-	stats["pass_feeds"] = passFeeds
-	stats["skip_feeds"] = skipFeeds
-	stats["hold_feeds"] = holdFeeds
-	stats["dead_feeds"] = deadFeeds
-
-	// Empty feeds count
-	c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE (`+feedWhere+`) AND (item_count IS NULL OR item_count = 0)`, feedArgs...).Scan(&emptyFeeds)
-	stats["empty_feeds"] = emptyFeeds
-
-	// Feed type counts
-	typeRows, err := c.db.Query(`SELECT COALESCE(type, 'unknown'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY type`, feedArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for typeRows.Next() {
-		var feedType string
-		var count int
-		if err := typeRows.Scan(&feedType, &count); err != nil {
-			continue
-		}
-		switch feedType {
-		case "rss":
-			rssFeeds = count
-		case "atom":
-			atomFeeds = count
-		case "json":
-			jsonFeeds = count
-		default:
-			unknownFeeds += count
-		}
-	}
-	typeRows.Close()
-	stats["rss_feeds"] = rssFeeds
-	stats["atom_feeds"] = atomFeeds
-	stats["json_feeds"] = jsonFeeds
-	stats["unknown_feeds"] = unknownFeeds
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(stats)
-}
-
-func (c *Crawler) handleAPISearchStats(w http.ResponseWriter, r *http.Request) {
-	search := r.URL.Query().Get("search")
-	if search == "" {
-		http.Error(w, "search parameter required", http.StatusBadRequest)
-		return
-	}
-
-	// Parse search prefix for type-specific searching
-	sq := parseSearchPrefix(search)
-	searchPattern := "%" + strings.ToLower(sq.Pattern) + "%"
-
-	// Only extract TLD for domain searches (d:npr.org -> exact match for npr.org)
-	var tldFilter, hostPart string
-	var exactMatch bool
-	if sq.Type == "domain" {
-		hostPart, tldFilter = parseSearchTerm(sq.Pattern)
-		if tldFilter != "" {
-			searchPattern = "%" + strings.ToLower(hostPart) + "%"
-			exactMatch = true
-		}
-	}
-
-	stats := map[string]interface{}{}
-
-	// Build WHERE clause based on search type
-	var domainWhere, feedWhere string
-	var domainArgs, feedArgs []interface{}
-
-	switch sq.Type {
-	case "domain":
-		if exactMatch && tldFilter != "" {
-			// d:npr.org -> exact match
-			domainWhere = "tld = $1 AND LOWER(host) = $2"
-			domainArgs = []interface{}{tldFilter, strings.ToLower(hostPart)}
-			feedWhere = "LOWER(domain_host || '.' || domain_tld) = $1"
-			feedArgs = []interface{}{strings.ToLower(sq.Pattern)}
-		} else if tldFilter != "" {
-			domainWhere = "tld = $1 AND LOWER(host) LIKE $2"
-			domainArgs = []interface{}{tldFilter, searchPattern}
-			feedWhere = "domain_tld = $1 AND LOWER(domain_host || '.' || domain_tld) LIKE $2"
-			feedArgs = []interface{}{tldFilter, searchPattern}
-		} else {
-			domainWhere = "LOWER(host) LIKE $1"
-			domainArgs = []interface{}{searchPattern}
-			feedWhere = "LOWER(domain_host || '.' || domain_tld) LIKE $1"
-			feedArgs = []interface{}{searchPattern}
-		}
-	case "url":
-		domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.url) LIKE $1)"
-		domainArgs = []interface{}{searchPattern}
-		feedWhere = "LOWER(url) LIKE $1"
-		feedArgs = []interface{}{searchPattern}
-	case "title":
-		domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.title) LIKE $1)"
-		domainArgs = []interface{}{searchPattern}
-		feedWhere = "LOWER(title) LIKE $1"
-		feedArgs = []interface{}{searchPattern}
-	case "description":
-		domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.description) LIKE $1)"
-		domainArgs = []interface{}{searchPattern}
-		feedWhere = "LOWER(description) LIKE $1"
-		feedArgs = []interface{}{searchPattern}
-	case "item":
-		domainWhere = "EXISTS (SELECT 1 FROM feeds f INNER JOIN items i ON i.feed_url = f.url WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(i.title) LIKE $1)"
-		domainArgs = []interface{}{searchPattern}
-		feedWhere = "EXISTS (SELECT 1 FROM items i WHERE i.feed_url = url AND LOWER(i.title) LIKE $1)"
-		feedArgs = []interface{}{searchPattern}
-	default:
-		// "all" - search domains and feeds (NOT items - use i: prefix for item search)
-		// Also include exact domain match if pattern looks like a domain
-		if sq.DomainHost != "" && sq.DomainTLD != "" {
-			domainWhere = `(
-				LOWER(host) LIKE $1 OR
-				(LOWER(host) = $2 AND tld::text = $3) OR
-				EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND (
-					LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1
-				))
-			)`
-			domainArgs = []interface{}{searchPattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)}
-			fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD)
-			feedWhere = `(
-				LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1 OR LOWER(domain_host || '.' || domain_tld) = $2
-			)`
-			feedArgs = []interface{}{searchPattern, fullDomain}
-		} else {
-			domainWhere = `(
-				LOWER(host) LIKE $1 OR
-				EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND (
-					LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1
-				))
-			)`
-			domainArgs = []interface{}{searchPattern}
-			feedWhere = `(
-				LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1
-			)`
-			feedArgs = []interface{}{searchPattern}
-		}
-	}
-
-	// Count matching domains by status
-	var totalDomains, passDomains, skipDomains, holdDomains, deadDomains int
-	rows, err := c.db.Query(`SELECT status, COUNT(*) FROM domains WHERE `+domainWhere+` GROUP BY status`, domainArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for rows.Next() {
-		var status string
-		var count int
-		if err := rows.Scan(&status, &count); err != nil {
-			continue
-		}
-		totalDomains += count
-		switch status {
-		case "pass":
-			passDomains = count
-		case "skip":
-			skipDomains = count
-		case "hold":
-			holdDomains = count
-		case "dead":
-			deadDomains = count
-		}
-	}
-	rows.Close()
-	stats["total_domains"] = totalDomains
-	stats["pass_domains"] = passDomains
-	stats["skip_domains"] = skipDomains
-	stats["hold_domains"] = holdDomains
-	stats["dead_domains"] = deadDomains
-
-	// Count matching feeds by status
-	var totalFeeds, passFeeds, skipFeeds, holdFeeds, deadFeeds, emptyFeeds int
-	var rssFeeds, atomFeeds, jsonFeeds, unknownFeeds int
-
-	statusRows, err := c.db.Query(`SELECT COALESCE(status, 'hold'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY status`, feedArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for statusRows.Next() {
-		var status string
-		var count int
-		if err := statusRows.Scan(&status, &count); err != nil {
-			continue
-		}
-		totalFeeds += count
-		switch status {
-		case "pass":
-			passFeeds = count
-		case "skip":
-			skipFeeds = count
-		case "hold":
-			holdFeeds = count
-		case "dead":
-			deadFeeds = count
-		}
-	}
-	statusRows.Close()
-	stats["total_feeds"] = totalFeeds
-	stats["pass_feeds"] = passFeeds
-	stats["skip_feeds"] = skipFeeds
-	stats["hold_feeds"] = holdFeeds
-	stats["dead_feeds"] = deadFeeds
-
-	// Count empty feeds
-	c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE (`+feedWhere+`) AND (item_count IS NULL OR item_count = 0)`, feedArgs...).Scan(&emptyFeeds)
-	stats["empty_feeds"] = emptyFeeds
-
-	typeRows, err := c.db.Query(`SELECT COALESCE(type, 'unknown'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY type`, feedArgs...)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	for typeRows.Next() {
-		var feedType string
-		var count int
-		if err := typeRows.Scan(&feedType, &count); err != nil {
-			continue
-		}
-		switch feedType {
-		case "rss":
-			rssFeeds = count
-		case "atom":
-			atomFeeds = count
-		case "json":
-			jsonFeeds = count
-		default:
-			unknownFeeds += count
-		}
-	}
-	typeRows.Close()
-	stats["rss_feeds"] = rssFeeds
-	stats["atom_feeds"] = atomFeeds
-	stats["json_feeds"] = jsonFeeds
-	stats["unknown_feeds"] = unknownFeeds
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(stats)
-}
-
-// handleAPIDenyDomain skips a domain (takedown accounts, preserve data)
-func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	result := c.skipDomain(host)
-	if result.Error != "" {
-		http.Error(w, result.Error, http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(result)
-}
-
-// DomainActionResult contains the results of a domain action
-type DomainActionResult struct {
-	Success          bool     `json:"success"`
-	Host             string   `json:"host"`
-	Action           string   `json:"action"`
-	FeedsAffected    int64    `json:"feeds_affected,omitempty"`
-	ItemsDeleted     int64    `json:"items_deleted,omitempty"`
-	AccountsAffected int      `json:"accounts_affected,omitempty"`
-	AccountErrors    []string `json:"account_errors,omitempty"`
-	Error            string   `json:"error,omitempty"`
-}
-
-// getPDSCredentials loads PDS credentials from environment or pds.env file
-func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
-	pdsHost = os.Getenv("PDS_HOST")
-	pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD")
-	if pdsHost == "" || pdsAdminPassword == "" {
-		if file, err := os.Open("pds.env"); err == nil {
-			scanner := bufio.NewScanner(file)
-			for scanner.Scan() {
-				line := scanner.Text()
-				if strings.HasPrefix(line, "PDS_HOST=") {
-					pdsHost = strings.TrimPrefix(line, "PDS_HOST=")
-				} else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") {
-					pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=")
-				}
-			}
-			file.Close()
-		}
-	}
-	return
-}
-
-// getDomainDIDs returns all unique publish_account DIDs for a domain's feeds
-func (c *Crawler) getDomainDIDs(host string) []string {
-	domainHost := stripTLD(host)
-	domainTLD := getTLD(host)
-	var dids []string
-	rows, err := c.db.Query(`
-		SELECT DISTINCT publish_account FROM feeds
-		WHERE domain_host = $1 AND domain_tld = $2 AND publish_account IS NOT NULL AND publish_account != ''
-	`, domainHost, domainTLD)
-	if err == nil {
-		defer rows.Close()
-		for rows.Next() {
-			var did string
-			if err := rows.Scan(&did); err == nil && did != "" {
-				dids = append(dids, did)
-			}
-		}
-	}
-	return dids
-}
-
-// skipDomain sets a domain to skip, takes down PDS accounts but preserves all data
-func (c *Crawler) skipDomain(host string) DomainActionResult {
-	result := DomainActionResult{Host: host, Action: "skip"}
-
-	pdsHost, pdsAdminPassword := getPDSCredentials()
-	dids := c.getDomainDIDs(host)
-
-	// Takedown PDS accounts (hide content but preserve data)
-	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
-		publisher := NewPublisher(pdsHost)
-		for _, did := range dids {
-			if err := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip"); err != nil {
-				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
-			} else {
-				result.AccountsAffected++
-			}
-		}
-	}
-
-	// Mark feeds as skipped (but don't delete)
-	domainHost := stripTLD(host)
-	domainTLD := getTLD(host)
-	feedsAffected, err := c.db.Exec(`
-		UPDATE feeds SET status = 'skip', publish_status = 'skip'
-		WHERE domain_host = $1 AND domain_tld = $2
-	`, domainHost, domainTLD)
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to update feeds: %v", err)
-		return result
-	}
-	result.FeedsAffected = feedsAffected
-
-	// Update domain status to skip
-	_, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host))
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
-		return result
-	}
-
-	result.Success = true
-	return result
-}
-
-// handleAPIDropDomain permanently deletes all data for a skipped domain
-func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	// Verify domain is currently skipped
-	var status string
-	err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)).Scan(&status)
-	if err != nil {
-		http.Error(w, "domain not found", http.StatusNotFound)
-		return
-	}
-	if status != "skip" {
-		http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest)
-		return
-	}
-
-	result := c.dropDomain(host)
-	if result.Error != "" {
-		http.Error(w, result.Error, http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(result)
-}
-
-// dropDomain permanently deletes all data for a domain (feeds, items, PDS accounts)
-func (c *Crawler) dropDomain(host string) DomainActionResult {
-	result := DomainActionResult{Host: host, Action: "drop"}
-
-	pdsHost, pdsAdminPassword := getPDSCredentials()
-	dids := c.getDomainDIDs(host)
-
-	// Delete PDS accounts
-	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
-		publisher := NewPublisher(pdsHost)
-		for _, did := range dids {
-			if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil {
-				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
-			} else {
-				result.AccountsAffected++
-			}
-		}
-	}
-
-	// Get feed URLs for this domain (needed to delete items)
-	domainHost := stripTLD(host)
-	domainTLD := getTLD(host)
-	var feedURLs []string
-	feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD)
-	if err == nil {
-		defer feedRows.Close()
-		for feedRows.Next() {
-			var url string
-			if err := feedRows.Scan(&url); err == nil {
-				feedURLs = append(feedURLs, url)
-			}
-		}
-	}
-
-	// Delete items for all feeds from this domain
-	for _, feedURL := range feedURLs {
-		deleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url = $1`, feedURL)
-		if err == nil {
-			result.ItemsDeleted += deleted
-		}
-	}
-
-	// Delete all feeds from this domain
-	feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD)
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
-		return result
-	}
-	result.FeedsAffected = feedsDeleted
-
-	// Update domain status to drop
-	_, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host))
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
-		return result
-	}
-
-	result.Success = true
-	return result
-}
-
-// handleAPIUndenyDomain removes skip status from a domain (restores accounts)
-func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
-	host := r.URL.Query().Get("host")
-	if host == "" {
-		http.Error(w, "host parameter required", http.StatusBadRequest)
-		return
-	}
-
-	// Verify domain is currently skipped
-	var status string
-	err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)).Scan(&status)
-	if err != nil {
-		http.Error(w, "domain not found", http.StatusNotFound)
-		return
-	}
-	if status != "skip" {
-		http.Error(w, "domain is not skipped", http.StatusBadRequest)
-		return
-	}
-
-	result := c.restoreDomain(host)
-	if result.Error != "" {
-		http.Error(w, result.Error, http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(result)
-}
-
-// restoreDomain removes skip status and restores PDS accounts
-func (c *Crawler) restoreDomain(host string) DomainActionResult {
-	result := DomainActionResult{Host: host, Action: "restore"}
-
-	pdsHost, pdsAdminPassword := getPDSCredentials()
-	dids := c.getDomainDIDs(host)
-
-	// Restore PDS accounts (remove takedown)
-	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
-		publisher := NewPublisher(pdsHost)
-		for _, did := range dids {
-			if err := publisher.RestoreAccount(pdsAdminPassword, did); err != nil {
-				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
-			} else {
-				result.AccountsAffected++
-			}
-		}
-	}
-
-	// Restore feeds to pass status
-	domainHost := stripTLD(host)
-	domainTLD := getTLD(host)
-	feedsAffected, err := c.db.Exec(`
-		UPDATE feeds SET status = 'pass', publish_status = 'pass'
-		WHERE domain_host = $1 AND domain_tld = $2
-	`, domainHost, domainTLD)
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to update feeds: %v", err)
-		return result
-	}
-	result.FeedsAffected = feedsAffected
-
-	// Update domain status back to pass
-	_, err = c.db.Exec(`
-		UPDATE domains SET status = 'pass', last_error = NULL
-		WHERE host = $1 AND tld = $2
-	`, stripTLD(host), getTLD(host))
-	if err != nil {
-		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
-		return result
-	}
-
-	result.Success = true
-	return result
-}
diff --git a/api_feeds.go b/api_feeds.go
deleted file mode 100644
index 73b4bd4..0000000
--- a/api_feeds.go
+++ /dev/null
@@ -1,481 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"strings"
-	"time"
-
-	"github.com/jackc/pgx/v5"
-)
-
-func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
-	feedURL := r.URL.Query().Get("url")
-	if feedURL == "" {
-		http.Error(w, "url parameter required", http.StatusBadRequest)
-		return
-	}
-
-	type FeedDetails struct {
-		URL           string `json:"url"`
-		Type          string `json:"type,omitempty"`
-		Category      string `json:"category,omitempty"`
-		Title         string `json:"title,omitempty"`
-		Description   string `json:"description,omitempty"`
-		Language      string `json:"language,omitempty"`
-		SiteURL       string `json:"siteUrl,omitempty"`
-		DiscoveredAt  string `json:"discoveredAt,omitempty"`
-		LastCheckedAt string `json:"lastCheckedAt,omitempty"`
-		NextCheckAt   string `json:"nextCheckAt,omitempty"`
-		LastBuildDate string `json:"lastBuildDate,omitempty"`
-		Status        string `json:"status,omitempty"`
-		LastError     string `json:"lastError,omitempty"`
-
ItemCount int `json:"itemCount,omitempty"` - OldestItemDate string `json:"oldestItemDate,omitempty"` - NewestItemDate string `json:"newestItemDate,omitempty"` - PublishStatus string `json:"publishStatus,omitempty"` - PublishAccount string `json:"publishAccount,omitempty"` - } - - var f FeedDetails - var category, title, description, language, siteUrl *string - var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time - var status, lastError *string - var oldestItemDate, newestItemDate *time.Time - var itemCount *int - var discoveredAt time.Time - var publishStatus, publishAccount *string - - err := c.db.QueryRow(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, - (SELECT COUNT(*) FROM items WHERE feed_url = feeds.url) as item_count, - oldest_item_date, newest_item_date, - publish_status, publish_account - FROM feeds WHERE url = $1 - `, feedURL).Scan( - &f.URL, &f.Type, &category, &title, &description, &language, &siteUrl, - &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, - &status, &lastError, - &itemCount, &oldestItemDate, &newestItemDate, - &publishStatus, &publishAccount, - ) - - if err == pgx.ErrNoRows { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - f.Category = StringValue(category) - f.Title = StringValue(title) - f.Description = StringValue(description) - f.Language = StringValue(language) - f.SiteURL = StringValue(siteUrl) - f.DiscoveredAt = discoveredAt.Format(time.RFC3339) - if lastCheckedAt != nil { - f.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) - } - if nextCheckAt != nil { - f.NextCheckAt = nextCheckAt.Format(time.RFC3339) - } - if lastBuildDate != nil { - f.LastBuildDate = lastBuildDate.Format(time.RFC3339) - } - f.Status = StringValue(status) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - if oldestItemDate != nil { - f.OldestItemDate = oldestItemDate.Format(time.RFC3339) - } - if newestItemDate != nil { - f.NewestItemDate = newestItemDate.Format(time.RFC3339) - } - f.PublishStatus = StringValue(publishStatus) - f.PublishAccount = StringValue(publishAccount) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(f) -} - -func (c *Crawler) handleAPIFeedItems(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 100 { - limit = 100 - } - } - - items, err := c.GetItemsByFeed(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if items == nil { - items = []*Item{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(items) -} - -func (c *Crawler) handleAPIFeedsByStatus(w http.ResponseWriter, r *http.Request) { - status := r.URL.Query().Get("status") - if status == "" { - http.Error(w, "status parameter required", http.StatusBadRequest) - return - } - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - rows, err := c.db.Query(` - SELECT url, title, type, 
domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count - FROM feeds - WHERE status = $1 - ORDER BY url ASC - LIMIT $2 OFFSET $3 - `, status, limit, offset) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, sourceHost, tld, lastError *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount); err != nil { - continue - } - f.Title = StringValue(title) - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tld) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(feeds) -} - -// handleAPIFeeds lists feeds with optional publish_status filter -func (c *Crawler) handleAPIFeeds(w http.ResponseWriter, r *http.Request) { - publishStatus := r.URL.Query().Get("publish_status") - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - var rows pgx.Rows - var err error - if publishStatus != "" { - rows, err = c.db.Query(` - SELECT url, title, type, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language - FROM feeds - WHERE publish_status = $1 - ORDER BY url ASC - LIMIT $2 OFFSET $3 - `, publishStatus, limit, offset) - } else { - rows, err = c.db.Query(` - SELECT url, title, type, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language - FROM feeds - ORDER BY url ASC - LIMIT $1 OFFSET $2 - `, limit, offset) - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - PublishStatus string `json:"publish_status,omitempty"` - Language string `json:"language,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, sourceHost, tld, lastError, publishStatus, language *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount, &publishStatus, &language); err != nil { - continue - } - f.Title = StringValue(title) - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tld) - f.LastError = StringValue(lastError) - f.PublishStatus = StringValue(publishStatus) - f.Language = StringValue(language) - if itemCount != nil { - f.ItemCount = *itemCount - } - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(feeds) -} - -func (c *Crawler) filterFeeds(w http.ResponseWriter, tld, domain, status string, languages []string, limit, offset int) { - var args []interface{} - argNum := 1 - query := ` - SELECT url, title, type, category, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, language - FROM feeds - WHERE 1=1` - - if tld != "" { - query += fmt.Sprintf(" AND domain_tld = $%d", argNum) - args = append(args, tld) - argNum++ - } - if domain != "" { - // Parse domain into host and tld parts - domainHost := stripTLD(domain) - domainTLD := getTLD(domain) - query += fmt.Sprintf(" AND domain_host = $%d AND domain_tld = $%d", argNum, argNum+1) - args = append(args, domainHost, domainTLD) - argNum += 2 - } - if status != "" { - query += fmt.Sprintf(" AND status = $%d", argNum) - args = append(args, status) - argNum++ - } - if len(languages) > 0 { - // Build IN clause for languages, handling 'unknown' as empty string - placeholders := make([]string, len(languages)) - for i, lang := range languages { - placeholders[i] = fmt.Sprintf("$%d", argNum) - if lang == "unknown" { - args = append(args, "") - } else { - args = append(args, lang) - } - argNum++ - } - query += fmt.Sprintf(" AND COALESCE(language, '') IN (%s)", strings.Join(placeholders, ",")) - } - - query += fmt.Sprintf(" ORDER BY url ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) - args = append(args, limit, offset) - - rows, err := c.db.Query(query, args...) 
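filterFeeds above tracks placeholder numbers by hand via argNum, which gets brittle as clauses are added or reordered. A hedged sketch of an alternative (condBuilder is hypothetical, not part of this file) where each $n index is derived from the running argument count:

package main

import (
	"fmt"
	"strings"
)

// condBuilder accumulates WHERE conditions; each value's $n placeholder is
// numbered from the running length of args, so clauses can be appended in
// any order without manual argNum arithmetic.
type condBuilder struct {
	conds []string
	args  []interface{}
}

// add appends one condition; format should contain one %d per value.
func (b *condBuilder) add(format string, vals ...interface{}) {
	nums := make([]interface{}, len(vals))
	for i, v := range vals {
		b.args = append(b.args, v)
		nums[i] = len(b.args) // 1-based placeholder number
	}
	b.conds = append(b.conds, fmt.Sprintf(format, nums...))
}

func (b *condBuilder) where() string {
	if len(b.conds) == 0 {
		return ""
	}
	return " WHERE " + strings.Join(b.conds, " AND ")
}

func main() {
	var b condBuilder
	b.add("domain_tld = $%d", "org")
	b.add("domain_host = $%d AND domain_tld = $%d", "npr", "org")
	fmt.Println(b.where()) // WHERE domain_tld = $1 AND domain_host = $2 AND domain_tld = $3
	fmt.Println(b.args)    // [org npr org]
}

The ORDER BY/LIMIT tail would be appended the same way before the Query call above.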
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - Category string `json:"category"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - Language string `json:"language,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, category, sourceHost, tldVal, lastError, language *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &category, &sourceHost, &tldVal, &f.Status, &lastError, &itemCount, &language); err != nil { - continue - } - f.Title = StringValue(title) - if category != nil && *category != "" { - f.Category = *category - } else { - f.Category = "main" - } - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tldVal) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - f.Language = StringValue(language) - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "type": "feeds", - "data": feeds, - }) -} - -// handleAPICheckFeed immediately checks a feed and returns items -func (c *Crawler) handleAPICheckFeed(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - force := r.URL.Query().Get("force") == "true" - - feedURL = normalizeURL(feedURL) - - // Get the feed - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - - // Clear cache headers if force is requested - if force { - feed.ETag = "" - feed.LastModified = "" - } - - // Force check the feed - fmt.Printf("Force check feed: %s (force=%v)\n", feedURL, force) - changed, checkErr := c.CheckFeed(feed) - - // Get updated feed info - feed, _ = c.getFeed(feedURL) - - // Get items - items, _ := c.GetItemsByFeed(feedURL, 20) - - type ItemSummary struct { - Title string `json:"title"` - Link string `json:"link"` - PubDate string `json:"pub_date,omitempty"` - Author string `json:"author,omitempty"` - } - var itemSummaries []ItemSummary - for _, item := range items { - is := ItemSummary{ - Title: item.Title, - Link: item.Link, - Author: item.Author, - } - if !item.PubDate.IsZero() { - is.PubDate = item.PubDate.Format("2006-01-02 15:04") - } - itemSummaries = append(itemSummaries, is) - } - - result := map[string]interface{}{ - "url": feedURL, - "title": feed.Title, - "type": feed.Type, - "category": feed.Category, - "status": feed.Status, - "changed": changed, - "itemCount": feed.ItemCount, - "items": itemSummaries, - } - if checkErr != nil { - result["error"] = checkErr.Error() - } - if feed.LastError != "" { - result["lastError"] = feed.LastError - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPILanguages returns distinct languages with counts -func (c *Crawler) handleAPILanguages(w http.ResponseWriter, r *http.Request) { - rows, err := c.db.Query(` - SELECT COALESCE(NULLIF(language, ''), 'unknown') as lang, COUNT(*) as cnt - FROM feeds - GROUP BY lang - ORDER BY cnt DESC - `) - if err 
!= nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type LangInfo struct { - Language string `json:"language"` - Count int `json:"count"` - } - - var languages []LangInfo - for rows.Next() { - var l LangInfo - if err := rows.Scan(&l.Language, &l.Count); err != nil { - continue - } - languages = append(languages, l) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(languages) -} diff --git a/api_publish.go b/api_publish.go deleted file mode 100644 index 54f9296..0000000 --- a/api_publish.go +++ /dev/null @@ -1,1031 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net/http" - "os" - "strings" - "time" -) - -// handleAPIEnablePublish sets a feed's publish status to 'pass' -// If account is not provided, it will be auto-derived from the feed URL -func (c *Crawler) handleAPIEnablePublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - account := r.URL.Query().Get("account") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - // Auto-derive account handle if not provided - if account == "" { - account = DeriveHandleFromFeed(feedURL) - if account == "" { - http.Error(w, "could not derive account handle from URL", http.StatusBadRequest) - return - } - } - - // Check feed exists - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - - if err := c.SetPublishStatus(feedURL, "pass", account); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Get unpublished count - count, _ := c.GetUnpublishedItemCount(feedURL) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "pass", - "url": feedURL, - "account": account, - "unpublished_items": count, - }) -} - -// handleAPIDeriveHandle shows what handle would be derived from a feed URL -func (c *Crawler) handleAPIDeriveHandle(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - handle := DeriveHandleFromFeed(feedURL) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "url": feedURL, - "handle": handle, - }) -} - -// handleAPIDisablePublish sets a feed's publish status to 'skip' -func (c *Crawler) handleAPIDisablePublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - if err := c.SetPublishStatus(feedURL, "skip", ""); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "skip", - "url": feedURL, - }) -} - -// handleAPIPublishEnabled returns all feeds with publish status 'pass' -func (c *Crawler) handleAPIPublishEnabled(w http.ResponseWriter, r *http.Request) { - feeds, err := c.GetFeedsByPublishStatus("pass") - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type FeedPublishInfo struct { - URL string `json:"url"` - Title string 
`json:"title"` - Account string `json:"account"` - UnpublishedCount int `json:"unpublished_count"` - } - - var result []FeedPublishInfo - for _, f := range feeds { - count, _ := c.GetUnpublishedItemCount(f.URL) - result = append(result, FeedPublishInfo{ - URL: f.URL, - Title: f.Title, - Account: f.PublishAccount, - UnpublishedCount: count, - }) - } - - if result == nil { - result = []FeedPublishInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPIPublishDenied returns all feeds with publish status 'skip' -func (c *Crawler) handleAPIPublishDenied(w http.ResponseWriter, r *http.Request) { - feeds, err := c.GetFeedsByPublishStatus("skip") - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type FeedDeniedInfo struct { - URL string `json:"url"` - Title string `json:"title"` - SourceHost string `json:"source_host"` - } - - var result []FeedDeniedInfo - for _, f := range feeds { - result = append(result, FeedDeniedInfo{ - URL: f.URL, - Title: f.Title, - SourceHost: fullHost(f.DomainHost, f.DomainTLD), - }) - } - - if result == nil { - result = []FeedDeniedInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPIPublishCandidates returns feeds pending review that have items -func (c *Crawler) handleAPIPublishCandidates(w http.ResponseWriter, r *http.Request) { - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 200 { - limit = 200 - } - } - - feeds, err := c.GetPublishCandidates(limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type CandidateInfo struct { - URL string `json:"url"` - Title string `json:"title"` - Category string `json:"category"` - SourceHost string `json:"source_host"` - ItemCount int `json:"item_count"` - DerivedHandle string `json:"derived_handle"` - } - - var result []CandidateInfo - for _, f := range feeds { - result = append(result, CandidateInfo{ - URL: f.URL, - Title: f.Title, - Category: f.Category, - SourceHost: fullHost(f.DomainHost, f.DomainTLD), - ItemCount: f.ItemCount, - DerivedHandle: DeriveHandleFromFeed(f.URL), - }) - } - - if result == nil { - result = []CandidateInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPISetPublishStatus sets the publish status for a feed -// Status values: -// - 'pass': Create account if needed, begin publishing -// - 'hold': Crawl and store items but don't publish (default) -// - 'skip': Stop crawling but keep existing data -// - 'drop': Full cleanup - remove items, posts, and account -func (c *Crawler) handleAPISetPublishStatus(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - status := r.URL.Query().Get("status") - account := r.URL.Query().Get("account") - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if status != "pass" && status != "skip" && status != "hold" && status != "drop" { - http.Error(w, "status must be 'pass', 'hold', 'skip', or 'drop'", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - result := map[string]interface{}{ - "url": feedURL, - "status": status, - } - - // Handle 'drop' - full cleanup then set to skip - if status == "drop" { - cleanup := c.cleanupFeedPublishing(feedURL) - result["cleanup"] = cleanup - // After dropping, set status to skip with no account - 
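// ('drop' is therefore never a stored status: the cleanup above runs first,
// then the row lands on 'skip', so the feed is neither re-crawled nor
// re-published. A sketch of making the accepted set explicit instead of the
// chained string comparisons earlier in this handler; validPublishStatus is
// hypothetical, not in the original file:
//
//	var validPublishStatus = map[string]bool{
//		"pass": true, "hold": true, "skip": true, "drop": true,
//	}
//	if !validPublishStatus[status] {
//		http.Error(w, "status must be 'pass', 'hold', 'skip', or 'drop'", http.StatusBadRequest)
//		return
//	}
// )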
if err := c.SetPublishStatus(feedURL, "skip", ""); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - result["account"] = "" - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) - return - } - - // Handle 'pass' - create account if needed and publish - if status == "pass" { - if account == "" { - account = DeriveHandleFromFeed(feedURL) - } - - // Check if account exists on PDS, create if not - created, err := c.ensureFeedAccountExists(feedURL, account) - if err != nil { - result["error"] = err.Error() - } else if created { - result["account_created"] = true - } - result["account"] = account - } - - // Handle 'hold' and 'skip' - just update status - if status == "hold" || status == "skip" { - // Get current account if any (don't change it) - feed, _ := c.getFeed(feedURL) - if feed != nil { - account = feed.PublishAccount - } - } - - if err := c.SetPublishStatus(feedURL, status, account); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - result["account"] = account - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// ensureFeedAccountExists creates the PDS account for a feed if it doesn't exist -// Returns (created bool, error) -func (c *Crawler) ensureFeedAccountExists(feedURL, account string) (bool, error) { - // Load PDS credentials - pdsHost := os.Getenv("PDS_HOST") - pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD") - feedPassword := os.Getenv("FEED_PASSWORD") - - if pdsHost == "" { - if envData, err := os.ReadFile("pds.env"); err == nil { - for _, line := range strings.Split(string(envData), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "PDS_HOST=") { - pdsHost = strings.TrimPrefix(line, "PDS_HOST=") - } else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") { - pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=") - } else if strings.HasPrefix(line, "FEED_PASSWORD=") { - feedPassword = strings.TrimPrefix(line, "FEED_PASSWORD=") - } - } - } - } - - if pdsHost == "" || pdsAdminPassword == "" { - return false, fmt.Errorf("PDS credentials not configured") - } - if feedPassword == "" { - feedPassword = "feed1440!" 
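The env-then-pds.env fallback above is repeated in getPDSCredentials and again in cleanupFeedPublishing, with only the key names changing. A sketch of a shared helper (envOrFile is hypothetical, not in the original file):

package main

import (
	"os"
	"strings"
)

// envOrFile returns the named variable from the environment, falling back to
// a KEY=VALUE line in the given file (e.g. "pds.env"). Missing keys yield "".
func envOrFile(key, path string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	data, err := os.ReadFile(path)
	if err != nil {
		return ""
	}
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, key+"=") {
			return strings.TrimPrefix(line, key+"=")
		}
	}
	return ""
}

Each call site then reduces to pdsHost := envOrFile("PDS_HOST", "pds.env") and so on.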
- } - - publisher := NewPublisher(pdsHost) - - // account is already the full handle (e.g., "ycombinator-blog.1440.news") - handle := account - if !strings.HasSuffix(handle, ".1440.news") { - handle = account + ".1440.news" - } - - // Try to login - if successful, account exists - _, err := publisher.CreateSession(handle, feedPassword) - if err == nil { - return false, nil // Account already exists - } - - // Account doesn't exist, create it - inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - return false, fmt.Errorf("failed to create invite: %w", err) - } - - email := handle + "@1440.news" - session, err := publisher.CreateAccount(handle, email, feedPassword, inviteCode) - if err != nil { - return false, fmt.Errorf("failed to create account: %w", err) - } - - fmt.Printf("Created account %s for feed %s\n", handle, feedURL) - - // Set up profile - feed, _ := c.getFeed(feedURL) - if feed != nil { - sourceHost := fullHost(feed.DomainHost, feed.DomainTLD) - displayName := feed.Title - if displayName == "" { - displayName = sourceHost - } - description := feed.Description - if description == "" { - description = "News feed via 1440.news" - } - // Add feed URL to description - feedURLFull := "https://" + feedURL - description = feedURLFull + "\n\n" + description - if len(displayName) > 64 { - displayName = displayName[:61] + "..." - } - if len(description) > 256 { - description = description[:253] + "..." - } - - // Try to fetch favicon - var avatar *BlobRef - faviconData, mimeType, err := FetchFaviconBytes(sourceHost) - if err == nil && len(faviconData) > 0 { - avatar, _ = publisher.UploadBlob(session, faviconData, mimeType) - } - - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - fmt.Printf("Failed to set profile for %s: %v\n", handle, err) - } - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("Directory follow failed for %s: %v\n", handle, err) - } - - return true, nil -} - -// cleanupFeedPublishing removes all published content for a feed -// Returns a summary of what was cleaned up -func (c *Crawler) cleanupFeedPublishing(feedURL string) map[string]interface{} { - result := map[string]interface{}{ - "posts_deleted": 0, - "account_deleted": false, - "items_cleared": 0, - } - - // Get feed info to find the account - feed, err := c.getFeed(feedURL) - if err != nil || feed == nil { - result["error"] = "feed not found" - return result - } - - if feed.PublishAccount == "" { - // No account associated, just clear items - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - return result - } - - // Load PDS credentials - pdsHost := os.Getenv("PDS_HOST") - pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD") - feedPassword := os.Getenv("FEED_PASSWORD") - - if pdsHost == "" { - // Try loading from pds.env - if envData, err := os.ReadFile("pds.env"); err == nil { - for _, line := range strings.Split(string(envData), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "PDS_HOST=") { - pdsHost = strings.TrimPrefix(line, "PDS_HOST=") - } else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") { - pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=") - } else if strings.HasPrefix(line, "FEED_PASSWORD=") { - feedPassword = strings.TrimPrefix(line, "FEED_PASSWORD=") - } - } - } - } - - if pdsHost == "" || feedPassword == "" { 
- result["error"] = "PDS credentials not configured" - // Still clear items in database - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - return result - } - - publisher := NewPublisher(pdsHost) - - // Try to authenticate as the feed account - session, err := publisher.CreateSession(feed.PublishAccount, feedPassword) - if err == nil && session != nil { - // Delete all posts - deleted, err := publisher.DeleteAllPosts(session) - if err == nil { - result["posts_deleted"] = deleted - } else { - result["posts_delete_error"] = err.Error() - } - } else { - result["session_error"] = "could not authenticate to delete posts" - } - - // Delete the account using admin API - if pdsAdminPassword != "" && session != nil { - err := publisher.DeleteAccount(pdsAdminPassword, session.DID) - if err == nil { - result["account_deleted"] = true - } else { - result["account_delete_error"] = err.Error() - } - } - - // Clear published_at on all items - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - - // Clear publish_account on feed - c.db.Exec(`UPDATE feeds SET publish_account = NULL WHERE url = $1`, feedURL) - - return result -} - -// handleAPIUnpublishedItems returns unpublished items for a feed -func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 200 { - limit = 200 - } - } - - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if items == nil { - items = []*Item{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(items) -} - -// handleAPITestPublish tests publishing a single item to PDS -// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news) -func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("feedUrl") - guidParam := r.URL.Query().Get("guid") - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if feedURL == "" || guidParam == "" { - http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest) - return - } - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://1440.news" - } - - // Get the item - var item Item - var guid, title, link, description, content, author *string - var pubDate, updatedAt, publishedAt *time.Time - var publishedUri *string - - err := c.db.QueryRow(` - SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri - FROM items WHERE feed_url = $1 AND guid = $2 - `, feedURL, guidParam).Scan( - &item.FeedURL, &guid, &title, &link, - &description, &content, &author, &pubDate, - &item.DiscoveredAt, &updatedAt, &publishedAt, &publishedUri, - ) - if err != nil { - http.Error(w, "item not found: "+err.Error(), http.StatusNotFound) - return - } - - item.GUID = StringValue(guid) - item.Title = StringValue(title) - item.Link = 
StringValue(link) - item.Description = StringValue(description) - item.Content = StringValue(content) - item.Author = StringValue(author) - if pubDate != nil { - item.PubDate = *pubDate - } - - // Create publisher and authenticate - publisher := NewPublisher(pdsHost) - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - // Publish the item - uri, err := publisher.PublishItem(session, &item) - if err != nil { - http.Error(w, "publish failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Mark as published - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - - // Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt - rkeyTime := item.PubDate - if rkeyTime.IsZero() { - rkeyTime = item.DiscoveredAt - } - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "published", - "uri": uri, - "feedUrl": item.FeedURL, - "guid": item.GUID, - "title": item.Title, - "rkey": GenerateRkey(item.GUID, rkeyTime), - }) -} - -// handleAPIPublishFeed publishes unpublished items for a feed -// Requires: url (feed), handle, password, pds (optional), limit (optional, default 10) -func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://1440.news" - } - - limit := 10 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 50 { - limit = 50 - } - } - - feedURL = normalizeURL(feedURL) - - // Get unpublished items (ordered by pubDate ASC - oldest first) - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if len(items) == 0 { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "no_items", - "published": 0, - }) - return - } - - // Create publisher and authenticate - publisher := NewPublisher(pdsHost) - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - type PublishResult struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - URI string `json:"uri,omitempty"` - Error string `json:"error,omitempty"` - } - - var results []PublishResult - published := 0 - failed := 0 - - for i, item := range items { - result := PublishResult{ - FeedURL: item.FeedURL, - GUID: item.GUID, - Title: item.Title, - } - - uri, err := publisher.PublishItem(session, item) - if err != nil { - result.Error = err.Error() - failed++ - } else { - result.URI = uri - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - published++ - } - - results = append(results, result) - - // Add delay between posts to ensure unique timestamps for relay indexing - if i < len(items)-1 { - time.Sleep(1100 * time.Millisecond) - } - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": 
"complete", - "published": published, - "failed": failed, - "results": results, - }) -} - -// handleAPICreateAccount creates a new account on the PDS -// Requires: handle, email, password, pds (optional), inviteCode (optional) -// If pdsAdminPassword is provided, it will create an invite code first -func (c *Crawler) handleAPICreateAccount(w http.ResponseWriter, r *http.Request) { - handle := r.URL.Query().Get("handle") - email := r.URL.Query().Get("email") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - inviteCode := r.URL.Query().Get("inviteCode") - pdsAdminPassword := r.URL.Query().Get("pdsAdminPassword") - - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - if email == "" { - // Generate a placeholder email from handle - email = handle + "@1440.news" - } - - publisher := NewPublisher(pdsHost) - - // If PDS admin password provided, create an invite code first - if pdsAdminPassword != "" && inviteCode == "" { - code, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - http.Error(w, "create invite failed: "+err.Error(), http.StatusInternalServerError) - return - } - inviteCode = code - } - - // Create the account - session, err := publisher.CreateAccount(handle, email, password, inviteCode) - if err != nil { - http.Error(w, "create account failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("API: directory follow failed for %s: %v\n", handle, err) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "created", - "handle": session.Handle, - "did": session.DID, - }) -} - -// handleAPIPublishFeedFull creates an account (if needed) and publishes items -// This is a convenience endpoint that combines account creation and publishing -// Requires: url (feed), pdsAdminPassword, pds (optional), limit (optional), feedPassword (optional) -func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - pdsAdminPassword := r.URL.Query().Get("pdsAdminPassword") - pdsHost := r.URL.Query().Get("pds") - feedPassword := r.URL.Query().Get("feedPassword") // Password for new feed accounts - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if pdsAdminPassword == "" { - http.Error(w, "pdsAdminPassword parameter required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - if feedPassword == "" { - feedPassword = "feed1440!" 
// Default password for feed accounts - } - - limit := 10 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 50 { - limit = 50 - } - } - - feedURL = normalizeURL(feedURL) - - // Get the feed to check its status and get the derived handle - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - if feed.PublishStatus != "pass" { - http.Error(w, "feed is not approved for publishing (status: "+feed.PublishStatus+")", http.StatusBadRequest) - return - } - - handle := feed.PublishAccount - if handle == "" { - handle = DeriveHandleFromFeed(feedURL) - } - email := handle + "@1440.news" - - publisher := NewPublisher(pdsHost) - - // First, try to authenticate with the feed account - session, err := publisher.CreateSession(handle, feedPassword) - if err != nil { - // Account doesn't exist, create it - fmt.Printf("Account %s doesn't exist, creating...\n", handle) - - // Create invite code using PDS admin password - inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - http.Error(w, "create invite failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Create the account - session, err = publisher.CreateAccount(handle, email, feedPassword, inviteCode) - if err != nil { - http.Error(w, "create account failed: "+err.Error(), http.StatusInternalServerError) - return - } - fmt.Printf("Created account: %s (%s)\n", session.Handle, session.DID) - - // Set up profile with feed title and favicon - sourceHost := fullHost(feed.DomainHost, feed.DomainTLD) - displayName := feed.Title - if displayName == "" { - displayName = sourceHost - } - description := feed.Description - - // Try to fetch favicon for avatar - var avatar *BlobRef - faviconData, mimeType, err := FetchFaviconBytes(sourceHost) - if err == nil && len(faviconData) > 0 { - avatar, err = publisher.UploadBlob(session, faviconData, mimeType) - if err != nil { - fmt.Printf("Failed to upload favicon: %v\n", err) - } - } - - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - fmt.Printf("Failed to update profile: %v\n", err) - } else { - fmt.Printf("Set profile for %s: %s\n", handle, displayName) - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("API: directory follow failed for %s: %v\n", handle, err) - } - } - - // Get unpublished items - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if len(items) == 0 { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "no_items", - "handle": handle, - "published": 0, - }) - return - } - - type PublishResult struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - URI string `json:"uri,omitempty"` - Error string `json:"error,omitempty"` - } - - var results []PublishResult - published := 0 - failed := 0 - - for i, item := range items { - result := PublishResult{ - FeedURL: item.FeedURL, - GUID: item.GUID, - Title: item.Title, - } - - uri, err := publisher.PublishItem(session, item) - if err != nil { - result.Error = err.Error() - failed++ - } else { - result.URI = uri - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - published++ - } - 
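// (The 1100 ms Sleep a few lines below spaces posts just over a second apart
// so each record's createdAt stays unique at second granularity for relay
// indexing; see the comment there. If steadier pacing mattered, a time.Ticker
// drains intervals more evenly than Sleep-after-work, e.g.:
//
//	tick := time.NewTicker(1100 * time.Millisecond)
//	defer tick.Stop()
//	for _, it := range items {
//		publishOne(it) // hypothetical per-item helper
//		<-tick.C       // wait out the remainder of the interval
//	}
// )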
- results = append(results, result) - - // Add delay between posts to ensure unique timestamps for relay indexing - if i < len(items)-1 { - time.Sleep(1100 * time.Millisecond) - } - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "complete", - "handle": handle, - "did": session.DID, - "published": published, - "failed": failed, - "results": results, - }) -} - -// handleAPIUpdateProfile updates a profile for an existing account -// Requires: handle, password, pds (optional), displayName (optional), description (optional), faviconUrl (optional) -func (c *Crawler) handleAPIUpdateProfile(w http.ResponseWriter, r *http.Request) { - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - displayName := r.URL.Query().Get("displayName") - description := r.URL.Query().Get("description") - faviconURL := r.URL.Query().Get("faviconUrl") - - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - - publisher := NewPublisher(pdsHost) - - // Authenticate - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - // Fetch favicon if URL provided - var avatar *BlobRef - if faviconURL != "" { - faviconData, mimeType, err := FetchFaviconBytes(faviconURL) - if err != nil { - http.Error(w, "fetch favicon failed: "+err.Error(), http.StatusBadRequest) - return - } - avatar, err = publisher.UploadBlob(session, faviconData, mimeType) - if err != nil { - http.Error(w, "upload favicon failed: "+err.Error(), http.StatusInternalServerError) - return - } - } - - // Update profile - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - http.Error(w, "update profile failed: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "updated", - "handle": handle, - "displayName": displayName, - "hasAvatar": avatar != nil, - }) -} - -// handleAPIResetAllPublishing clears all publish accounts and published_at timestamps -func (c *Crawler) handleAPIResetAllPublishing(w http.ResponseWriter, r *http.Request) { - // Clear all publish_account fields - accountsCleared, err := c.db.Exec(`UPDATE feeds SET publish_account = NULL WHERE publish_account IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Clear all published_at timestamps - itemsCleared, err := c.db.Exec(`UPDATE items SET published_at = NULL WHERE published_at IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Reset all publish_status to 'hold' - statusReset, err := c.db.Exec(`UPDATE feeds SET publish_status = 'hold' WHERE publish_status IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "success": true, - "accounts_cleared": accountsCleared, - "items_cleared": itemsCleared, - "status_reset": statusReset, - }) -} - -// handleAPIRefreshProfiles refreshes all account profiles (avatars, descriptions) -// Requires: password (feed account password), pds 
(optional, defaults to pds.1440.news) -func (c *Crawler) handleAPIRefreshProfiles(w http.ResponseWriter, r *http.Request) { - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if password == "" { - http.Error(w, "password parameter required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - - publisher := NewPublisher(pdsHost) - - // Run RefreshAllProfiles synchronously - c.RefreshAllProfiles(publisher, password) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "success": true, - "message": "profiles refreshed", - }) -} diff --git a/api_search.go b/api_search.go deleted file mode 100644 index f6945c5..0000000 --- a/api_search.go +++ /dev/null @@ -1,311 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "github.com/jackc/pgx/v5" -) - -// SearchResult represents a search result with feed and matching items -type SearchResult struct { - Feed SearchFeed `json:"feed"` - Items []SearchItem `json:"items"` -} - -type SearchFeed struct { - URL string `json:"url"` - Type string `json:"type"` - Category string `json:"category"` - Title string `json:"title"` - Description string `json:"description"` - Language string `json:"language"` - SiteURL string `json:"site_url"` - DiscoveredAt string `json:"discovered_at"` - LastCheckedAt string `json:"last_checked_at"` - NextCheckAt string `json:"next_check_at"` - LastBuildDate string `json:"last_build_date"` - Status string `json:"status"` - LastError string `json:"last_error"` - LastErrorAt string `json:"last_error_at"` - SourceURL string `json:"source_url"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - ItemCount int `json:"item_count"` - OldestItemDate string `json:"oldest_item_date"` - NewestItemDate string `json:"newest_item_date"` - NoUpdate bool `json:"no_update"` -} - -type SearchItem struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - Link string `json:"link"` - Description string `json:"description"` - Content string `json:"content"` - Author string `json:"author"` - PubDate string `json:"pub_date"` - DiscoveredAt string `json:"discovered_at"` - UpdatedAt string `json:"updated_at"` -} - -func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) { - query := r.URL.Query().Get("q") - if query == "" { - http.Error(w, "q parameter required", http.StatusBadRequest) - return - } - - limit := 100 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - - // Results map: feedURL -> SearchResult - results := make(map[string]*SearchResult) - - // Helper to scan feed row into SearchFeed - scanFeed := func(rows pgx.Rows) (string, SearchFeed, bool) { - var url string - var feedType, category, title, description, language, siteUrl *string - var discoveredAt time.Time - var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time - var itemCount *int - var status, lastError *string - var lastErrorAt *time.Time - var sourceUrl, sourceHost, tld *string - var oldestItemDate, newestItemDate *time.Time - var noUpdate *bool - - if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl, - &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, - &status, &lastError, &lastErrorAt, - &sourceUrl, &sourceHost, &tld, - &itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil { - return "", 
SearchFeed{}, false - } - cat := StringValue(category) - if cat == "" { - cat = "main" - } - sf := SearchFeed{ - URL: url, - Type: StringValue(feedType), - Category: cat, - Title: StringValue(title), - Description: StringValue(description), - Language: StringValue(language), - SiteURL: StringValue(siteUrl), - DiscoveredAt: discoveredAt.Format(time.RFC3339), - Status: StringValue(status), - LastError: StringValue(lastError), - SourceURL: StringValue(sourceUrl), - SourceHost: StringValue(sourceHost), - TLD: StringValue(tld), - } - if lastCheckedAt != nil { - sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) - } - if nextCheckAt != nil { - sf.NextCheckAt = nextCheckAt.Format(time.RFC3339) - } - if lastBuildDate != nil { - sf.LastBuildDate = lastBuildDate.Format(time.RFC3339) - } - if lastErrorAt != nil { - sf.LastErrorAt = lastErrorAt.Format(time.RFC3339) - } - if itemCount != nil { - sf.ItemCount = *itemCount - } - if oldestItemDate != nil { - sf.OldestItemDate = oldestItemDate.Format(time.RFC3339) - } - if newestItemDate != nil { - sf.NewestItemDate = newestItemDate.Format(time.RFC3339) - } - if noUpdate != nil { - sf.NoUpdate = *noUpdate - } - return url, sf, true - } - - // Search feeds by domain_host (LIKE search for domain matching) - // Use LOWER() to leverage trigram index - lowerPattern := "%" + strings.ToLower(query) + "%" - hostRows, err := c.db.Query(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds - WHERE LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 - LIMIT $2 - `, lowerPattern, limit) - if err == nil { - defer hostRows.Close() - for hostRows.Next() { - if url, feed, ok := scanFeed(hostRows); ok { - if _, exists := results[url]; !exists { - results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} - } - } - } - } - - // Search feeds via full-text search - tsQuery := ToSearchQuery(query) - feedRows, err := c.db.Query(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds - WHERE search_vector @@ to_tsquery('english', $1) - LIMIT $2 - `, tsQuery, limit) - if err == nil { - defer feedRows.Close() - for feedRows.Next() { - if url, feed, ok := scanFeed(feedRows); ok { - if _, exists := results[url]; !exists { - results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} - } - } - } - } - - // Search items via full-text search - itemRows, err := c.db.Query(` - SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at - FROM items i - WHERE i.search_vector @@ to_tsquery('english', $1) - ORDER BY i.pub_date DESC - LIMIT $2 - `, tsQuery, limit) - if err == nil { - defer itemRows.Close() - for itemRows.Next() { - var feedUrl string - var guid, title, link, description, content, author *string - var pubDate, discoveredAt, updatedAt *time.Time - if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil { - continue - } - - item := SearchItem{ - FeedURL: feedUrl, - GUID: StringValue(guid), - Title: StringValue(title), - Link: StringValue(link), - Description: StringValue(description), - Content: StringValue(content), - Author: StringValue(author), - } - if pubDate != nil { - item.PubDate = pubDate.Format(time.RFC3339) - } - if discoveredAt != nil { - item.DiscoveredAt = discoveredAt.Format(time.RFC3339) - } - if updatedAt != nil { - item.UpdatedAt = updatedAt.Format(time.RFC3339) - } - - // Add to existing result or create new one - if result, exists := results[feedUrl]; exists { - result.Items = append(result.Items, item) - } else { - // Fetch feed info for this item's feed - var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string - var fDiscoveredAt time.Time - var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time - var fItemCount *int - var fStatus, fLastError *string - var fLastErrorAt *time.Time - var fSourceUrl, fSourceHost, fTLD *string - var fOldestItemDate, fNewestItemDate *time.Time - var fNoUpdate *bool - - c.db.QueryRow(` - SELECT type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds WHERE url = $1 - `, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl, - &fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate, - &fStatus, &fLastError, &fLastErrorAt, - &fSourceUrl, &fSourceHost, &fTLD, - &fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate) - - fCat := StringValue(fCategory) - if fCat == "" { - fCat = "main" - } - sf := SearchFeed{ - URL: feedUrl, - Type: StringValue(fType), - Category: fCat, - Title: StringValue(fTitle), - Description: StringValue(fDesc), - Language: StringValue(fLang), - SiteURL: StringValue(fSiteUrl), - DiscoveredAt: fDiscoveredAt.Format(time.RFC3339), - Status: StringValue(fStatus), - LastError: StringValue(fLastError), - SourceURL: StringValue(fSourceUrl), - SourceHost: StringValue(fSourceHost), - TLD: StringValue(fTLD), - } - if fLastCheckedAt != nil { - sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339) - } - if fNextCheckAt != nil { - sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339) - } - if fLastBuildDate != nil { - sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339) - } - if fLastErrorAt != nil { - sf.LastErrorAt = fLastErrorAt.Format(time.RFC3339) - } - if fItemCount != nil { - sf.ItemCount = *fItemCount - } - if fOldestItemDate != nil { - sf.OldestItemDate = fOldestItemDate.Format(time.RFC3339) - } - if fNewestItemDate != nil { - sf.NewestItemDate = fNewestItemDate.Format(time.RFC3339) - } - if fNoUpdate != nil { - sf.NoUpdate = *fNoUpdate - } - results[feedUrl] = &SearchResult{ - Feed: sf, - Items: []SearchItem{item}, - } - } - } - } - - // Convert map to slice - var resultList []SearchResult - for _, r := range results { - resultList = append(resultList, *r) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resultList) -} diff --git a/crawler.go b/crawler.go index bc6be12..0be1c46 100644 --- a/crawler.go +++ b/crawler.go @@ -18,23 +18,20 @@ import ( ) type Crawler struct { - MaxDepth int - MaxPagesPerHost int - Timeout time.Duration - UserAgent string - visited sync.Map - feedsMu sync.Mutex - client *http.Client + MaxDepth int + MaxPagesPerHost int + Timeout time.Duration + UserAgent string + visited sync.Map + feedsMu sync.Mutex + client *http.Client domainsCrawled int32 // feed_crawl: domains crawled for feed discovery domainsChecked int32 // domain_check: domains checked for liveness feedsChecked int32 // feed_check: feeds checked for new items startTime time.Time db *DB domainsImported int32 - cachedStats *DashboardStats - cachedAllDomains []DomainStat - statsMu sync.RWMutex - shutdownCh chan struct{} // closed on shutdown to signal goroutines + shutdownCh chan struct{} // closed on shutdown to signal goroutines } func NewCrawler(connString string) (*Crawler, error) { @@ -107,17 +104,6 @@ func (c *Crawler) Close() error { return nil } -// StartStatsLoop updates cached stats every 10 seconds -func (c *Crawler) StartStatsLoop() { - for { - if c.IsShuttingDown() { - return - } - c.UpdateStats() - time.Sleep(10 * time.Second) - } -} - // StartCleanupLoop runs item cleanup once per week func (c *Crawler) StartCleanupLoop() { for { @@ -367,7 +353,7 @@ type FeedInfo struct { func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo { var title, description, siteURL, sourceHost *string err := c.db.QueryRow(` - SELECT title, description, site_url, domain_host || '.' 
|| domain_tld as source_host FROM feeds WHERE url = $1 + SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1 `, feedURL).Scan(&title, &description, &siteURL, &sourceHost) if err != nil { return nil @@ -383,7 +369,7 @@ func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo { // RefreshAllProfiles updates profiles for all existing accounts with feed URLs func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) { rows, err := c.db.Query(` - SELECT url, title, description, site_url, domain_host || '.' || domain_tld as source_host, publish_account + SELECT url, title, description, site_url, domain_host as source_host, publish_account FROM feeds WHERE publish_account IS NOT NULL AND publish_account <> '' `) diff --git a/dashboard.go b/dashboard.go deleted file mode 100644 index 5ff21fa..0000000 --- a/dashboard.go +++ /dev/null @@ -1,265 +0,0 @@ -package main - -import ( - "fmt" - "time" -) - -// DashboardStats holds all statistics for the dashboard -type DashboardStats struct { - // Domain stats - TotalDomains int `json:"total_domains"` - HoldDomains int `json:"hold_domains"` - PassDomains int `json:"pass_domains"` - SkipDomains int `json:"skip_domains"` - DeadDomains int `json:"dead_domains"` - - // Feed stats - TotalFeeds int `json:"total_feeds"` - AliveFeeds int `json:"alive_feeds"` // status='pass' (healthy feeds) - PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing) - SkipFeeds int `json:"skip_feeds"` - HoldFeeds int `json:"hold_feeds"` - DeadFeeds int `json:"dead_feeds"` - EmptyFeeds int `json:"empty_feeds"` - RSSFeeds int `json:"rss_feeds"` - AtomFeeds int `json:"atom_feeds"` - JSONFeeds int `json:"json_feeds"` - UnknownFeeds int `json:"unknown_feeds"` - - // Processing rates (per minute) - DomainsCrawled int32 `json:"domains_crawled"` // feed_crawl count - DomainCheckRate int `json:"domain_check_rate"` // domain_check per minute - FeedCrawlRate int `json:"feed_crawl_rate"` // feed_crawl per minute - FeedCheckRate int `json:"feed_check_rate"` // feed_check per minute - - // Timing - UpdatedAt time.Time `json:"updated_at"` -} - -type TLDStat struct { - TLD string `json:"tld"` - Count int `json:"count"` -} - -type RecentFeed struct { - URL string `json:"url"` - Title string `json:"title"` - Type string `json:"type"` - DiscoveredAt time.Time `json:"discovered_at"` -} - -type DomainStat struct { - Host string `json:"host"` - FeedsFound int `json:"feeds_found"` -} - -// commaFormat formats an integer with comma separators -func commaFormat(n int) string { - s := fmt.Sprintf("%d", n) - if len(s) <= 3 { - return s - } - var result []byte - for i, c := range s { - if i > 0 && (len(s)-i)%3 == 0 { - result = append(result, ',') - } - result = append(result, byte(c)) - } - return string(result) -} - -// UpdateStats recalculates and caches dashboard statistics -func (c *Crawler) UpdateStats() { - fmt.Println("UpdateStats: calculating stats...") - stats, err := c.calculateStats() - if err != nil { - fmt.Printf("UpdateStats: error calculating stats: %v\n", err) - return - } - // Cache all domains with feeds (runs in background, so slow query is OK) - fmt.Println("UpdateStats: fetching all domains...") - allDomains := c.fetchAllDomainsFromDB() - fmt.Printf("UpdateStats: got %d domains\n", len(allDomains)) - - c.statsMu.Lock() - c.cachedStats = stats - c.cachedAllDomains = allDomains - c.statsMu.Unlock() - fmt.Println("UpdateStats: complete") -} - -func (c *Crawler) fetchAllDomainsFromDB() []DomainStat { 
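- // One row per (domain_tld, domain_host) pair with its feed count; only the - // background UpdateStats path calls this, so the full-table GROUP BY is tolerable.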
- rows, err := c.db.Query(` - SELECT domain_tld as tld, domain_host || '.' || domain_tld as source_host, COUNT(*) as cnt FROM feeds - GROUP BY domain_tld, domain_host - ORDER BY domain_tld, domain_host - `) - if err != nil { - fmt.Printf("fetchAllDomainsFromDB error: %v\n", err) - return nil - } - defer rows.Close() - - var domains []DomainStat - for rows.Next() { - var ds DomainStat - var tld string - if err := rows.Scan(&tld, &ds.Host, &ds.FeedsFound); err != nil { - continue - } - domains = append(domains, ds) - } - return domains -} - -// GetDashboardStats returns cached statistics (returns empty stats if not yet cached) -func (c *Crawler) GetDashboardStats() (*DashboardStats, error) { - c.statsMu.RLock() - stats := c.cachedStats - c.statsMu.RUnlock() - - if stats != nil { - return stats, nil - } - // Return empty stats while background calculation runs (don't block HTTP requests) - return &DashboardStats{UpdatedAt: time.Now()}, nil -} - -// calculateStats collects all statistics for the dashboard -func (c *Crawler) calculateStats() (*DashboardStats, error) { - stats := &DashboardStats{ - UpdatedAt: time.Now(), - DomainsCrawled: c.domainsCrawled, - } - - // Calculate rates (per minute) - elapsed := time.Since(c.startTime).Minutes() - if elapsed > 0 { - stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed) - stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed) - stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed) - } - - // Get domain stats - if err := c.collectDomainStats(stats); err != nil { - return nil, err - } - - // Get feed stats - if err := c.collectFeedStats(stats); err != nil { - return nil, err - } - - return stats, nil -} - -func (c *Crawler) collectDomainStats(stats *DashboardStats) error { - // Use COUNT(*) for total count - err := c.db.QueryRow("SELECT COUNT(*) FROM domains").Scan(&stats.TotalDomains) - if err != nil { - return err - } - - // Single query to get all status counts (one index scan instead of three) - rows, err := c.db.Query("SELECT status, COUNT(*) FROM domains GROUP BY status") - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var status string - var count int - if err := rows.Scan(&status, &count); err != nil { - continue - } - switch status { - case "hold": - stats.HoldDomains = count - case "pass": - stats.PassDomains = count - case "skip": - stats.SkipDomains = count - case "dead": - stats.DeadDomains = count - } - } - if err := rows.Err(); err != nil { - return err - } - - return rows.Err() -} - -func (c *Crawler) collectFeedStats(stats *DashboardStats) error { - // Use COUNT(*) for total count - err := c.db.QueryRow("SELECT COUNT(*) FROM feeds").Scan(&stats.TotalFeeds) - if err != nil { - return err - } - - // Get status counts - statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status") - if err != nil { - return err - } - defer statusRows.Close() - - for statusRows.Next() { - var status *string - var count int - if err := statusRows.Scan(&status, &count); err != nil { - continue - } - if status != nil { - switch *status { - case "pass": - stats.AliveFeeds = count - case "skip": - stats.SkipFeeds = count - case "hold": - stats.HoldFeeds = count - case "dead": - stats.DeadFeeds = count - } - } - } - - // Count feeds approved for publishing (publish_status='pass') - c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds) - - // Count empty feeds (item_count = 0 or NULL) - c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE 
item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds) - - // Single query to get all type counts (one index scan instead of three) - rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type") - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var feedType *string - var count int - if err := rows.Scan(&feedType, &count); err != nil { - continue - } - if feedType == nil { - stats.UnknownFeeds += count - } else { - switch *feedType { - case "rss": - stats.RSSFeeds = count - case "atom": - stats.AtomFeeds = count - case "json": - stats.JSONFeeds = count - default: - stats.UnknownFeeds += count - } - } - } - return rows.Err() -} diff --git a/main.go b/main.go index 372bcc6..d7c9a54 100644 --- a/main.go +++ b/main.go @@ -19,16 +19,6 @@ func main() { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) - // Start dashboard in background - go func() { - if err := crawler.StartDashboard("0.0.0.0:4321"); err != nil { - fmt.Fprintf(os.Stderr, "Dashboard error: %v\n", err) - } - }() - - // Initialize stats in background (can be slow with large DBs) - go crawler.UpdateStats() - // Start all loops independently fmt.Println("Starting import and processing loops...") @@ -44,9 +34,6 @@ func main() { // feed_check loop (background) - checks feeds for new items go crawler.StartFeedCheckLoop() - // Stats loop (background) - updates once per minute - go crawler.StartStatsLoop() - // Cleanup loop (background) - removes old items once per week go crawler.StartCleanupLoop() diff --git a/oauth.go b/oauth.go deleted file mode 100644 index aeff41b..0000000 --- a/oauth.go +++ /dev/null @@ -1,287 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - neturl "net/url" - "os" - "strings" - "time" - - oauth "github.com/haileyok/atproto-oauth-golang" - "github.com/haileyok/atproto-oauth-golang/helpers" - "github.com/lestrrat-go/jwx/v2/jwk" -) - -// OAuthManager handles OAuth 2.0 authentication for the dashboard -type OAuthManager struct { - client *oauth.Client - clientID string - redirectURI string - privateJWK jwk.Key - publicJWK jwk.Key - sessions *SessionStore - cookieSecret []byte - allowedScope string -} - -// OAuthConfig holds configuration for the OAuth manager -type OAuthConfig struct { - ClientID string // URL to client metadata (e.g., https://app.1440.news/.well-known/oauth-client-metadata) - RedirectURI string // OAuth callback URL (e.g., https://app.1440.news/auth/callback) - CookieSecret string // 32-byte hex string for AES-256-GCM encryption - PrivateJWK string // ES256 private key as JSON -} - -// NewOAuthManager creates a new OAuth manager -func NewOAuthManager(cfg OAuthConfig, db *DB) (*OAuthManager, error) { - // Parse cookie secret (must be 32 bytes for AES-256) - cookieSecret, err := parseHexSecret(cfg.CookieSecret) - if err != nil { - return nil, fmt.Errorf("invalid cookie secret: %v", err) - } - if len(cookieSecret) != 32 { - return nil, fmt.Errorf("cookie secret must be 32 bytes, got %d", len(cookieSecret)) - } - - // Parse private JWK - privateJWK, err := helpers.ParseJWKFromBytes([]byte(cfg.PrivateJWK)) - if err != nil { - return nil, fmt.Errorf("invalid private JWK: %v", err) - } - - // Extract public key - publicJWK, err := privateJWK.PublicKey() - if err != nil { - return nil, fmt.Errorf("failed to extract public key: %v", err) - } - - // Create HTTP client with longer timeout - httpClient := &http.Client{ - Timeout: 30 * time.Second, - } - - // Create OAuth client 
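- // (ClientId doubles as the URL where the client-metadata document is served; the - // ES256 key passed as ClientJwk signs the private_key_jwt assertions that metadata declares)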
- client, err := oauth.NewClient(oauth.ClientArgs{ - Http: httpClient, - ClientJwk: privateJWK, - ClientId: cfg.ClientID, - RedirectUri: cfg.RedirectURI, - }) - if err != nil { - return nil, fmt.Errorf("failed to create OAuth client: %v", err) - } - - return &OAuthManager{ - client: client, - clientID: cfg.ClientID, - redirectURI: cfg.RedirectURI, - privateJWK: privateJWK, - publicJWK: publicJWK, - sessions: NewSessionStore(db), - cookieSecret: cookieSecret, - allowedScope: "atproto", - }, nil -} - -// LoadOAuthConfig loads OAuth configuration from environment or oauth.env file -func LoadOAuthConfig(baseURL string) (*OAuthConfig, error) { - cfg := &OAuthConfig{ - ClientID: baseURL + "/.well-known/oauth-client-metadata", - RedirectURI: baseURL + "/auth/callback", - } - - // Try environment variables first - cfg.CookieSecret = os.Getenv("OAUTH_COOKIE_SECRET") - cfg.PrivateJWK = os.Getenv("OAUTH_PRIVATE_JWK") - - // Fall back to oauth.env file - if cfg.CookieSecret == "" || cfg.PrivateJWK == "" { - if data, err := os.ReadFile("oauth.env"); err == nil { - for _, line := range strings.Split(string(data), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "#") || line == "" { - continue - } - parts := strings.SplitN(line, "=", 2) - if len(parts) == 2 { - key := strings.TrimSpace(parts[0]) - value := strings.TrimSpace(parts[1]) - switch key { - case "OAUTH_COOKIE_SECRET": - cfg.CookieSecret = value - case "OAUTH_PRIVATE_JWK": - cfg.PrivateJWK = value - } - } - } - } - } - - // Validate required fields - if cfg.CookieSecret == "" { - return nil, fmt.Errorf("OAUTH_COOKIE_SECRET not configured") - } - if cfg.PrivateJWK == "" { - return nil, fmt.Errorf("OAUTH_PRIVATE_JWK not configured") - } - - return cfg, nil -} - -// parseHexSecret converts a hex string to bytes -func parseHexSecret(hex string) ([]byte, error) { - if len(hex)%2 != 0 { - return nil, fmt.Errorf("hex string must have even length") - } - b := make([]byte, len(hex)/2) - for i := 0; i < len(hex); i += 2 { - var val byte - for j := 0; j < 2; j++ { - c := hex[i+j] - switch { - case c >= '0' && c <= '9': - val = val*16 + (c - '0') - case c >= 'a' && c <= 'f': - val = val*16 + (c - 'a' + 10) - case c >= 'A' && c <= 'F': - val = val*16 + (c - 'A' + 10) - default: - return nil, fmt.Errorf("invalid hex character: %c", c) - } - } - b[i/2] = val - } - return b, nil -} - -// resolveHandle resolves a Bluesky handle to a DID -func resolveHandle(ctx context.Context, handle string) (string, error) { - // Normalize handle (remove @ prefix and whitespace) - handle = strings.TrimSpace(handle) - handle = strings.TrimPrefix(handle, "@") - handle = strings.ToLower(handle) - - // Try DNS-based resolution first - url := fmt.Sprintf("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle=%s", neturl.QueryEscape(handle)) - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("resolve handle failed: %s", string(body)) - } - - var result struct { - DID string `json:"did"` - } - if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { - return "", err - } - - return result.DID, nil -} - -// resolveDIDToHandle resolves a DID to the current handle -func resolveDIDToHandle(ctx context.Context, did string) (string, error) { - // Fetch DID document - var docURL string - if 
strings.HasPrefix(did, "did:plc:") { - docURL = fmt.Sprintf("https://plc.directory/%s", did) - } else if strings.HasPrefix(did, "did:web:") { - domain := strings.TrimPrefix(did, "did:web:") - docURL = fmt.Sprintf("https://%s/.well-known/did.json", domain) - } else { - return "", fmt.Errorf("unsupported DID method: %s", did) - } - - req, err := http.NewRequestWithContext(ctx, "GET", docURL, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to fetch DID document: %d", resp.StatusCode) - } - - var doc struct { - AlsoKnownAs []string `json:"alsoKnownAs"` - } - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { - return "", err - } - - // Find the at:// handle - for _, aka := range doc.AlsoKnownAs { - if strings.HasPrefix(aka, "at://") { - return strings.TrimPrefix(aka, "at://"), nil - } - } - - return "", fmt.Errorf("no handle found for DID %s", did) -} - -// resolveDIDToService gets the PDS service URL from a DID -func resolveDIDToService(ctx context.Context, did string) (string, error) { - var docURL string - if strings.HasPrefix(did, "did:plc:") { - docURL = fmt.Sprintf("https://plc.directory/%s", did) - } else if strings.HasPrefix(did, "did:web:") { - domain := strings.TrimPrefix(did, "did:web:") - docURL = fmt.Sprintf("https://%s/.well-known/did.json", domain) - } else { - return "", fmt.Errorf("unsupported DID method: %s", did) - } - - req, err := http.NewRequestWithContext(ctx, "GET", docURL, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to fetch DID document: %d", resp.StatusCode) - } - - var doc struct { - Service []struct { - ID string `json:"id"` - Type string `json:"type"` - ServiceEndpoint string `json:"serviceEndpoint"` - } `json:"service"` - } - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { - return "", err - } - - // Find the atproto_pds service - for _, svc := range doc.Service { - if svc.Type == "AtprotoPersonalDataServer" || svc.ID == "#atproto_pds" { - return svc.ServiceEndpoint, nil - } - } - - return "", fmt.Errorf("no PDS service found for DID %s", did) -} diff --git a/oauth_handlers.go b/oauth_handlers.go deleted file mode 100644 index c351f14..0000000 --- a/oauth_handlers.go +++ /dev/null @@ -1,521 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - "html/template" - "net/http" - "net/url" - "strings" - "time" - - "github.com/haileyok/atproto-oauth-golang/helpers" -) - -var allowedHandles = map[string]bool{ - "1440.news": true, - "wehrv.bsky.social": true, -} - -// HandleClientMetadata serves the OAuth client metadata -func (m *OAuthManager) HandleClientMetadata(w http.ResponseWriter, r *http.Request) { - // Get the JWKS URI from the same host - scheme := "https" - if r.TLS == nil && (r.Host == "localhost" || r.Host == "127.0.0.1" || r.Host == "app.1440.localhost:4321") { - scheme = "http" - } - baseURL := scheme + "://" + r.Host - - metadata := map[string]interface{}{ - "client_id": m.clientID, - "client_name": "1440.news Dashboard", - "client_uri": baseURL, - "redirect_uris": []string{m.redirectURI}, - "grant_types": []string{"authorization_code", "refresh_token"}, - "response_types": []string{"code"}, - "scope": "atproto", - "token_endpoint_auth_method": 
"private_key_jwt", - "token_endpoint_auth_signing_alg": "ES256", - "dpop_bound_access_tokens": true, - "jwks_uri": baseURL + "/.well-known/jwks.json", - "application_type": "web", - "subject_type": "public", - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(metadata) -} - -// HandleJWKS serves the public JWK set -func (m *OAuthManager) HandleJWKS(w http.ResponseWriter, r *http.Request) { - jwks := helpers.CreateJwksResponseObject(m.publicJWK) - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(jwks) -} - -// HandleLogin serves the login page or initiates OAuth flow -func (m *OAuthManager) HandleLogin(w http.ResponseWriter, r *http.Request) { - // Check if already logged in - if session := m.GetSessionFromCookie(r); session != nil { - http.Redirect(w, r, "/dashboard", http.StatusFound) - return - } - - // If handle is provided, start OAuth flow - handle := r.URL.Query().Get("handle") - if handle != "" { - // Save handle to cookie for prefill on next visit - http.SetCookie(w, &http.Cookie{ - Name: "last_handle", - Value: handle, - Path: "/", - MaxAge: 86400 * 365, // 1 year - HttpOnly: true, - SameSite: http.SameSiteLaxMode, - }) - m.startOAuthFlow(w, r, handle) - return - } - - // Get last handle from cookie for prefill - lastHandle := "" - if cookie, err := r.Cookie("last_handle"); err == nil { - lastHandle = cookie.Value - } - - // Serve login page - w.Header().Set("Content-Type", "text/html; charset=utf-8") - tmpl := template.Must(template.New("login").Parse(loginPageHTML)) - tmpl.Execute(w, map[string]string{"LastHandle": lastHandle}) -} - -// startOAuthFlow initiates the OAuth flow for a given handle -func (m *OAuthManager) startOAuthFlow(w http.ResponseWriter, r *http.Request, handle string) { - ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) - defer cancel() - - // Auto-append .bsky.social if handle has no dots - if !strings.Contains(handle, ".") { - handle = handle + ".bsky.social" - } - - fmt.Printf("OAuth: starting flow for handle: %s\n", handle) - - // Resolve handle to DID - did, err := resolveHandle(ctx, handle) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve handle: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: resolved DID: %s\n", did) - - // Resolve DID to PDS service URL - pdsURL, err := resolveDIDToService(ctx, did) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve PDS: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: PDS URL: %s\n", pdsURL) - - // Get auth server from PDS - authServerURL, err := m.client.ResolvePdsAuthServer(ctx, pdsURL) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve auth server: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: auth server: %s\n", authServerURL) - - // Fetch auth server metadata - authMeta, err := m.client.FetchAuthServerMetadata(ctx, authServerURL) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to fetch auth metadata: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: auth endpoint: %s\n", authMeta.AuthorizationEndpoint) - - // Generate DPoP private key for this auth flow - dpopKey, err := helpers.GenerateKey(nil) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to generate DPoP key: %v", err), http.StatusInternalServerError) - return - } - dpopKeyBytes, err := json.Marshal(dpopKey) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to marshal DPoP key: %v", err), http.StatusInternalServerError) - return - } - - // 
Send PAR (Pushed Authorization Request) - fmt.Printf("OAuth: sending PAR to %s\n", authServerURL) - parResp, err := m.client.SendParAuthRequest( - ctx, - authServerURL, - authMeta, - handle, - m.allowedScope, - dpopKey, - ) - if err != nil { - fmt.Printf("OAuth: PAR failed: %v\n", err) - http.Error(w, fmt.Sprintf("PAR request failed: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: PAR success, request_uri: %s\n", parResp.RequestUri) - - // Save pending auth state - pending := &PendingAuth{ - State: parResp.State, - PkceVerifier: parResp.PkceVerifier, - DpopPrivateJWK: string(dpopKeyBytes), - DpopNonce: parResp.DpopAuthserverNonce, - DID: did, - PdsURL: pdsURL, - AuthserverIss: authMeta.Issuer, - } - m.sessions.SavePending(parResp.State, pending) - - // Build authorization URL - authURL, err := url.Parse(authMeta.AuthorizationEndpoint) - if err != nil { - http.Error(w, fmt.Sprintf("Invalid auth endpoint: %v", err), http.StatusInternalServerError) - return - } - - q := authURL.Query() - q.Set("client_id", m.clientID) - q.Set("request_uri", parResp.RequestUri) - authURL.RawQuery = q.Encode() - - fmt.Printf("OAuth: redirecting to: %s\n", authURL.String()) - - http.Redirect(w, r, authURL.String(), http.StatusFound) -} - -// HandleCallback handles the OAuth callback -func (m *OAuthManager) HandleCallback(w http.ResponseWriter, r *http.Request) { - fmt.Printf("OAuth callback: received request from %s\n", r.URL.String()) - - ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) - defer cancel() - - // Get callback parameters - code := r.URL.Query().Get("code") - state := r.URL.Query().Get("state") - iss := r.URL.Query().Get("iss") - errorParam := r.URL.Query().Get("error") - errorDesc := r.URL.Query().Get("error_description") - - codePreview := code - if len(codePreview) > 10 { - codePreview = codePreview[:10] - } - statePreview := state - if len(statePreview) > 10 { - statePreview = statePreview[:10] - } - fmt.Printf("OAuth callback: code=%s..., state=%s..., iss=%s, error=%s\n", - codePreview, statePreview, iss, errorParam) - - // Check for errors from auth server - if errorParam != "" { - http.Error(w, fmt.Sprintf("Authorization error: %s - %s", errorParam, errorDesc), http.StatusBadRequest) - return - } - - if code == "" || state == "" { - http.Error(w, "Missing code or state parameter", http.StatusBadRequest) - return - } - - // Retrieve pending auth state - pending := m.sessions.GetPending(state) - if pending == nil { - fmt.Printf("OAuth callback: no pending state found for %s\n", state) - http.Error(w, "Invalid or expired state", http.StatusBadRequest) - return - } - fmt.Printf("OAuth callback: found pending state for DID %s\n", pending.DID) - - // Verify issuer matches - if iss != "" && iss != pending.AuthserverIss { - http.Error(w, "Issuer mismatch", http.StatusBadRequest) - return - } - - // Parse DPoP private key - dpopKey, err := helpers.ParseJWKFromBytes([]byte(pending.DpopPrivateJWK)) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to parse DPoP key: %v", err), http.StatusInternalServerError) - return - } - - // Exchange code for tokens - fmt.Printf("OAuth callback: exchanging code for tokens at %s\n", pending.AuthserverIss) - tokenResp, err := m.client.InitialTokenRequest( - ctx, - code, - pending.AuthserverIss, - pending.PkceVerifier, - pending.DpopNonce, - dpopKey, - ) - if err != nil { - fmt.Printf("OAuth callback: token exchange failed: %v\n", err) - http.Error(w, fmt.Sprintf("Token exchange failed: %v", err), http.StatusBadRequest) - return - 
} - fmt.Printf("OAuth callback: token exchange success, sub=%s, scope=%s\n", tokenResp.Sub, tokenResp.Scope) - - // Verify scope - if tokenResp.Scope != m.allowedScope { - fmt.Printf("OAuth callback: scope mismatch: expected %s, got %s\n", m.allowedScope, tokenResp.Scope) - http.Error(w, fmt.Sprintf("Invalid scope: expected %s, got %s", m.allowedScope, tokenResp.Scope), http.StatusForbidden) - return - } - - // Resolve DID to handle - fmt.Printf("OAuth callback: resolving DID %s to handle\n", tokenResp.Sub) - handle, err := resolveDIDToHandle(ctx, tokenResp.Sub) - if err != nil { - fmt.Printf("OAuth callback: failed to resolve handle: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to resolve handle: %v", err), http.StatusInternalServerError) - return - } - fmt.Printf("OAuth callback: resolved handle: %s\n", handle) - - // CRITICAL: Verify user is allowed - if !allowedHandles[handle] { - fmt.Printf("OAuth callback: access denied for handle: %s (allowed: %v)\n", handle, allowedHandles) - http.Error(w, "Access denied.", http.StatusForbidden) - return - } - fmt.Printf("OAuth callback: handle %s is allowed\n", handle) - - // Create session - fmt.Printf("OAuth callback: creating session for %s\n", handle) - session, err := m.sessions.CreateSession(tokenResp.Sub, handle) - if err != nil { - fmt.Printf("OAuth callback: failed to create session: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to create session: %v", err), http.StatusInternalServerError) - return - } - fmt.Printf("OAuth callback: session created with ID %s\n", session.ID) - - // Store token info in session - session.AccessToken = tokenResp.AccessToken - session.RefreshToken = tokenResp.RefreshToken - session.TokenExpiry = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second) - session.DpopPrivateJWK = pending.DpopPrivateJWK - session.DpopAuthserverNonce = tokenResp.DpopAuthserverNonce - session.PdsURL = pending.PdsURL - session.AuthserverIss = pending.AuthserverIss - m.sessions.UpdateSession(session) - - // Set session cookie - fmt.Printf("OAuth callback: setting session cookie\n") - if err := m.SetSessionCookie(w, r, session.ID); err != nil { - fmt.Printf("OAuth callback: failed to set cookie: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to set cookie: %v", err), http.StatusInternalServerError) - return - } - - // Redirect to dashboard - fmt.Printf("OAuth callback: success! 
redirecting to /dashboard\n") - http.Redirect(w, r, "/dashboard", http.StatusFound) -} - -// HandleLogout clears the session and redirects to login -func (m *OAuthManager) HandleLogout(w http.ResponseWriter, r *http.Request) { - // Get current session - session := m.GetSessionFromCookie(r) - if session != nil { - // Delete session from store - m.sessions.DeleteSession(session.ID) - } - - // Clear cookie - m.ClearSessionCookie(w) - - // Handle API vs browser request - if r.Method == http.MethodPost || isAPIRequest(r) { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "status": "logged out", - }) - return - } - - // Redirect to login for browser requests - http.Redirect(w, r, "/auth/login", http.StatusFound) -} - -// HandleSessionInfo returns current session info (for API calls) -func (m *OAuthManager) HandleSessionInfo(w http.ResponseWriter, r *http.Request) { - session := m.GetSessionFromCookie(r) - if session == nil { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusUnauthorized) - json.NewEncoder(w).Encode(map[string]string{ - "error": "not authenticated", - }) - return - } - - info := &SessionInfo{ - DID: session.DID, - Handle: session.Handle, - ExpiresAt: session.ExpiresAt, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(info) -} - -const loginPageHTML = ` - -
-[HTML template constants elided; recoverable text: "Dashboard Authentication" (login page), plus "Curated news feeds on Bluesky", "{{.Count}} feeds available", "No feeds available yet.", and "No feeds match your search." (feed directory page); the markup itself was not preserved] -`
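Aside: NewOAuthManager above rejects any OAUTH_COOKIE_SECRET that does not decode to exactly 32 bytes (the AES-256-GCM key length). A compatible value can be minted with a one-off generator along these lines; this is a minimal sketch using only the standard library, not a program that exists in this repo:

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// Prints 64 hex characters; parseHexSecret in oauth.go decodes them back to 32 raw bytes.
func main() {
	key := make([]byte, 32)
	if _, err := rand.Read(key); err != nil {
		panic(err)
	}
	fmt.Println(hex.EncodeToString(key))
}

Run it once and put the output in oauth.env as OAUTH_COOKIE_SECRET=<value>, or export it in the environment; LoadOAuthConfig reads the environment first and falls back to the file.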