diff --git a/api_domains.go b/api_domains.go deleted file mode 100644 index da1c0f8..0000000 --- a/api_domains.go +++ /dev/null @@ -1,2068 +0,0 @@ -package main - -import ( - "bufio" - "encoding/json" - "fmt" - "net/http" - "os" - "strings" - - "github.com/jackc/pgx/v5" -) - -// buildTLDSearchQuery builds a query to get TLDs based on search type -// Returns (query, args) for the database query -func buildTLDSearchQuery(sq SearchQuery) (string, []interface{}) { - pattern := "%" + strings.ToLower(sq.Pattern) + "%" - - switch sq.Type { - case "domain": - // Check if pattern includes TLD (e.g., d:npr.org -> exact match) - hostPart, tldFilter := parseSearchTerm(sq.Pattern) - if tldFilter != "" { - // Exact match - return just the matching TLD - return ` - SELECT tld::text as tld, COUNT(*) as domain_count - FROM domains - WHERE tld = $1 AND LOWER(host) = $2 - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{tldFilter, strings.ToLower(hostPart)} - } - // Pattern match - search all TLDs - return ` - SELECT tld::text as tld, COUNT(*) as domain_count - FROM domains - WHERE LOWER(host) LIKE $1 - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern} - - case "url": - // Search feed URL paths (after domain) - return ` - SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count - FROM feeds - WHERE tld IS NOT NULL AND LOWER(url) LIKE $1 - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern} - - case "title": - // Search feed titles - return ` - SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count - FROM feeds - WHERE tld IS NOT NULL AND LOWER(title) LIKE $1 - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern} - - case "description": - // Search feed descriptions - return ` - SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count - FROM feeds - WHERE tld IS NOT NULL AND LOWER(description) LIKE $1 - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern} - - case "item": - // Search item titles - return ` - SELECT f.tld, COUNT(DISTINCT f.domain_host || '.' || f.domain_tld) as domain_count - FROM feeds f - INNER JOIN items i ON i.feed_url = f.url - WHERE f.tld IS NOT NULL AND LOWER(i.title) LIKE $1 - GROUP BY f.tld - ORDER BY f.tld ASC - `, []interface{}{pattern} - - default: - // "all" - search domains and feeds (NOT items - use i: prefix for item search) - // Also include exact domain match if pattern looks like a domain - if sq.DomainHost != "" && sq.DomainTLD != "" { - return ` - SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM ( - -- Domains matching host pattern - SELECT tld::text as tld, host || '.' || tld as source_host - FROM domains WHERE LOWER(host) LIKE $1 - UNION - -- Exact domain match - SELECT tld::text as tld, host || '.' || tld as source_host - FROM domains WHERE LOWER(host) = $2 AND tld::text = $3 - UNION - -- Feeds matching URL - SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1 - UNION - -- Feeds matching title - SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1 - UNION - -- Feeds matching description - SELECT domain_tld::text as tld, domain_host || '.' 
|| domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1 - ) combined - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)} - } - return ` - SELECT tld, COUNT(DISTINCT domain_host || '.' || domain_tld) as domain_count FROM ( - -- Domains matching host - SELECT tld::text as tld, host || '.' || tld as source_host - FROM domains WHERE LOWER(host) LIKE $1 - UNION - -- Feeds matching URL - SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(url) LIKE $1 - UNION - -- Feeds matching title - SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(title) LIKE $1 - UNION - -- Feeds matching description - SELECT domain_tld::text as tld, domain_host || '.' || domain_tld as source_host FROM feeds WHERE domain_tld IS NOT NULL AND LOWER(description) LIKE $1 - ) combined - GROUP BY tld - ORDER BY tld ASC - `, []interface{}{pattern} - } -} - -// buildDomainSearchQuery builds a query to get domains based on search type -// Returns (whereClause, args, argNum) to append to the base query -func buildDomainSearchQuery(sq SearchQuery, tldFilter string, argNum int) (string, []interface{}, int) { - pattern := "%" + strings.ToLower(sq.Pattern) + "%" - var where string - var args []interface{} - - switch sq.Type { - case "domain": - if sq.ExactMatch && tldFilter != "" { - // d:npr.org -> exact match - where = fmt.Sprintf(" AND d.tld = $%d AND LOWER(d.host) = $%d", argNum, argNum+1) - args = []interface{}{tldFilter, strings.ToLower(sq.Pattern)} - argNum += 2 - } else if tldFilter != "" { - where = fmt.Sprintf(" AND d.tld = $%d AND LOWER(d.host) LIKE $%d", argNum, argNum+1) - args = []interface{}{tldFilter, pattern} - argNum += 2 - } else { - where = fmt.Sprintf(" AND LOWER(d.host) LIKE $%d", argNum) - args = []interface{}{pattern} - argNum++ - } - - case "url": - where = fmt.Sprintf(" AND LOWER(f.url) LIKE $%d", argNum) - args = []interface{}{pattern} - argNum++ - if tldFilter != "" { - where += fmt.Sprintf(" AND d.tld = $%d", argNum) - args = append(args, tldFilter) - argNum++ - } - - case "title": - where = fmt.Sprintf(" AND LOWER(f.title) LIKE $%d", argNum) - args = []interface{}{pattern} - argNum++ - if tldFilter != "" { - where += fmt.Sprintf(" AND d.tld = $%d", argNum) - args = append(args, tldFilter) - argNum++ - } - - case "description": - where = fmt.Sprintf(" AND LOWER(f.description) LIKE $%d", argNum) - args = []interface{}{pattern} - argNum++ - if tldFilter != "" { - where += fmt.Sprintf(" AND d.tld = $%d", argNum) - args = append(args, tldFilter) - argNum++ - } - - case "item": - // Need to join items - handled separately - where = fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = f.url AND LOWER(i.title) LIKE $%d)", argNum) - args = []interface{}{pattern} - argNum++ - if tldFilter != "" { - where += fmt.Sprintf(" AND d.tld = $%d", argNum) - args = append(args, tldFilter) - argNum++ - } - - default: - // "all" - search everything, also include exact domain match if pattern looks like a domain - if tldFilter != "" { - if sq.DomainHost != "" && sq.DomainTLD != "" { - where = fmt.Sprintf(` AND d.tld = $%d AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d OR - (LOWER(d.host) = $%d AND d.tld::text = $%d) - )`, argNum, argNum+1, argNum+1, argNum+1, argNum+1, 
argNum+2, argNum+3) - args = []interface{}{tldFilter, pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)} - argNum += 4 - } else { - where = fmt.Sprintf(` AND d.tld = $%d AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d - )`, argNum, argNum+1, argNum+1, argNum+1, argNum+1) - args = []interface{}{tldFilter, pattern} - argNum += 2 - } - } else { - if sq.DomainHost != "" && sq.DomainTLD != "" { - where = fmt.Sprintf(` AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d OR - (LOWER(d.host) = $%d AND d.tld::text = $%d) - )`, argNum, argNum, argNum, argNum, argNum+1, argNum+2) - args = []interface{}{pattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)} - argNum += 3 - } else { - where = fmt.Sprintf(` AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d - )`, argNum, argNum, argNum, argNum) - args = []interface{}{pattern} - argNum++ - } - } - } - - return where, args, argNum -} - -func (c *Crawler) handleAPIAllDomains(w http.ResponseWriter, r *http.Request) { - offset := 0 - limit := 100 - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 100 { - limit = 100 - } - } - - // Serve from cache (updated once per minute in background) - c.statsMu.RLock() - cached := c.cachedAllDomains - c.statsMu.RUnlock() - - var domains []DomainStat - if cached != nil && offset < len(cached) { - end := offset + limit - if end > len(cached) { - end = len(cached) - } - domains = cached[offset:end] - } - if domains == nil { - domains = []DomainStat{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(domains) -} - -// handleAPIDomains lists domains with optional status filter, including their feeds -func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) { - status := r.URL.Query().Get("status") - hasFeeds := r.URL.Query().Get("has_feeds") == "true" - search := r.URL.Query().Get("search") - tldFilter := r.URL.Query().Get("tld") - feedMode := r.URL.Query().Get("feedMode") // include or exclude - feedStatuses := r.URL.Query().Get("feedStatuses") // comma-separated - feedTypes := r.URL.Query().Get("feedTypes") // comma-separated - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - // Parse comma-separated values - var statusList, typeList []string - if feedStatuses != "" { - statusList = strings.Split(feedStatuses, ",") - } - if feedTypes != "" { - typeList = strings.Split(feedTypes, ",") - } - - // Parse search prefix for type-specific searching - var searchQuery SearchQuery - if search != "" { - searchQuery = parseSearchPrefix(search) - // Only extract TLD for domain searches (d:npr.org -> exact match for npr.org) - // All other searches use the literal pattern - if searchQuery.Type == "domain" { - hostPart, detectedTLD := parseSearchTerm(searchQuery.Pattern) - if detectedTLD != "" { - searchQuery.Pattern = hostPart - searchQuery.ExactMatch = true // d:npr.org matches exactly npr.org - if tldFilter == "" { - tldFilter = detectedTLD - } - } - } - } - - // First get domains - var rows pgx.Rows - var err error - - // If feed 
filter is specified, query domains that have matching feeds - if len(statusList) > 0 || len(typeList) > 0 || feedMode != "" { - // Build dynamic query to get domains with matching feeds - query := ` - SELECT DISTINCT d.host, d.tld, d.status, d.last_error, d.feeds_found - FROM domains d - INNER JOIN feeds f ON f.domain_host = d.host AND f.domain_tld = d.tld - WHERE 1=1` - args := []interface{}{} - argNum := 1 - - if tldFilter != "" { - query += fmt.Sprintf(" AND d.tld = $%d", argNum) - args = append(args, tldFilter) - argNum++ - } - if status != "" { - query += fmt.Sprintf(" AND d.status = $%d", argNum) - args = append(args, status) - argNum++ - } - - // Handle status filters (publish_status for pass/skip/hold/dead) - if len(statusList) > 0 { - if feedMode == "exclude" { - query += fmt.Sprintf(" AND (f.publish_status IS NULL OR f.publish_status NOT IN (SELECT unnest($%d::text[])))", argNum) - } else { - query += fmt.Sprintf(" AND f.publish_status IN (SELECT unnest($%d::text[]))", argNum) - } - args = append(args, statusList) - argNum++ - } - - // Handle type filters (including special "empty" type) - if len(typeList) > 0 { - hasEmpty := false - var regularTypes []string - for _, t := range typeList { - if t == "empty" { - hasEmpty = true - } else { - regularTypes = append(regularTypes, t) - } - } - - if feedMode == "exclude" { - // Exclude mode - if len(regularTypes) > 0 && hasEmpty { - query += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[]))) AND f.item_count > 0", argNum) - args = append(args, regularTypes) - argNum++ - } else if len(regularTypes) > 0 { - query += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[])))", argNum) - args = append(args, regularTypes) - argNum++ - } else if hasEmpty { - query += " AND f.item_count > 0" - } - } else { - // Include mode - if len(regularTypes) > 0 && hasEmpty { - query += fmt.Sprintf(" AND (f.type IN (SELECT unnest($%d::text[])) OR f.item_count IS NULL OR f.item_count = 0)", argNum) - args = append(args, regularTypes) - argNum++ - } else if len(regularTypes) > 0 { - query += fmt.Sprintf(" AND f.type IN (SELECT unnest($%d::text[]))", argNum) - args = append(args, regularTypes) - argNum++ - } else if hasEmpty { - query += " AND (f.item_count IS NULL OR f.item_count = 0)" - } - } - } - - if search != "" && searchQuery.Pattern != "" { - searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%" - switch searchQuery.Type { - case "domain": - if searchQuery.ExactMatch { - // d:npr.org -> exact match for host "npr" (tld already filtered above) - query += fmt.Sprintf(" AND LOWER(d.host) = $%d", argNum) - args = append(args, strings.ToLower(searchQuery.Pattern)) - } else { - // d:npr -> pattern match - query += fmt.Sprintf(" AND LOWER(d.host) LIKE $%d", argNum) - args = append(args, searchPattern) - } - argNum++ - case "url": - query += fmt.Sprintf(" AND LOWER(f.url) LIKE $%d", argNum) - args = append(args, searchPattern) - argNum++ - case "title": - query += fmt.Sprintf(" AND LOWER(f.title) LIKE $%d", argNum) - args = append(args, searchPattern) - argNum++ - case "description": - query += fmt.Sprintf(" AND LOWER(f.description) LIKE $%d", argNum) - args = append(args, searchPattern) - argNum++ - case "item": - query += fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = f.url AND LOWER(i.title) LIKE $%d)", argNum) - args = append(args, searchPattern) - argNum++ - default: - // "all" - search domains and feeds (NOT items - use i: prefix for item search) - // Also include exact 
domain match if pattern looks like a domain - if searchQuery.DomainHost != "" && searchQuery.DomainTLD != "" { - query += fmt.Sprintf(` AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d OR - (LOWER(d.host) = $%d AND d.tld::text = $%d) - )`, argNum, argNum, argNum, argNum, argNum+1, argNum+2) - args = append(args, searchPattern, strings.ToLower(searchQuery.DomainHost), strings.ToLower(searchQuery.DomainTLD)) - argNum += 3 - } else { - query += fmt.Sprintf(` AND ( - LOWER(d.host) LIKE $%d OR - LOWER(f.url) LIKE $%d OR - LOWER(f.title) LIKE $%d OR - LOWER(f.description) LIKE $%d - )`, argNum, argNum, argNum, argNum) - args = append(args, searchPattern) - argNum++ - } - } - } - query += fmt.Sprintf(" ORDER BY d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) - args = append(args, limit, offset) - - rows, err = c.db.Query(query, args...) - } else if hasFeeds { - // Only domains with feeds - searchPattern := "%" + strings.ToLower(search) + "%" - if tldFilter != "" && status != "" { - // Filter by specific TLD and status - rows, err = c.db.Query(` - SELECT d.host, d.tld, d.status, d.last_error, f.feed_count - FROM domains d - INNER JOIN ( - SELECT domain_host, domain_tld, COUNT(*) as feed_count - FROM feeds - WHERE item_count > 0 - GROUP BY domain_host, domain_tld - ) f ON d.host = f.domain_host AND d.tld = f.domain_tld - WHERE d.tld = $1 AND d.status = $2 - ORDER BY d.host ASC - LIMIT $3 OFFSET $4 - `, tldFilter, status, limit, offset) - } else if tldFilter != "" { - // Filter by specific TLD only (exclude 'skip' by default) - rows, err = c.db.Query(` - SELECT d.host, d.tld, d.status, d.last_error, f.feed_count - FROM domains d - INNER JOIN ( - SELECT domain_host, domain_tld, COUNT(*) as feed_count - FROM feeds - WHERE item_count > 0 - GROUP BY domain_host, domain_tld - ) f ON d.host = f.domain_host AND d.tld = f.domain_tld - WHERE d.status != 'skip' AND d.tld = $1 - ORDER BY d.host ASC - LIMIT $2 OFFSET $3 - `, tldFilter, limit, offset) - } else if search != "" { - // Search in domain host only (uses trigram index) - rows, err = c.db.Query(` - SELECT d.host, d.tld, d.status, d.last_error, f.feed_count - FROM domains d - INNER JOIN ( - SELECT domain_host, domain_tld, COUNT(*) as feed_count - FROM feeds - WHERE item_count > 0 - GROUP BY domain_host, domain_tld - ) f ON d.host = f.domain_host AND d.tld = f.domain_tld - WHERE d.status != 'skip' AND LOWER(d.host) LIKE $1 - ORDER BY d.tld ASC, d.host ASC - LIMIT $2 OFFSET $3 - `, searchPattern, limit, offset) - } else if status != "" { - rows, err = c.db.Query(` - SELECT d.host, d.tld, d.status, d.last_error, f.feed_count - FROM domains d - INNER JOIN ( - SELECT domain_host, domain_tld, COUNT(*) as feed_count - FROM feeds - WHERE item_count > 0 - GROUP BY domain_host, domain_tld - ) f ON d.host = f.domain_host AND d.tld = f.domain_tld - WHERE d.status = $1 - ORDER BY d.tld ASC, d.host ASC - LIMIT $2 OFFSET $3 - `, status, limit, offset) - } else { - // Default: exclude 'skip' status domains - rows, err = c.db.Query(` - SELECT d.host, d.tld, d.status, d.last_error, f.feed_count - FROM domains d - INNER JOIN ( - SELECT domain_host, domain_tld, COUNT(*) as feed_count - FROM feeds - WHERE item_count > 0 - GROUP BY domain_host, domain_tld - ) f ON d.host = f.domain_host AND d.tld = f.domain_tld - WHERE d.status != 'skip' - ORDER BY d.tld ASC, d.host ASC - LIMIT $1 OFFSET $2 - `, limit, offset) - } - } else if tldFilter != "" && search != "" && status != "" { - // Filter by TLD, 
status, and search - if searchQuery.ExactMatch { - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND status = $2 AND LOWER(host) = $3 - ORDER BY host ASC - LIMIT $4 OFFSET $5 - `, tldFilter, status, strings.ToLower(searchQuery.Pattern), limit, offset) - } else if searchQuery.DomainHost != "" && strings.ToLower(searchQuery.DomainTLD) == strings.ToLower(tldFilter) { - // Domain-like search with matching TLD - search for exact host - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND status = $2 AND LOWER(host) = $3 - ORDER BY host ASC - LIMIT $4 OFFSET $5 - `, tldFilter, status, strings.ToLower(searchQuery.DomainHost), limit, offset) - } else { - searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%" - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND status = $2 AND LOWER(host) LIKE $3 - ORDER BY host ASC - LIMIT $4 OFFSET $5 - `, tldFilter, status, searchPattern, limit, offset) - } - } else if tldFilter != "" && search != "" { - // Filter by TLD and search - // If search looks like a domain with matching TLD, use DomainHost for exact/pattern match - if searchQuery.ExactMatch { - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND LOWER(host) = $2 - ORDER BY host ASC - LIMIT $3 OFFSET $4 - `, tldFilter, strings.ToLower(searchQuery.Pattern), limit, offset) - } else if searchQuery.DomainHost != "" && strings.ToLower(searchQuery.DomainTLD) == strings.ToLower(tldFilter) { - // Domain-like search with matching TLD - search for exact host or pattern - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND LOWER(host) = $2 - ORDER BY host ASC - LIMIT $3 OFFSET $4 - `, tldFilter, strings.ToLower(searchQuery.DomainHost), limit, offset) - } else { - searchPattern := "%" + strings.ToLower(searchQuery.Pattern) + "%" - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND LOWER(host) LIKE $2 - ORDER BY host ASC - LIMIT $3 OFFSET $4 - `, tldFilter, searchPattern, limit, offset) - } - } else if tldFilter != "" && status != "" { - // Filter by TLD and status - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 AND status = $2 - ORDER BY host ASC - LIMIT $3 OFFSET $4 - `, tldFilter, status, limit, offset) - } else if tldFilter != "" { - // Filter by TLD only (show all statuses) - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE tld = $1 - ORDER BY host ASC - LIMIT $2 OFFSET $3 - `, tldFilter, limit, offset) - } else if status != "" { - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE status = $1 - ORDER BY tld ASC, host ASC - LIMIT $2 OFFSET $3 - `, status, limit, offset) - } else { - // Default: exclude 'skip' status domains - rows, err = c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE status != 'skip' - ORDER BY tld ASC, host ASC - LIMIT $1 OFFSET $2 - `, limit, offset) - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type,omitempty"` - Status string 
`json:"status,omitempty"` - PublishStatus string `json:"publish_status,omitempty"` - Language string `json:"language,omitempty"` - ItemCount int `json:"item_count,omitempty"` - } - - type DomainInfo struct { - Host string `json:"host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - FeedCount int `json:"feed_count"` - Feeds []FeedInfo `json:"feeds,omitempty"` - } - - var domains []DomainInfo - var hosts []string - for rows.Next() { - var d DomainInfo - var tld, lastError *string - if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil { - continue - } - d.TLD = StringValue(tld) - d.LastError = StringValue(lastError) - domains = append(domains, d) - // Build full domain for feed lookup (source_host = host.tld) - fullDomain := d.Host - if d.TLD != "" { - fullDomain = d.Host + "." + d.TLD - } - hosts = append(hosts, fullDomain) - } - - // Now get feeds for these domains (with actual item count from items table) - // Apply the same feed filters used for domain selection - if len(hosts) > 0 { - feedQuery := ` - SELECT f.domain_host || '.' || f.domain_tld as source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language, - (SELECT COUNT(*) FROM items WHERE feed_url = f.url) as item_count - FROM feeds f - WHERE f.domain_host || '.' || f.domain_tld = ANY($1)` - feedArgs := []interface{}{hosts} - feedArgNum := 2 - - // Apply feed status filters (publish_status for pass/skip/hold/dead) - if len(statusList) > 0 { - if feedMode == "exclude" { - feedQuery += fmt.Sprintf(" AND (f.publish_status IS NULL OR f.publish_status NOT IN (SELECT unnest($%d::text[])))", feedArgNum) - } else { - feedQuery += fmt.Sprintf(" AND f.publish_status IN (SELECT unnest($%d::text[]))", feedArgNum) - } - feedArgs = append(feedArgs, statusList) - feedArgNum++ - } - - // Apply feed type filters (including special "empty" type) - if len(typeList) > 0 { - hasEmpty := false - var regularTypes []string - for _, t := range typeList { - if t == "empty" { - hasEmpty = true - } else { - regularTypes = append(regularTypes, t) - } - } - - if feedMode == "exclude" { - if len(regularTypes) > 0 && hasEmpty { - feedQuery += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[]))) AND f.item_count > 0", feedArgNum) - feedArgs = append(feedArgs, regularTypes) - feedArgNum++ - } else if len(regularTypes) > 0 { - feedQuery += fmt.Sprintf(" AND (f.type IS NULL OR f.type NOT IN (SELECT unnest($%d::text[])))", feedArgNum) - feedArgs = append(feedArgs, regularTypes) - feedArgNum++ - } else if hasEmpty { - feedQuery += " AND f.item_count > 0" - } - } else { - if len(regularTypes) > 0 && hasEmpty { - feedQuery += fmt.Sprintf(" AND (f.type IN (SELECT unnest($%d::text[])) OR f.item_count IS NULL OR f.item_count = 0)", feedArgNum) - feedArgs = append(feedArgs, regularTypes) - feedArgNum++ - } else if len(regularTypes) > 0 { - feedQuery += fmt.Sprintf(" AND f.type IN (SELECT unnest($%d::text[]))", feedArgNum) - feedArgs = append(feedArgs, regularTypes) - feedArgNum++ - } else if hasEmpty { - feedQuery += " AND (f.item_count IS NULL OR f.item_count = 0)" - } - } - } - - feedQuery += " ORDER BY f.domain_host, f.domain_tld, f.url" - - feedRows, err := c.db.Query(feedQuery, feedArgs...) 
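A side note on the search patterns used throughout both files: terms are wrapped as "%" + strings.ToLower(term) + "%" and compared with LIKE, so a term containing % or _ acts as a wildcard rather than matching literally. A minimal escaping helper, as a sketch (likePattern and likeEscaper are hypothetical names, not part of the deleted code):

package main

import "strings"

// likeEscaper neutralizes LIKE metacharacters so user input matches
// literally; backslash is Postgres's default LIKE escape character.
var likeEscaper = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`)

// likePattern builds the "%term%" pattern the handlers construct inline.
func likePattern(term string) string {
	return "%" + likeEscaper.Replace(strings.ToLower(term)) + "%"
}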
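The feed lookup just above fetches feeds for the whole page of domains in one round trip: pgx binds a Go []string to a text[] placeholder, so = ANY($1) stands in for N per-domain queries. The same pattern in isolation, as a sketch (pgxpool used directly here; the file's own c.db wrapper takes no context argument):

package main

import (
	"context"

	"github.com/jackc/pgx/v5/pgxpool"
)

// feedsForHosts returns feed URLs for every "host.tld" in hosts at once.
func feedsForHosts(ctx context.Context, db *pgxpool.Pool, hosts []string) ([]string, error) {
	rows, err := db.Query(ctx, `
		SELECT f.url
		FROM feeds f
		WHERE f.domain_host || '.' || f.domain_tld = ANY($1)`, hosts)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var urls []string
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	return urls, rows.Err()
}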
- if err == nil { - defer feedRows.Close() - feedsByHost := make(map[string][]FeedInfo) - for feedRows.Next() { - var host string - var f FeedInfo - var title, feedType, status, publishStatus, language *string - var itemCount *int - if err := feedRows.Scan(&host, &f.URL, &title, &feedType, &status, &publishStatus, &language, &itemCount); err != nil { - continue - } - f.Title = StringValue(title) - f.Type = StringValue(feedType) - f.Status = StringValue(status) - f.PublishStatus = StringValue(publishStatus) - f.Language = StringValue(language) - if itemCount != nil { - f.ItemCount = *itemCount - } - feedsByHost[host] = append(feedsByHost[host], f) - } - // Attach feeds to domains (feedsByHost is keyed by full domain) - for i := range domains { - fullHost := domains[i].Host - if domains[i].TLD != "" { - fullHost = domains[i].Host + "." + domains[i].TLD - } - if feeds, ok := feedsByHost[fullHost]; ok { - domains[i].Feeds = feeds - } - } - } - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(domains) -} - -func (c *Crawler) handleAPIDomainsByStatus(w http.ResponseWriter, r *http.Request) { - status := r.URL.Query().Get("status") - if status == "" { - http.Error(w, "status parameter required", http.StatusBadRequest) - return - } - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - rows, err := c.db.Query(` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE status = $1 - ORDER BY tld ASC, host ASC - LIMIT $2 OFFSET $3 - `, status, limit, offset) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type DomainInfo struct { - Host string `json:"host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - FeedCount int `json:"feed_count"` - } - - var domains []DomainInfo - for rows.Next() { - var d DomainInfo - var tld, lastError *string - if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil { - continue - } - d.TLD = StringValue(tld) - d.LastError = StringValue(lastError) - domains = append(domains, d) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(domains) -} - -func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - // Parse host into domain_host and domain_tld - domainHost := stripTLD(host) - domainTLD := getTLD(host) - - rows, err := c.db.Query(` - SELECT url, title, type, status, last_error, item_count, publish_status, language - FROM feeds - WHERE domain_host = $1 AND domain_tld = $2 - ORDER BY url ASC - LIMIT $3 OFFSET $4 - `, domainHost, domainTLD, limit, offset) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title"` - Type string `json:"type"` - Status string `json:"status,omitempty"` - LastError string `json:"last_error,omitempty"` - 
ItemCount int `json:"item_count,omitempty"` - PublishStatus string `json:"publish_status,omitempty"` - Language string `json:"language,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, status, lastError, publishStatus, language *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &status, &lastError, &itemCount, &publishStatus, &language); err != nil { - continue - } - f.Title = StringValue(title) - f.Status = StringValue(status) - f.LastError = StringValue(lastError) - f.PublishStatus = StringValue(publishStatus) - f.Language = StringValue(language) - if itemCount != nil { - f.ItemCount = *itemCount - } - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(feeds) -} - -// handleAPISetDomainStatus sets the status for a domain -// status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for 'drop') -func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - status := r.URL.Query().Get("status") - - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - if status != "hold" && status != "pass" && status != "skip" { - http.Error(w, "status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest) - return - } - - host = normalizeHost(host) - - // Setting to 'skip' triggers takedown (hide content but preserve data) - if status == "skip" { - result := c.skipDomain(host) - if result.Error != "" { - http.Error(w, result.Error, http.StatusInternalServerError) - return - } - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) - return - } - - // When setting to pass, clear any last_error - var err error - strippedHost := stripTLD(host) - tld := getTLD(host) - if status == "pass" { - _, err = c.db.Exec(` - UPDATE domains SET status = $1, last_error = NULL - WHERE host = $2 AND tld = $3 - `, status, strippedHost, tld) - } else { - _, err = c.db.Exec(` - UPDATE domains SET status = $1 - WHERE host = $2 AND tld = $3 - `, status, strippedHost, tld) - } - - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "host": host, - "status": status, - }) -} - -func (c *Crawler) handleAPIRevisitDomain(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - _, err := c.db.Exec(` - UPDATE domains SET status = 'pass', crawled_at = '0001-01-01 00:00:00', last_error = NULL - WHERE host = $1 AND tld = $2 - `, stripTLD(host), getTLD(host)) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{"status": "queued", "host": host}) -} - -// handleAPIPriorityCrawl immediately crawls a domain (adds it if not exists) -func (c *Crawler) handleAPIPriorityCrawl(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - host = normalizeHost(host) - - // Add domain if it doesn't exist, or reset to pass for crawling - _, err := c.db.Exec(` - INSERT INTO domains (host, status, tld) - VALUES ($1, 'pass', $2) - ON 
CONFLICT(host, tld) DO UPDATE SET status = 'pass', crawled_at = '0001-01-01 00:00:00', last_error = NULL - `, stripTLD(host), getTLD(host)) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Crawl synchronously - fmt.Printf("Priority crawl: %s\n", host) - feedsFound, crawlErr := c.feedCrawl(host) - - errStr := "" - if crawlErr != nil { - errStr = crawlErr.Error() - } - - // Mark as crawled - c.markDomainCrawled(stripTLD(host), getTLD(host), feedsFound, errStr) - - // Get the feeds we found - feeds, _ := c.GetFeedsByHost(host) - - type FeedSummary struct { - URL string `json:"url"` - Title string `json:"title"` - Type string `json:"type"` - Category string `json:"category"` - Status string `json:"status"` - } - var feedSummaries []FeedSummary - for _, f := range feeds { - feedSummaries = append(feedSummaries, FeedSummary{ - URL: f.URL, - Title: f.Title, - Type: f.Type, - Category: f.Category, - Status: f.Status, - }) - } - - result := map[string]interface{}{ - "host": host, - "feeds_found": feedsFound, - "feeds": feedSummaries, - } - if crawlErr != nil { - result["error"] = crawlErr.Error() - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPIFilter handles flexible filtering with stackable parameters -func (c *Crawler) handleAPIFilter(w http.ResponseWriter, r *http.Request) { - tld := r.URL.Query().Get("tld") - domain := r.URL.Query().Get("domain") - feedStatus := r.URL.Query().Get("feedStatus") - domainStatus := r.URL.Query().Get("domainStatus") - languages := r.URL.Query().Get("languages") // comma-separated list - show := r.URL.Query().Get("show") // "feeds" or "domains" - sort := r.URL.Query().Get("sort") // "alpha" or "feeds" - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - // Parse languages into slice - var langList []string - if languages != "" { - for _, lang := range strings.Split(languages, ",") { - lang = strings.TrimSpace(lang) - if lang != "" { - langList = append(langList, lang) - } - } - } - - // Determine what to show based on filters - if show == "" { - if feedStatus != "" || domain != "" || len(langList) > 0 { - show = "feeds" - } else { - show = "domains" - } - } - - if show == "feeds" { - c.filterFeeds(w, tld, domain, feedStatus, langList, limit, offset) - } else { - c.filterDomains(w, tld, domainStatus, sort, limit, offset) - } -} - -func (c *Crawler) filterDomains(w http.ResponseWriter, tld, status, sort string, limit, offset int) { - var args []interface{} - argNum := 1 - query := ` - SELECT host, tld, status, last_error, feeds_found - FROM domains - WHERE 1=1` - - if tld != "" { - query += fmt.Sprintf(" AND tld = $%d", argNum) - args = append(args, tld) - argNum++ - } - if status != "" { - query += fmt.Sprintf(" AND status = $%d", argNum) - args = append(args, status) - argNum++ - } - - // Sort by feed count descending or alphabetically - if sort == "feeds" { - query += fmt.Sprintf(" ORDER BY feeds_found DESC, host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) - } else { - query += fmt.Sprintf(" ORDER BY tld ASC, host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) - } - args = append(args, limit, offset) - - rows, err := c.db.Query(query, args...) 
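Every handler here parses paging the same way: fmt.Sscanf(l, "%d", &limit) with the error discarded, followed by a cap. Sscanf stops at the first non-digit ("50x" parses as 50) and leaves the default untouched on failure, which is usually the intent but easy to miss. A stricter strconv-based helper, as a sketch (pageParams is a hypothetical name):

package main

import (
	"net/http"
	"strconv"
)

// pageParams parses limit/offset with strconv, keeping defaults on absent
// or malformed input and clamping limit to max.
func pageParams(r *http.Request, max int) (limit, offset int) {
	limit = 100
	if v, err := strconv.Atoi(r.URL.Query().Get("limit")); err == nil && v > 0 {
		limit = v
	}
	if limit > max {
		limit = max
	}
	if v, err := strconv.Atoi(r.URL.Query().Get("offset")); err == nil && v >= 0 {
		offset = v
	}
	return limit, offset
}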
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type DomainInfo struct { - Host string `json:"host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - FeedCount int `json:"feed_count"` - } - - var domains []DomainInfo - for rows.Next() { - var d DomainInfo - var tldVal, lastError *string - if err := rows.Scan(&d.Host, &tldVal, &d.Status, &lastError, &d.FeedCount); err != nil { - continue - } - d.TLD = StringValue(tldVal) - d.LastError = StringValue(lastError) - domains = append(domains, d) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "type": "domains", - "data": domains, - }) -} - -func (c *Crawler) handleAPITLDDomains(w http.ResponseWriter, r *http.Request) { - tld := r.URL.Query().Get("tld") - if tld == "" { - http.Error(w, "tld parameter required", http.StatusBadRequest) - return - } - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - rows, err := c.db.Query(` - SELECT host, status, last_error, feeds_found - FROM domains - WHERE tld = $1 - ORDER BY host ASC - LIMIT $2 OFFSET $3 - `, tld, limit, offset) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type DomainInfo struct { - Host string `json:"host"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - FeedCount int `json:"feed_count"` - } - - var domains []DomainInfo - for rows.Next() { - var d DomainInfo - var lastError *string - if err := rows.Scan(&d.Host, &d.Status, &lastError, &d.FeedCount); err != nil { - continue - } - d.LastError = StringValue(lastError) - domains = append(domains, d) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(domains) -} - -func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) { - status := r.URL.Query().Get("status") // domain status: pass, skip, hold, dead - feedMode := r.URL.Query().Get("feedMode") // include or exclude - feedStatuses := r.URL.Query().Get("feedStatuses") // comma-separated: pass,skip,hold,dead - feedTypes := r.URL.Query().Get("feedTypes") // comma-separated: rss,atom,json,unknown,empty - search := r.URL.Query().Get("search") // search query - - // Parse comma-separated values - var statusList, typeList []string - if feedStatuses != "" { - statusList = strings.Split(feedStatuses, ",") - } - if feedTypes != "" { - typeList = strings.Split(feedTypes, ",") - } - - var rows pgx.Rows - var err error - - // If feed filter is specified, query from feeds table instead - if len(statusList) > 0 || len(typeList) > 0 || feedMode == "exclude" { - // Build query to get TLDs from feeds - query := `SELECT domain_tld as tld, COUNT(DISTINCT domain_host || '.' 
|| domain_tld) as domain_count FROM feeds WHERE domain_tld IS NOT NULL` - args := []interface{}{} - argNum := 1 - - // Handle status filters (publish_status for pass/skip/hold/dead) - if len(statusList) > 0 { - if feedMode == "exclude" { - query += fmt.Sprintf(" AND (publish_status IS NULL OR publish_status NOT IN (SELECT unnest($%d::text[])))", argNum) - } else { - query += fmt.Sprintf(" AND publish_status IN (SELECT unnest($%d::text[]))", argNum) - } - args = append(args, statusList) - argNum++ - } - - // Handle type filters (including special "empty" type) - if len(typeList) > 0 { - hasEmpty := false - var regularTypes []string - for _, t := range typeList { - if t == "empty" { - hasEmpty = true - } else { - regularTypes = append(regularTypes, t) - } - } - - if feedMode == "exclude" { - // Exclude mode: exclude these types - if len(regularTypes) > 0 && hasEmpty { - query += fmt.Sprintf(" AND type NOT IN (SELECT unnest($%d::text[])) AND item_count > 0", argNum) - args = append(args, regularTypes) - argNum++ - } else if len(regularTypes) > 0 { - query += fmt.Sprintf(" AND (type IS NULL OR type NOT IN (SELECT unnest($%d::text[])))", argNum) - args = append(args, regularTypes) - argNum++ - } else if hasEmpty { - query += " AND item_count > 0" - } - } else { - // Include mode: include these types - if len(regularTypes) > 0 && hasEmpty { - query += fmt.Sprintf(" AND (type IN (SELECT unnest($%d::text[])) OR item_count IS NULL OR item_count = 0)", argNum) - args = append(args, regularTypes) - argNum++ - } else if len(regularTypes) > 0 { - query += fmt.Sprintf(" AND type IN (SELECT unnest($%d::text[]))", argNum) - args = append(args, regularTypes) - argNum++ - } else if hasEmpty { - query += " AND (item_count IS NULL OR item_count = 0)" - } - } - } - - if search != "" { - sq := parseSearchPrefix(search) - searchPattern := "%" + strings.ToLower(sq.Pattern) + "%" - - // Only extract TLD for domain searches (d:npr.org -> exact match for npr.org) - var tldFilter string - var exactMatch bool - hostSearchPattern := searchPattern - if sq.Type == "domain" { - hostPattern, detectedTLD := parseSearchTerm(sq.Pattern) - if detectedTLD != "" { - tldFilter = detectedTLD - exactMatch = true - hostSearchPattern = "%" + strings.ToLower(hostPattern) + "%" - } - } - - switch sq.Type { - case "domain": - // Search domain names - if exactMatch && tldFilter != "" { - // d:npr.org -> exact match - query += fmt.Sprintf(" AND LOWER(domain_host || '.' || domain_tld) = $%d", argNum) - args = append(args, strings.ToLower(sq.Pattern)) - } else if tldFilter != "" { - query += fmt.Sprintf(" AND domain_tld = $%d AND LOWER(domain_host || '.' || domain_tld) LIKE $%d", argNum, argNum+1) - args = append(args, tldFilter, hostSearchPattern) - } else { - query += fmt.Sprintf(" AND LOWER(domain_host || '.' 
|| domain_tld) LIKE $%d", argNum) - args = append(args, hostSearchPattern) - } - case "url": - query += fmt.Sprintf(" AND LOWER(url) LIKE $%d", argNum) - args = append(args, searchPattern) - case "title": - query += fmt.Sprintf(" AND LOWER(title) LIKE $%d", argNum) - args = append(args, searchPattern) - case "description": - query += fmt.Sprintf(" AND LOWER(description) LIKE $%d", argNum) - args = append(args, searchPattern) - case "item": - query += fmt.Sprintf(" AND EXISTS (SELECT 1 FROM items i WHERE i.feed_url = feeds.url AND LOWER(i.title) LIKE $%d)", argNum) - args = append(args, searchPattern) - default: - // "all" - search domains and feeds (NOT items - use i: prefix for item search) - // Also include exact domain match if pattern looks like a domain - if sq.DomainHost != "" && sq.DomainTLD != "" { - fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD) - query += fmt.Sprintf(` AND ( - LOWER(domain_host || '.' || domain_tld) LIKE $%d OR - LOWER(url) LIKE $%d OR - LOWER(title) LIKE $%d OR - LOWER(description) LIKE $%d OR - LOWER(domain_host || '.' || domain_tld) = $%d - )`, argNum, argNum, argNum, argNum, argNum+1) - args = append(args, searchPattern, fullDomain) - } else { - query += fmt.Sprintf(` AND ( - LOWER(domain_host || '.' || domain_tld) LIKE $%d OR - LOWER(url) LIKE $%d OR - LOWER(title) LIKE $%d OR - LOWER(description) LIKE $%d - )`, argNum, argNum, argNum, argNum) - args = append(args, searchPattern) - } - } - } - query += " GROUP BY domain_tld ORDER BY domain_tld ASC" - rows, err = c.db.Query(query, args...) - } else if search != "" { - // Parse search prefix for type-specific searching - sq := parseSearchPrefix(search) - - // Use the helper to build the TLD search query - query, args := buildTLDSearchQuery(sq) - rows, err = c.db.Query(query, args...) 
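The include/exclude type filters above lean on a SQL NULL subtlety: type NOT IN (SELECT unnest(...)) evaluates to NULL rather than true when type is NULL, so exclude mode needs the explicit type IS NULL OR disjunct to keep untyped feeds. Note that the first exclude branch here (regular types plus "empty") omits that guard and so silently drops NULL-typed rows, unlike its sibling branches and the domains-side version earlier in the file. Condensed into one helper, as a refactor sketch (appendTypeFilter is hypothetical):

package main

import "fmt"

// appendTypeFilter adds an include or exclude condition over feed types via
// a text[] parameter, preserving NULL-typed rows in exclude mode.
func appendTypeFilter(query string, args []interface{}, argNum int, types []string, exclude bool) (string, []interface{}, int) {
	if exclude {
		// x NOT IN (...) is NULL when x is NULL; keep those rows explicitly.
		query += fmt.Sprintf(" AND (type IS NULL OR type NOT IN (SELECT unnest($%d::text[])))", argNum)
	} else {
		query += fmt.Sprintf(" AND type IN (SELECT unnest($%d::text[]))", argNum)
	}
	return query, append(args, types), argNum + 1
}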
- } else if status != "" { - // TLDs filtered by domain status - rows, err = c.db.Query(` - SELECT tld::text as tld, COUNT(*) as domain_count - FROM domains - WHERE tld IS NOT NULL AND status = $1 - GROUP BY tld - HAVING COUNT(*) > 0 - ORDER BY tld ASC - `, status) - } else { - // All TLDs from enum with domain counts - rows, err = c.db.Query(` - SELECT e.enumlabel as tld, COALESCE(d.cnt, 0) as domain_count - FROM pg_enum e - LEFT JOIN ( - SELECT tld::text as tld, COUNT(*) as cnt - FROM domains - GROUP BY tld - ) d ON e.enumlabel = d.tld - WHERE e.enumtypid = 'tld_enum'::regtype - ORDER BY e.enumlabel ASC - `) - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type TLDInfo struct { - TLD string `json:"tld"` - DomainCount int `json:"domain_count"` - } - - var tlds []TLDInfo - for rows.Next() { - var t TLDInfo - if err := rows.Scan(&t.TLD, &t.DomainCount); err != nil { - continue - } - tlds = append(tlds, t) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(tlds) -} - -func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) { - tld := r.URL.Query().Get("tld") - if tld == "" { - http.Error(w, "tld parameter required", http.StatusBadRequest) - return - } - search := r.URL.Query().Get("search") - - stats := map[string]interface{}{ - "tld": tld, - } - - // Build WHERE clause based on whether search is provided - var domainWhere, feedWhere string - var domainArgs, feedArgs []interface{} - - if search != "" { - // Parse search prefix for type-specific searching - sq := parseSearchPrefix(search) - searchPattern := "%" + strings.ToLower(sq.Pattern) + "%" - - // For domain searches, check for exact match - if sq.Type == "domain" { - hostPart, detectedTLD := parseSearchTerm(sq.Pattern) - if detectedTLD != "" { - // d:npr.org -> exact match for host "npr" in specified TLD - domainWhere = "tld = $1 AND lower(host) = $2" - domainArgs = []interface{}{tld, strings.ToLower(hostPart)} - feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) = $2" - feedArgs = []interface{}{tld, strings.ToLower(sq.Pattern)} - } else { - // d:npr -> pattern match in specified TLD - domainWhere = "tld = $1 AND lower(host) LIKE $2" - domainArgs = []interface{}{tld, searchPattern} - feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2" - feedArgs = []interface{}{tld, searchPattern} - } - } else { - // Other search types - pattern match - domainWhere = "tld = $1 AND lower(host) LIKE $2" - domainArgs = []interface{}{tld, searchPattern} - feedWhere = "domain_tld = $1 AND lower(domain_host || '.' || domain_tld) LIKE $2" - feedArgs = []interface{}{tld, searchPattern} - } - stats["search"] = search - } else { - // Filter by TLD only - domainWhere = "tld = $1" - domainArgs = []interface{}{tld} - feedWhere = "domain_tld = $1" - feedArgs = []interface{}{tld} - } - - // Domain stats by status - var totalDomains, passDomains, skipDomains, holdDomains, deadDomains int - err := c.db.QueryRow(`SELECT COUNT(*) FROM domains WHERE `+domainWhere, domainArgs...).Scan(&totalDomains) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - stats["total_domains"] = totalDomains - - rows, err := c.db.Query(`SELECT status, COUNT(*) FROM domains WHERE `+domainWhere+` GROUP BY status`, domainArgs...) 
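When no filters apply, handleAPITLDs enumerates TLDs from the enum type itself via pg_enum, so TLDs with zero domains still show up with a count of 0. That introspection in isolation, as a sketch (assumes the tld_enum type named in the query above):

package main

import (
	"context"

	"github.com/jackc/pgx/v5/pgxpool"
)

// tldLabels lists tld_enum's labels in their declared order.
func tldLabels(ctx context.Context, db *pgxpool.Pool) ([]string, error) {
	rows, err := db.Query(ctx, `
		SELECT e.enumlabel
		FROM pg_enum e
		WHERE e.enumtypid = 'tld_enum'::regtype
		ORDER BY e.enumsortorder`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var labels []string
	for rows.Next() {
		var l string
		if err := rows.Scan(&l); err != nil {
			return nil, err
		}
		labels = append(labels, l)
	}
	return labels, rows.Err()
}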
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for rows.Next() { - var status string - var count int - if err := rows.Scan(&status, &count); err != nil { - continue - } - switch status { - case "pass": - passDomains = count - case "skip": - skipDomains = count - case "hold": - holdDomains = count - case "dead": - deadDomains = count - } - } - rows.Close() - stats["pass_domains"] = passDomains - stats["skip_domains"] = skipDomains - stats["hold_domains"] = holdDomains - stats["dead_domains"] = deadDomains - - // Feed stats - var totalFeeds, passFeeds, skipFeeds, holdFeeds, deadFeeds, emptyFeeds int - var rssFeeds, atomFeeds, jsonFeeds, unknownFeeds int - - err = c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE `+feedWhere, feedArgs...).Scan(&totalFeeds) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - stats["total_feeds"] = totalFeeds - - // Feed status counts - statusRows, err := c.db.Query(`SELECT COALESCE(status, 'hold'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY status`, feedArgs...) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for statusRows.Next() { - var status string - var count int - if err := statusRows.Scan(&status, &count); err != nil { - continue - } - switch status { - case "pass": - passFeeds = count - case "skip": - skipFeeds = count - case "hold": - holdFeeds = count - case "dead": - deadFeeds = count - } - } - statusRows.Close() - stats["pass_feeds"] = passFeeds - stats["skip_feeds"] = skipFeeds - stats["hold_feeds"] = holdFeeds - stats["dead_feeds"] = deadFeeds - - // Empty feeds count - c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE (`+feedWhere+`) AND (item_count IS NULL OR item_count = 0)`, feedArgs...).Scan(&emptyFeeds) - stats["empty_feeds"] = emptyFeeds - - // Feed type counts - typeRows, err := c.db.Query(`SELECT COALESCE(type, 'unknown'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY type`, feedArgs...) 
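A quirk in the tallies above: SELECT COALESCE(status, 'hold'), COUNT(*) ... GROUP BY status groups on the raw column, so NULL statuses surface as a second 'hold' row, and the holdFeeds = count assignment keeps whichever row arrives last (the type tally survives the same pattern because its default arm accumulates with +=). Grouping on the coalesced expression folds the rows together; a drop-in sketch for the status query:

statusRows, err := c.db.Query(
	`SELECT COALESCE(status, 'hold'), COUNT(*)
	 FROM feeds WHERE `+feedWhere+`
	 GROUP BY 1`, feedArgs...)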
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for typeRows.Next() { - var feedType string - var count int - if err := typeRows.Scan(&feedType, &count); err != nil { - continue - } - switch feedType { - case "rss": - rssFeeds = count - case "atom": - atomFeeds = count - case "json": - jsonFeeds = count - default: - unknownFeeds += count - } - } - typeRows.Close() - stats["rss_feeds"] = rssFeeds - stats["atom_feeds"] = atomFeeds - stats["json_feeds"] = jsonFeeds - stats["unknown_feeds"] = unknownFeeds - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(stats) -} - -func (c *Crawler) handleAPISearchStats(w http.ResponseWriter, r *http.Request) { - search := r.URL.Query().Get("search") - if search == "" { - http.Error(w, "search parameter required", http.StatusBadRequest) - return - } - - // Parse search prefix for type-specific searching - sq := parseSearchPrefix(search) - searchPattern := "%" + strings.ToLower(sq.Pattern) + "%" - - // Only extract TLD for domain searches (d:npr.org -> exact match for npr.org) - var tldFilter, hostPart string - var exactMatch bool - if sq.Type == "domain" { - hostPart, tldFilter = parseSearchTerm(sq.Pattern) - if tldFilter != "" { - searchPattern = "%" + strings.ToLower(hostPart) + "%" - exactMatch = true - } - } - - stats := map[string]interface{}{} - - // Build WHERE clause based on search type - var domainWhere, feedWhere string - var domainArgs, feedArgs []interface{} - - switch sq.Type { - case "domain": - if exactMatch && tldFilter != "" { - // d:npr.org -> exact match - domainWhere = "tld = $1 AND LOWER(host) = $2" - domainArgs = []interface{}{tldFilter, strings.ToLower(hostPart)} - feedWhere = "LOWER(domain_host || '.' || domain_tld) = $1" - feedArgs = []interface{}{strings.ToLower(sq.Pattern)} - } else if tldFilter != "" { - domainWhere = "tld = $1 AND LOWER(host) LIKE $2" - domainArgs = []interface{}{tldFilter, searchPattern} - feedWhere = "domain_tld = $1 AND LOWER(domain_host || '.' || domain_tld) LIKE $2" - feedArgs = []interface{}{tldFilter, searchPattern} - } else { - domainWhere = "LOWER(host) LIKE $1" - domainArgs = []interface{}{searchPattern} - feedWhere = "LOWER(domain_host || '.' 
|| domain_tld) LIKE $1" - feedArgs = []interface{}{searchPattern} - } - case "url": - domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.url) LIKE $1)" - domainArgs = []interface{}{searchPattern} - feedWhere = "LOWER(url) LIKE $1" - feedArgs = []interface{}{searchPattern} - case "title": - domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.title) LIKE $1)" - domainArgs = []interface{}{searchPattern} - feedWhere = "LOWER(title) LIKE $1" - feedArgs = []interface{}{searchPattern} - case "description": - domainWhere = "EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(f.description) LIKE $1)" - domainArgs = []interface{}{searchPattern} - feedWhere = "LOWER(description) LIKE $1" - feedArgs = []interface{}{searchPattern} - case "item": - domainWhere = "EXISTS (SELECT 1 FROM feeds f INNER JOIN items i ON i.feed_url = f.url WHERE f.domain_host = host AND f.domain_tld = tld AND LOWER(i.title) LIKE $1)" - domainArgs = []interface{}{searchPattern} - feedWhere = "EXISTS (SELECT 1 FROM items i WHERE i.feed_url = url AND LOWER(i.title) LIKE $1)" - feedArgs = []interface{}{searchPattern} - default: - // "all" - search domains and feeds (NOT items - use i: prefix for item search) - // Also include exact domain match if pattern looks like a domain - if sq.DomainHost != "" && sq.DomainTLD != "" { - domainWhere = `( - LOWER(host) LIKE $1 OR - (LOWER(host) = $2 AND tld::text = $3) OR - EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND ( - LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1 - )) - )` - domainArgs = []interface{}{searchPattern, strings.ToLower(sq.DomainHost), strings.ToLower(sq.DomainTLD)} - fullDomain := strings.ToLower(sq.DomainHost + "." + sq.DomainTLD) - feedWhere = `( - LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1 OR LOWER(domain_host || '.' || domain_tld) = $2 - )` - feedArgs = []interface{}{searchPattern, fullDomain} - } else { - domainWhere = `( - LOWER(host) LIKE $1 OR - EXISTS (SELECT 1 FROM feeds f WHERE f.domain_host = host AND f.domain_tld = tld AND ( - LOWER(f.url) LIKE $1 OR LOWER(f.title) LIKE $1 OR LOWER(f.description) LIKE $1 - )) - )` - domainArgs = []interface{}{searchPattern} - feedWhere = `( - LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 OR LOWER(title) LIKE $1 OR LOWER(description) LIKE $1 - )` - feedArgs = []interface{}{searchPattern} - } - } - - // Count matching domains by status - var totalDomains, passDomains, skipDomains, holdDomains, deadDomains int - rows, err := c.db.Query(`SELECT status, COUNT(*) FROM domains WHERE `+domainWhere+` GROUP BY status`, domainArgs...) 
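Note how these WHERE clauses reuse $1 for every LIKE test: Postgres placeholders are positional references, not consumed slots, so a single pattern argument can back any number of mentions and the args slice stays one element long. The same idea in isolation, as a sketch (searchFeedURLs is a hypothetical method; c.db as in the handlers above):

// searchFeedURLs matches one pattern against three columns with one argument.
func (c *Crawler) searchFeedURLs(term string) (pgx.Rows, error) {
	return c.db.Query(`
		SELECT url FROM feeds
		WHERE LOWER(url) LIKE $1
		   OR LOWER(title) LIKE $1
		   OR LOWER(description) LIKE $1`,
		"%"+strings.ToLower(term)+"%")
}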
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for rows.Next() { - var status string - var count int - if err := rows.Scan(&status, &count); err != nil { - continue - } - totalDomains += count - switch status { - case "pass": - passDomains = count - case "skip": - skipDomains = count - case "hold": - holdDomains = count - case "dead": - deadDomains = count - } - } - rows.Close() - stats["total_domains"] = totalDomains - stats["pass_domains"] = passDomains - stats["skip_domains"] = skipDomains - stats["hold_domains"] = holdDomains - stats["dead_domains"] = deadDomains - - // Count matching feeds by status - var totalFeeds, passFeeds, skipFeeds, holdFeeds, deadFeeds, emptyFeeds int - var rssFeeds, atomFeeds, jsonFeeds, unknownFeeds int - - statusRows, err := c.db.Query(`SELECT COALESCE(status, 'hold'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY status`, feedArgs...) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for statusRows.Next() { - var status string - var count int - if err := statusRows.Scan(&status, &count); err != nil { - continue - } - totalFeeds += count - switch status { - case "pass": - passFeeds = count - case "skip": - skipFeeds = count - case "hold": - holdFeeds = count - case "dead": - deadFeeds = count - } - } - statusRows.Close() - stats["total_feeds"] = totalFeeds - stats["pass_feeds"] = passFeeds - stats["skip_feeds"] = skipFeeds - stats["hold_feeds"] = holdFeeds - stats["dead_feeds"] = deadFeeds - - // Count empty feeds - c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE (`+feedWhere+`) AND (item_count IS NULL OR item_count = 0)`, feedArgs...).Scan(&emptyFeeds) - stats["empty_feeds"] = emptyFeeds - - typeRows, err := c.db.Query(`SELECT COALESCE(type, 'unknown'), COUNT(*) FROM feeds WHERE `+feedWhere+` GROUP BY type`, feedArgs...) 
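The empty-feed count just above discards both QueryRow's and Scan's errors, so a failed query silently reports zero empty feeds. A checked variant, as a sketch:

var emptyFeeds int
if err := c.db.QueryRow(
	`SELECT COUNT(*) FROM feeds WHERE (`+feedWhere+`) AND (item_count IS NULL OR item_count = 0)`,
	feedArgs...).Scan(&emptyFeeds); err != nil {
	http.Error(w, err.Error(), http.StatusInternalServerError)
	return
}
stats["empty_feeds"] = emptyFeeds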
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - for typeRows.Next() { - var feedType string - var count int - if err := typeRows.Scan(&feedType, &count); err != nil { - continue - } - switch feedType { - case "rss": - rssFeeds = count - case "atom": - atomFeeds = count - case "json": - jsonFeeds = count - default: - unknownFeeds += count - } - } - typeRows.Close() - stats["rss_feeds"] = rssFeeds - stats["atom_feeds"] = atomFeeds - stats["json_feeds"] = jsonFeeds - stats["unknown_feeds"] = unknownFeeds - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(stats) -} - -// handleAPIDenyDomain skips a domain (takedown accounts, preserve data) -func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - result := c.skipDomain(host) - if result.Error != "" { - http.Error(w, result.Error, http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// DomainActionResult contains the results of a domain action -type DomainActionResult struct { - Success bool `json:"success"` - Host string `json:"host"` - Action string `json:"action"` - FeedsAffected int64 `json:"feeds_affected,omitempty"` - ItemsDeleted int64 `json:"items_deleted,omitempty"` - AccountsAffected int `json:"accounts_affected,omitempty"` - AccountErrors []string `json:"account_errors,omitempty"` - Error string `json:"error,omitempty"` -} - -// getPDSCredentials loads PDS credentials from environment or pds.env file -func getPDSCredentials() (pdsHost, pdsAdminPassword string) { - pdsHost = os.Getenv("PDS_HOST") - pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD") - if pdsHost == "" || pdsAdminPassword == "" { - if file, err := os.Open("pds.env"); err == nil { - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "PDS_HOST=") { - pdsHost = strings.TrimPrefix(line, "PDS_HOST=") - } else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") { - pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=") - } - } - file.Close() - } - } - return -} - -// getDomainDIDs returns all unique publish_account DIDs for a domain's feeds -func (c *Crawler) getDomainDIDs(host string) []string { - domainHost := stripTLD(host) - domainTLD := getTLD(host) - var dids []string - rows, err := c.db.Query(` - SELECT DISTINCT publish_account FROM feeds - WHERE domain_host = $1 AND domain_tld = $2 AND publish_account IS NOT NULL AND publish_account != '' - `, domainHost, domainTLD) - if err == nil { - defer rows.Close() - for rows.Next() { - var did string - if err := rows.Scan(&did); err == nil && did != "" { - dids = append(dids, did) - } - } - } - return dids -} - -// skipDomain sets a domain to skip, takes down PDS accounts but preserves all data -func (c *Crawler) skipDomain(host string) DomainActionResult { - result := DomainActionResult{Host: host, Action: "skip"} - - pdsHost, pdsAdminPassword := getPDSCredentials() - dids := c.getDomainDIDs(host) - - // Takedown PDS accounts (hide content but preserve data) - if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 { - publisher := NewPublisher(pdsHost) - for _, did := range dids { - if err := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip"); err != nil { - result.AccountErrors = append(result.AccountErrors, 
fmt.Sprintf("%s: %v", did, err)) - } else { - result.AccountsAffected++ - } - } - } - - // Mark feeds as skipped (but don't delete) - domainHost := stripTLD(host) - domainTLD := getTLD(host) - feedsAffected, err := c.db.Exec(` - UPDATE feeds SET status = 'skip', publish_status = 'skip' - WHERE domain_host = $1 AND domain_tld = $2 - `, domainHost, domainTLD) - if err != nil { - result.Error = fmt.Sprintf("failed to update feeds: %v", err) - return result - } - result.FeedsAffected = feedsAffected - - // Update domain status to skip - _, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)) - if err != nil { - result.Error = fmt.Sprintf("failed to update domain status: %v", err) - return result - } - - result.Success = true - return result -} - -// handleAPIDropDomain permanently deletes all data for a skipped domain -func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - // Verify domain is currently skipped - var status string - err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)).Scan(&status) - if err != nil { - http.Error(w, "domain not found", http.StatusNotFound) - return - } - if status != "skip" { - http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest) - return - } - - result := c.dropDomain(host) - if result.Error != "" { - http.Error(w, result.Error, http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// dropDomain permanently deletes all data for a domain (feeds, items, PDS accounts) -func (c *Crawler) dropDomain(host string) DomainActionResult { - result := DomainActionResult{Host: host, Action: "drop"} - - pdsHost, pdsAdminPassword := getPDSCredentials() - dids := c.getDomainDIDs(host) - - // Delete PDS accounts - if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 { - publisher := NewPublisher(pdsHost) - for _, did := range dids { - if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil { - result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err)) - } else { - result.AccountsAffected++ - } - } - } - - // Get feed URLs for this domain (needed to delete items) - domainHost := stripTLD(host) - domainTLD := getTLD(host) - var feedURLs []string - feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD) - if err == nil { - defer feedRows.Close() - for feedRows.Next() { - var url string - if err := feedRows.Scan(&url); err == nil { - feedURLs = append(feedURLs, url) - } - } - } - - // Delete items for all feeds from this domain - for _, feedURL := range feedURLs { - deleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url = $1`, feedURL) - if err == nil { - result.ItemsDeleted += deleted - } - } - - // Delete all feeds from this domain - feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE domain_host = $1 AND domain_tld = $2`, domainHost, domainTLD) - if err != nil { - result.Error = fmt.Sprintf("failed to delete feeds: %v", err) - return result - } - result.FeedsAffected = feedsDeleted - - // Update domain status to drop - _, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)) - if err != nil { - result.Error = 
fmt.Sprintf("failed to update domain status: %v", err) - return result - } - - result.Success = true - return result -} - -// handleAPIUndenyDomain removes skip status from a domain (restores accounts) -func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) { - host := r.URL.Query().Get("host") - if host == "" { - http.Error(w, "host parameter required", http.StatusBadRequest) - return - } - - // Verify domain is currently skipped - var status string - err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1 AND tld = $2`, stripTLD(host), getTLD(host)).Scan(&status) - if err != nil { - http.Error(w, "domain not found", http.StatusNotFound) - return - } - if status != "skip" { - http.Error(w, "domain is not skipped", http.StatusBadRequest) - return - } - - result := c.restoreDomain(host) - if result.Error != "" { - http.Error(w, result.Error, http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// restoreDomain removes skip status and restores PDS accounts -func (c *Crawler) restoreDomain(host string) DomainActionResult { - result := DomainActionResult{Host: host, Action: "restore"} - - pdsHost, pdsAdminPassword := getPDSCredentials() - dids := c.getDomainDIDs(host) - - // Restore PDS accounts (remove takedown) - if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 { - publisher := NewPublisher(pdsHost) - for _, did := range dids { - if err := publisher.RestoreAccount(pdsAdminPassword, did); err != nil { - result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err)) - } else { - result.AccountsAffected++ - } - } - } - - // Restore feeds to pass status - domainHost := stripTLD(host) - domainTLD := getTLD(host) - feedsAffected, err := c.db.Exec(` - UPDATE feeds SET status = 'pass', publish_status = 'pass' - WHERE domain_host = $1 AND domain_tld = $2 - `, domainHost, domainTLD) - if err != nil { - result.Error = fmt.Sprintf("failed to update feeds: %v", err) - return result - } - result.FeedsAffected = feedsAffected - - // Update domain status back to pass - _, err = c.db.Exec(` - UPDATE domains SET status = 'pass', last_error = NULL - WHERE host = $1 AND tld = $2 - `, stripTLD(host), getTLD(host)) - if err != nil { - result.Error = fmt.Sprintf("failed to update domain status: %v", err) - return result - } - - result.Success = true - return result -} diff --git a/api_feeds.go b/api_feeds.go deleted file mode 100644 index 73b4bd4..0000000 --- a/api_feeds.go +++ /dev/null @@ -1,481 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "github.com/jackc/pgx/v5" -) - -func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - type FeedDetails struct { - URL string `json:"url"` - Type string `json:"type,omitempty"` - Category string `json:"category,omitempty"` - Title string `json:"title,omitempty"` - Description string `json:"description,omitempty"` - Language string `json:"language,omitempty"` - SiteURL string `json:"siteUrl,omitempty"` - DiscoveredAt string `json:"discoveredAt,omitempty"` - LastCheckedAt string `json:"lastCheckedAt,omitempty"` - NextCheckAt string `json:"nextCheckAt,omitempty"` - LastBuildDate string `json:"lastBuildDate,omitempty"` - Status string `json:"status,omitempty"` - LastError string `json:"lastError,omitempty"` - 
ItemCount int `json:"itemCount,omitempty"` - OldestItemDate string `json:"oldestItemDate,omitempty"` - NewestItemDate string `json:"newestItemDate,omitempty"` - PublishStatus string `json:"publishStatus,omitempty"` - PublishAccount string `json:"publishAccount,omitempty"` - } - - var f FeedDetails - var category, title, description, language, siteUrl *string - var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time - var status, lastError *string - var oldestItemDate, newestItemDate *time.Time - var itemCount *int - var discoveredAt time.Time - var publishStatus, publishAccount *string - - err := c.db.QueryRow(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, - (SELECT COUNT(*) FROM items WHERE feed_url = feeds.url) as item_count, - oldest_item_date, newest_item_date, - publish_status, publish_account - FROM feeds WHERE url = $1 - `, feedURL).Scan( - &f.URL, &f.Type, &category, &title, &description, &language, &siteUrl, - &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, - &status, &lastError, - &itemCount, &oldestItemDate, &newestItemDate, - &publishStatus, &publishAccount, - ) - - if err == pgx.ErrNoRows { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - f.Category = StringValue(category) - f.Title = StringValue(title) - f.Description = StringValue(description) - f.Language = StringValue(language) - f.SiteURL = StringValue(siteUrl) - f.DiscoveredAt = discoveredAt.Format(time.RFC3339) - if lastCheckedAt != nil { - f.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) - } - if nextCheckAt != nil { - f.NextCheckAt = nextCheckAt.Format(time.RFC3339) - } - if lastBuildDate != nil { - f.LastBuildDate = lastBuildDate.Format(time.RFC3339) - } - f.Status = StringValue(status) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - if oldestItemDate != nil { - f.OldestItemDate = oldestItemDate.Format(time.RFC3339) - } - if newestItemDate != nil { - f.NewestItemDate = newestItemDate.Format(time.RFC3339) - } - f.PublishStatus = StringValue(publishStatus) - f.PublishAccount = StringValue(publishAccount) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(f) -} - -func (c *Crawler) handleAPIFeedItems(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 100 { - limit = 100 - } - } - - items, err := c.GetItemsByFeed(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if items == nil { - items = []*Item{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(items) -} - -func (c *Crawler) handleAPIFeedsByStatus(w http.ResponseWriter, r *http.Request) { - status := r.URL.Query().Get("status") - if status == "" { - http.Error(w, "status parameter required", http.StatusBadRequest) - return - } - - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - rows, err := c.db.Query(` - SELECT url, title, type, 
domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count - FROM feeds - WHERE status = $1 - ORDER BY url ASC - LIMIT $2 OFFSET $3 - `, status, limit, offset) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, sourceHost, tld, lastError *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount); err != nil { - continue - } - f.Title = StringValue(title) - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tld) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(feeds) -} - -// handleAPIFeeds lists feeds with optional publish_status filter -func (c *Crawler) handleAPIFeeds(w http.ResponseWriter, r *http.Request) { - publishStatus := r.URL.Query().Get("publish_status") - limit := 100 - offset := 0 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - if o := r.URL.Query().Get("offset"); o != "" { - fmt.Sscanf(o, "%d", &offset) - } - - var rows pgx.Rows - var err error - if publishStatus != "" { - rows, err = c.db.Query(` - SELECT url, title, type, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language - FROM feeds - WHERE publish_status = $1 - ORDER BY url ASC - LIMIT $2 OFFSET $3 - `, publishStatus, limit, offset) - } else { - rows, err = c.db.Query(` - SELECT url, title, type, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, status, last_error, item_count, publish_status, language - FROM feeds - ORDER BY url ASC - LIMIT $1 OFFSET $2 - `, limit, offset) - } - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - PublishStatus string `json:"publish_status,omitempty"` - Language string `json:"language,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, sourceHost, tld, lastError, publishStatus, language *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount, &publishStatus, &language); err != nil { - continue - } - f.Title = StringValue(title) - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tld) - f.LastError = StringValue(lastError) - f.PublishStatus = StringValue(publishStatus) - f.Language = StringValue(language) - if itemCount != nil { - f.ItemCount = *itemCount - } - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(feeds) -} - -func (c *Crawler) filterFeeds(w http.ResponseWriter, tld, domain, status string, languages []string, limit, offset int) { - var args []interface{} - argNum := 1 - query := ` - SELECT url, title, type, category, domain_host || '.' || domain_tld as source_host, domain_tld as tld, status, last_error, item_count, language - FROM feeds - WHERE 1=1` - - if tld != "" { - query += fmt.Sprintf(" AND domain_tld = $%d", argNum) - args = append(args, tld) - argNum++ - } - if domain != "" { - // Parse domain into host and tld parts - domainHost := stripTLD(domain) - domainTLD := getTLD(domain) - query += fmt.Sprintf(" AND domain_host = $%d AND domain_tld = $%d", argNum, argNum+1) - args = append(args, domainHost, domainTLD) - argNum += 2 - } - if status != "" { - query += fmt.Sprintf(" AND status = $%d", argNum) - args = append(args, status) - argNum++ - } - if len(languages) > 0 { - // Build IN clause for languages, handling 'unknown' as empty string - placeholders := make([]string, len(languages)) - for i, lang := range languages { - placeholders[i] = fmt.Sprintf("$%d", argNum) - if lang == "unknown" { - args = append(args, "") - } else { - args = append(args, lang) - } - argNum++ - } - query += fmt.Sprintf(" AND COALESCE(language, '') IN (%s)", strings.Join(placeholders, ",")) - } - - query += fmt.Sprintf(" ORDER BY url ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) - args = append(args, limit, offset) - - rows, err := c.db.Query(query, args...) 
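filterFeeds above threads a hand-incremented argNum through every optional filter to number its $N placeholders, which is easy to desynchronize when clauses are added or reordered. A hedged alternative sketch — whereBuilder is hypothetical, not in this repo — that derives each placeholder number from the running length of the args slice instead:

import (
	"fmt"
	"strings"
)

// whereBuilder accumulates SQL predicates with PostgreSQL-numbered placeholders.
type whereBuilder struct {
	clauses []string
	args    []interface{}
}

// add appends one predicate, rewriting each ? to the next $N placeholder.
func (b *whereBuilder) add(expr string, vals ...interface{}) {
	for _, v := range vals {
		b.args = append(b.args, v)
		expr = strings.Replace(expr, "?", fmt.Sprintf("$%d", len(b.args)), 1)
	}
	b.clauses = append(b.clauses, expr)
}

// sql renders the combined clause, or "1=1" when no filter was added.
func (b *whereBuilder) sql() string {
	if len(b.clauses) == 0 {
		return "1=1"
	}
	return strings.Join(b.clauses, " AND ")
}

Used in filterFeeds it would read: b.add("domain_tld = ?", tld) for each present filter, then c.db.Query("SELECT url FROM feeds WHERE "+b.sql()+" ORDER BY url ASC", b.args...).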
- if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type FeedInfo struct { - URL string `json:"url"` - Title string `json:"title,omitempty"` - Type string `json:"type"` - Category string `json:"category"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - Status string `json:"status"` - LastError string `json:"last_error,omitempty"` - ItemCount int `json:"item_count,omitempty"` - Language string `json:"language,omitempty"` - } - - var feeds []FeedInfo - for rows.Next() { - var f FeedInfo - var title, category, sourceHost, tldVal, lastError, language *string - var itemCount *int - if err := rows.Scan(&f.URL, &title, &f.Type, &category, &sourceHost, &tldVal, &f.Status, &lastError, &itemCount, &language); err != nil { - continue - } - f.Title = StringValue(title) - if category != nil && *category != "" { - f.Category = *category - } else { - f.Category = "main" - } - f.SourceHost = StringValue(sourceHost) - f.TLD = StringValue(tldVal) - f.LastError = StringValue(lastError) - if itemCount != nil { - f.ItemCount = *itemCount - } - f.Language = StringValue(language) - feeds = append(feeds, f) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "type": "feeds", - "data": feeds, - }) -} - -// handleAPICheckFeed immediately checks a feed and returns items -func (c *Crawler) handleAPICheckFeed(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - force := r.URL.Query().Get("force") == "true" - - feedURL = normalizeURL(feedURL) - - // Get the feed - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - - // Clear cache headers if force is requested - if force { - feed.ETag = "" - feed.LastModified = "" - } - - // Force check the feed - fmt.Printf("Force check feed: %s (force=%v)\n", feedURL, force) - changed, checkErr := c.CheckFeed(feed) - - // Get updated feed info - feed, _ = c.getFeed(feedURL) - - // Get items - items, _ := c.GetItemsByFeed(feedURL, 20) - - type ItemSummary struct { - Title string `json:"title"` - Link string `json:"link"` - PubDate string `json:"pub_date,omitempty"` - Author string `json:"author,omitempty"` - } - var itemSummaries []ItemSummary - for _, item := range items { - is := ItemSummary{ - Title: item.Title, - Link: item.Link, - Author: item.Author, - } - if !item.PubDate.IsZero() { - is.PubDate = item.PubDate.Format("2006-01-02 15:04") - } - itemSummaries = append(itemSummaries, is) - } - - result := map[string]interface{}{ - "url": feedURL, - "title": feed.Title, - "type": feed.Type, - "category": feed.Category, - "status": feed.Status, - "changed": changed, - "itemCount": feed.ItemCount, - "items": itemSummaries, - } - if checkErr != nil { - result["error"] = checkErr.Error() - } - if feed.LastError != "" { - result["lastError"] = feed.LastError - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPILanguages returns distinct languages with counts -func (c *Crawler) handleAPILanguages(w http.ResponseWriter, r *http.Request) { - rows, err := c.db.Query(` - SELECT COALESCE(NULLIF(language, ''), 'unknown') as lang, COUNT(*) as cnt - FROM feeds - GROUP BY lang - ORDER BY cnt DESC - `) - if err 
!= nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer rows.Close() - - type LangInfo struct { - Language string `json:"language"` - Count int `json:"count"` - } - - var languages []LangInfo - for rows.Next() { - var l LangInfo - if err := rows.Scan(&l.Language, &l.Count); err != nil { - continue - } - languages = append(languages, l) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(languages) -} diff --git a/api_publish.go b/api_publish.go deleted file mode 100644 index 54f9296..0000000 --- a/api_publish.go +++ /dev/null @@ -1,1031 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net/http" - "os" - "strings" - "time" -) - -// handleAPIEnablePublish sets a feed's publish status to 'pass' -// If account is not provided, it will be auto-derived from the feed URL -func (c *Crawler) handleAPIEnablePublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - account := r.URL.Query().Get("account") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - // Auto-derive account handle if not provided - if account == "" { - account = DeriveHandleFromFeed(feedURL) - if account == "" { - http.Error(w, "could not derive account handle from URL", http.StatusBadRequest) - return - } - } - - // Check feed exists - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - - if err := c.SetPublishStatus(feedURL, "pass", account); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Get unpublished count - count, _ := c.GetUnpublishedItemCount(feedURL) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "pass", - "url": feedURL, - "account": account, - "unpublished_items": count, - }) -} - -// handleAPIDeriveHandle shows what handle would be derived from a feed URL -func (c *Crawler) handleAPIDeriveHandle(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - handle := DeriveHandleFromFeed(feedURL) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "url": feedURL, - "handle": handle, - }) -} - -// handleAPIDisablePublish sets a feed's publish status to 'skip' -func (c *Crawler) handleAPIDisablePublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - if err := c.SetPublishStatus(feedURL, "skip", ""); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "skip", - "url": feedURL, - }) -} - -// handleAPIPublishEnabled returns all feeds with publish status 'pass' -func (c *Crawler) handleAPIPublishEnabled(w http.ResponseWriter, r *http.Request) { - feeds, err := c.GetFeedsByPublishStatus("pass") - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type FeedPublishInfo struct { - URL string `json:"url"` - Title string 
`json:"title"` - Account string `json:"account"` - UnpublishedCount int `json:"unpublished_count"` - } - - var result []FeedPublishInfo - for _, f := range feeds { - count, _ := c.GetUnpublishedItemCount(f.URL) - result = append(result, FeedPublishInfo{ - URL: f.URL, - Title: f.Title, - Account: f.PublishAccount, - UnpublishedCount: count, - }) - } - - if result == nil { - result = []FeedPublishInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPIPublishDenied returns all feeds with publish status 'skip' -func (c *Crawler) handleAPIPublishDenied(w http.ResponseWriter, r *http.Request) { - feeds, err := c.GetFeedsByPublishStatus("skip") - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type FeedDeniedInfo struct { - URL string `json:"url"` - Title string `json:"title"` - SourceHost string `json:"source_host"` - } - - var result []FeedDeniedInfo - for _, f := range feeds { - result = append(result, FeedDeniedInfo{ - URL: f.URL, - Title: f.Title, - SourceHost: fullHost(f.DomainHost, f.DomainTLD), - }) - } - - if result == nil { - result = []FeedDeniedInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPIPublishCandidates returns feeds pending review that have items -func (c *Crawler) handleAPIPublishCandidates(w http.ResponseWriter, r *http.Request) { - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 200 { - limit = 200 - } - } - - feeds, err := c.GetPublishCandidates(limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - type CandidateInfo struct { - URL string `json:"url"` - Title string `json:"title"` - Category string `json:"category"` - SourceHost string `json:"source_host"` - ItemCount int `json:"item_count"` - DerivedHandle string `json:"derived_handle"` - } - - var result []CandidateInfo - for _, f := range feeds { - result = append(result, CandidateInfo{ - URL: f.URL, - Title: f.Title, - Category: f.Category, - SourceHost: fullHost(f.DomainHost, f.DomainTLD), - ItemCount: f.ItemCount, - DerivedHandle: DeriveHandleFromFeed(f.URL), - }) - } - - if result == nil { - result = []CandidateInfo{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// handleAPISetPublishStatus sets the publish status for a feed -// Status values: -// - 'pass': Create account if needed, begin publishing -// - 'hold': Crawl and store items but don't publish (default) -// - 'skip': Stop crawling but keep existing data -// - 'drop': Full cleanup - remove items, posts, and account -func (c *Crawler) handleAPISetPublishStatus(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - status := r.URL.Query().Get("status") - account := r.URL.Query().Get("account") - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if status != "pass" && status != "skip" && status != "hold" && status != "drop" { - http.Error(w, "status must be 'pass', 'hold', 'skip', or 'drop'", http.StatusBadRequest) - return - } - - feedURL = normalizeURL(feedURL) - - result := map[string]interface{}{ - "url": feedURL, - "status": status, - } - - // Handle 'drop' - full cleanup then set to skip - if status == "drop" { - cleanup := c.cleanupFeedPublishing(feedURL) - result["cleanup"] = cleanup - // After dropping, set status to skip with no account - 
if err := c.SetPublishStatus(feedURL, "skip", ""); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - result["account"] = "" - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) - return - } - - // Handle 'pass' - create account if needed and publish - if status == "pass" { - if account == "" { - account = DeriveHandleFromFeed(feedURL) - } - - // Check if account exists on PDS, create if not - created, err := c.ensureFeedAccountExists(feedURL, account) - if err != nil { - result["error"] = err.Error() - } else if created { - result["account_created"] = true - } - result["account"] = account - } - - // Handle 'hold' and 'skip' - just update status - if status == "hold" || status == "skip" { - // Get current account if any (don't change it) - feed, _ := c.getFeed(feedURL) - if feed != nil { - account = feed.PublishAccount - } - } - - if err := c.SetPublishStatus(feedURL, status, account); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - result["account"] = account - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} - -// ensureFeedAccountExists creates the PDS account for a feed if it doesn't exist -// Returns (created bool, error) -func (c *Crawler) ensureFeedAccountExists(feedURL, account string) (bool, error) { - // Load PDS credentials - pdsHost := os.Getenv("PDS_HOST") - pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD") - feedPassword := os.Getenv("FEED_PASSWORD") - - if pdsHost == "" { - if envData, err := os.ReadFile("pds.env"); err == nil { - for _, line := range strings.Split(string(envData), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "PDS_HOST=") { - pdsHost = strings.TrimPrefix(line, "PDS_HOST=") - } else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") { - pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=") - } else if strings.HasPrefix(line, "FEED_PASSWORD=") { - feedPassword = strings.TrimPrefix(line, "FEED_PASSWORD=") - } - } - } - } - - if pdsHost == "" || pdsAdminPassword == "" { - return false, fmt.Errorf("PDS credentials not configured") - } - if feedPassword == "" { - feedPassword = "feed1440!" 
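The env-then-pds.env fallback just above is the second copy of the lookup in getPDSCredentials (one uses bufio.Scanner, the other os.ReadFile). A single helper would cover both call sites; this sketch — envOrFile is hypothetical — keeps the same precedence of environment first, file second:

import (
	"os"
	"strings"
)

// envOrFile returns key from the environment, falling back to a KEY=VALUE
// file such as pds.env; it returns "" when the key is in neither place.
func envOrFile(key, path string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	data, err := os.ReadFile(path)
	if err != nil {
		return ""
	}
	prefix := key + "="
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, prefix) {
			return strings.TrimPrefix(line, prefix)
		}
	}
	return ""
}

For example: pdsHost := envOrFile("PDS_HOST", "pds.env").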
- } - - publisher := NewPublisher(pdsHost) - - // account is already the full handle (e.g., "ycombinator-blog.1440.news") - handle := account - if !strings.HasSuffix(handle, ".1440.news") { - handle = account + ".1440.news" - } - - // Try to login - if successful, account exists - _, err := publisher.CreateSession(handle, feedPassword) - if err == nil { - return false, nil // Account already exists - } - - // Account doesn't exist, create it - inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - return false, fmt.Errorf("failed to create invite: %w", err) - } - - email := handle + "@1440.news" - session, err := publisher.CreateAccount(handle, email, feedPassword, inviteCode) - if err != nil { - return false, fmt.Errorf("failed to create account: %w", err) - } - - fmt.Printf("Created account %s for feed %s\n", handle, feedURL) - - // Set up profile - feed, _ := c.getFeed(feedURL) - if feed != nil { - sourceHost := fullHost(feed.DomainHost, feed.DomainTLD) - displayName := feed.Title - if displayName == "" { - displayName = sourceHost - } - description := feed.Description - if description == "" { - description = "News feed via 1440.news" - } - // Add feed URL to description - feedURLFull := "https://" + feedURL - description = feedURLFull + "\n\n" + description - if len(displayName) > 64 { - displayName = displayName[:61] + "..." - } - if len(description) > 256 { - description = description[:253] + "..." - } - - // Try to fetch favicon - var avatar *BlobRef - faviconData, mimeType, err := FetchFaviconBytes(sourceHost) - if err == nil && len(faviconData) > 0 { - avatar, _ = publisher.UploadBlob(session, faviconData, mimeType) - } - - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - fmt.Printf("Failed to set profile for %s: %v\n", handle, err) - } - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("Directory follow failed for %s: %v\n", handle, err) - } - - return true, nil -} - -// cleanupFeedPublishing removes all published content for a feed -// Returns a summary of what was cleaned up -func (c *Crawler) cleanupFeedPublishing(feedURL string) map[string]interface{} { - result := map[string]interface{}{ - "posts_deleted": 0, - "account_deleted": false, - "items_cleared": 0, - } - - // Get feed info to find the account - feed, err := c.getFeed(feedURL) - if err != nil || feed == nil { - result["error"] = "feed not found" - return result - } - - if feed.PublishAccount == "" { - // No account associated, just clear items - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - return result - } - - // Load PDS credentials - pdsHost := os.Getenv("PDS_HOST") - pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD") - feedPassword := os.Getenv("FEED_PASSWORD") - - if pdsHost == "" { - // Try loading from pds.env - if envData, err := os.ReadFile("pds.env"); err == nil { - for _, line := range strings.Split(string(envData), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "PDS_HOST=") { - pdsHost = strings.TrimPrefix(line, "PDS_HOST=") - } else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") { - pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=") - } else if strings.HasPrefix(line, "FEED_PASSWORD=") { - feedPassword = strings.TrimPrefix(line, "FEED_PASSWORD=") - } - } - } - } - - if pdsHost == "" || feedPassword == "" { 
- result["error"] = "PDS credentials not configured" - // Still clear items in database - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - return result - } - - publisher := NewPublisher(pdsHost) - - // Try to authenticate as the feed account - session, err := publisher.CreateSession(feed.PublishAccount, feedPassword) - if err == nil && session != nil { - // Delete all posts - deleted, err := publisher.DeleteAllPosts(session) - if err == nil { - result["posts_deleted"] = deleted - } else { - result["posts_delete_error"] = err.Error() - } - } else { - result["session_error"] = "could not authenticate to delete posts" - } - - // Delete the account using admin API - if pdsAdminPassword != "" && session != nil { - err := publisher.DeleteAccount(pdsAdminPassword, session.DID) - if err == nil { - result["account_deleted"] = true - } else { - result["account_delete_error"] = err.Error() - } - } - - // Clear published_at on all items - itemsCleared, _ := c.db.Exec(`UPDATE items SET published_at = NULL WHERE feed_url = $1`, feedURL) - result["items_cleared"] = itemsCleared - - // Clear publish_account on feed - c.db.Exec(`UPDATE feeds SET publish_account = NULL WHERE url = $1`, feedURL) - - return result -} - -// handleAPIUnpublishedItems returns unpublished items for a feed -func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - - limit := 50 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 200 { - limit = 200 - } - } - - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if items == nil { - items = []*Item{} - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(items) -} - -// handleAPITestPublish tests publishing a single item to PDS -// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news) -func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("feedUrl") - guidParam := r.URL.Query().Get("guid") - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if feedURL == "" || guidParam == "" { - http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest) - return - } - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://1440.news" - } - - // Get the item - var item Item - var guid, title, link, description, content, author *string - var pubDate, updatedAt, publishedAt *time.Time - var publishedUri *string - - err := c.db.QueryRow(` - SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri - FROM items WHERE feed_url = $1 AND guid = $2 - `, feedURL, guidParam).Scan( - &item.FeedURL, &guid, &title, &link, - &description, &content, &author, &pubDate, - &item.DiscoveredAt, &updatedAt, &publishedAt, &publishedUri, - ) - if err != nil { - http.Error(w, "item not found: "+err.Error(), http.StatusNotFound) - return - } - - item.GUID = StringValue(guid) - item.Title = StringValue(title) - item.Link = 
StringValue(link) - item.Description = StringValue(description) - item.Content = StringValue(content) - item.Author = StringValue(author) - if pubDate != nil { - item.PubDate = *pubDate - } - - // Create publisher and authenticate - publisher := NewPublisher(pdsHost) - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - // Publish the item - uri, err := publisher.PublishItem(session, &item) - if err != nil { - http.Error(w, "publish failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Mark as published - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - - // Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt - rkeyTime := item.PubDate - if rkeyTime.IsZero() { - rkeyTime = item.DiscoveredAt - } - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "published", - "uri": uri, - "feedUrl": item.FeedURL, - "guid": item.GUID, - "title": item.Title, - "rkey": GenerateRkey(item.GUID, rkeyTime), - }) -} - -// handleAPIPublishFeed publishes unpublished items for a feed -// Requires: url (feed), handle, password, pds (optional), limit (optional, default 10) -func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://1440.news" - } - - limit := 10 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 50 { - limit = 50 - } - } - - feedURL = normalizeURL(feedURL) - - // Get unpublished items (ordered by pubDate ASC - oldest first) - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if len(items) == 0 { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "no_items", - "published": 0, - }) - return - } - - // Create publisher and authenticate - publisher := NewPublisher(pdsHost) - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - type PublishResult struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - URI string `json:"uri,omitempty"` - Error string `json:"error,omitempty"` - } - - var results []PublishResult - published := 0 - failed := 0 - - for i, item := range items { - result := PublishResult{ - FeedURL: item.FeedURL, - GUID: item.GUID, - Title: item.Title, - } - - uri, err := publisher.PublishItem(session, item) - if err != nil { - result.Error = err.Error() - failed++ - } else { - result.URI = uri - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - published++ - } - - results = append(results, result) - - // Add delay between posts to ensure unique timestamps for relay indexing - if i < len(items)-1 { - time.Sleep(1100 * time.Millisecond) - } - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": 
"complete", - "published": published, - "failed": failed, - "results": results, - }) -} - -// handleAPICreateAccount creates a new account on the PDS -// Requires: handle, email, password, pds (optional), inviteCode (optional) -// If pdsAdminPassword is provided, it will create an invite code first -func (c *Crawler) handleAPICreateAccount(w http.ResponseWriter, r *http.Request) { - handle := r.URL.Query().Get("handle") - email := r.URL.Query().Get("email") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - inviteCode := r.URL.Query().Get("inviteCode") - pdsAdminPassword := r.URL.Query().Get("pdsAdminPassword") - - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - if email == "" { - // Generate a placeholder email from handle - email = handle + "@1440.news" - } - - publisher := NewPublisher(pdsHost) - - // If PDS admin password provided, create an invite code first - if pdsAdminPassword != "" && inviteCode == "" { - code, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - http.Error(w, "create invite failed: "+err.Error(), http.StatusInternalServerError) - return - } - inviteCode = code - } - - // Create the account - session, err := publisher.CreateAccount(handle, email, password, inviteCode) - if err != nil { - http.Error(w, "create account failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("API: directory follow failed for %s: %v\n", handle, err) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "created", - "handle": session.Handle, - "did": session.DID, - }) -} - -// handleAPIPublishFeedFull creates an account (if needed) and publishes items -// This is a convenience endpoint that combines account creation and publishing -// Requires: url (feed), pdsAdminPassword, pds (optional), limit (optional), feedPassword (optional) -func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Request) { - feedURL := r.URL.Query().Get("url") - pdsAdminPassword := r.URL.Query().Get("pdsAdminPassword") - pdsHost := r.URL.Query().Get("pds") - feedPassword := r.URL.Query().Get("feedPassword") // Password for new feed accounts - - if feedURL == "" { - http.Error(w, "url parameter required", http.StatusBadRequest) - return - } - if pdsAdminPassword == "" { - http.Error(w, "pdsAdminPassword parameter required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - if feedPassword == "" { - feedPassword = "feed1440!" 
// Default password for feed accounts - } - - limit := 10 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 50 { - limit = 50 - } - } - - feedURL = normalizeURL(feedURL) - - // Get the feed to check its status and get the derived handle - feed, err := c.getFeed(feedURL) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if feed == nil { - http.Error(w, "feed not found", http.StatusNotFound) - return - } - if feed.PublishStatus != "pass" { - http.Error(w, "feed is not approved for publishing (status: "+feed.PublishStatus+")", http.StatusBadRequest) - return - } - - handle := feed.PublishAccount - if handle == "" { - handle = DeriveHandleFromFeed(feedURL) - } - email := handle + "@1440.news" - - publisher := NewPublisher(pdsHost) - - // First, try to authenticate with the feed account - session, err := publisher.CreateSession(handle, feedPassword) - if err != nil { - // Account doesn't exist, create it - fmt.Printf("Account %s doesn't exist, creating...\n", handle) - - // Create invite code using PDS admin password - inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1) - if err != nil { - http.Error(w, "create invite failed: "+err.Error(), http.StatusInternalServerError) - return - } - - // Create the account - session, err = publisher.CreateAccount(handle, email, feedPassword, inviteCode) - if err != nil { - http.Error(w, "create account failed: "+err.Error(), http.StatusInternalServerError) - return - } - fmt.Printf("Created account: %s (%s)\n", session.Handle, session.DID) - - // Set up profile with feed title and favicon - sourceHost := fullHost(feed.DomainHost, feed.DomainTLD) - displayName := feed.Title - if displayName == "" { - displayName = sourceHost - } - description := feed.Description - - // Try to fetch favicon for avatar - var avatar *BlobRef - faviconData, mimeType, err := FetchFaviconBytes(sourceHost) - if err == nil && len(faviconData) > 0 { - avatar, err = publisher.UploadBlob(session, faviconData, mimeType) - if err != nil { - fmt.Printf("Failed to upload favicon: %v\n", err) - } - } - - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - fmt.Printf("Failed to update profile: %v\n", err) - } else { - fmt.Printf("Set profile for %s: %s\n", handle, displayName) - } - - // Have directory account follow this new account - if err := publisher.FollowAsDirectory(session.DID); err != nil { - fmt.Printf("API: directory follow failed for %s: %v\n", handle, err) - } - } - - // Get unpublished items - items, err := c.GetUnpublishedItems(feedURL, limit) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if len(items) == 0 { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "no_items", - "handle": handle, - "published": 0, - }) - return - } - - type PublishResult struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - URI string `json:"uri,omitempty"` - Error string `json:"error,omitempty"` - } - - var results []PublishResult - published := 0 - failed := 0 - - for i, item := range items { - result := PublishResult{ - FeedURL: item.FeedURL, - GUID: item.GUID, - Title: item.Title, - } - - uri, err := publisher.PublishItem(session, item) - if err != nil { - result.Error = err.Error() - failed++ - } else { - result.URI = uri - c.MarkItemPublished(item.FeedURL, item.GUID, uri) - published++ - } - 
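Both publish handlers run this same loop shape: publish, mark the item published, and sleep 1100ms between posts so consecutive records land on distinct createdAt timestamps for relay indexing. A sketch of the loop as a helper — publishPaced is hypothetical, and Session stands in for whatever session type CreateSession returns:

import "time"

// publishPaced publishes items in order with a fixed gap between posts,
// marking each success so it is not re-published on the next run.
func (c *Crawler) publishPaced(p *Publisher, s *Session, items []*Item, gap time.Duration) (published, failed int) {
	for i, item := range items {
		uri, err := p.PublishItem(s, item)
		if err != nil {
			failed++
		} else {
			c.MarkItemPublished(item.FeedURL, item.GUID, uri)
			published++
		}
		// Pause between posts (e.g. 1100*time.Millisecond, as above) so
		// timestamps stay unique; skip the sleep after the final item.
		if i < len(items)-1 {
			time.Sleep(gap)
		}
	}
	return published, failed
}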
- results = append(results, result) - - // Add delay between posts to ensure unique timestamps for relay indexing - if i < len(items)-1 { - time.Sleep(1100 * time.Millisecond) - } - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "complete", - "handle": handle, - "did": session.DID, - "published": published, - "failed": failed, - "results": results, - }) -} - -// handleAPIUpdateProfile updates a profile for an existing account -// Requires: handle, password, pds (optional), displayName (optional), description (optional), faviconUrl (optional) -func (c *Crawler) handleAPIUpdateProfile(w http.ResponseWriter, r *http.Request) { - handle := r.URL.Query().Get("handle") - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - displayName := r.URL.Query().Get("displayName") - description := r.URL.Query().Get("description") - faviconURL := r.URL.Query().Get("faviconUrl") - - if handle == "" || password == "" { - http.Error(w, "handle and password parameters required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - - publisher := NewPublisher(pdsHost) - - // Authenticate - session, err := publisher.CreateSession(handle, password) - if err != nil { - http.Error(w, "auth failed: "+err.Error(), http.StatusUnauthorized) - return - } - - // Fetch favicon if URL provided - var avatar *BlobRef - if faviconURL != "" { - faviconData, mimeType, err := FetchFaviconBytes(faviconURL) - if err != nil { - http.Error(w, "fetch favicon failed: "+err.Error(), http.StatusBadRequest) - return - } - avatar, err = publisher.UploadBlob(session, faviconData, mimeType) - if err != nil { - http.Error(w, "upload favicon failed: "+err.Error(), http.StatusInternalServerError) - return - } - } - - // Update profile - if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil { - http.Error(w, "update profile failed: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "updated", - "handle": handle, - "displayName": displayName, - "hasAvatar": avatar != nil, - }) -} - -// handleAPIResetAllPublishing clears all publish accounts and published_at timestamps -func (c *Crawler) handleAPIResetAllPublishing(w http.ResponseWriter, r *http.Request) { - // Clear all publish_account fields - accountsCleared, err := c.db.Exec(`UPDATE feeds SET publish_account = NULL WHERE publish_account IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Clear all published_at timestamps - itemsCleared, err := c.db.Exec(`UPDATE items SET published_at = NULL WHERE published_at IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Reset all publish_status to 'hold' - statusReset, err := c.db.Exec(`UPDATE feeds SET publish_status = 'hold' WHERE publish_status IS NOT NULL`) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "success": true, - "accounts_cleared": accountsCleared, - "items_cleared": itemsCleared, - "status_reset": statusReset, - }) -} - -// handleAPIRefreshProfiles refreshes all account profiles (avatars, descriptions) -// Requires: password (feed account password), pds 
(optional, defaults to pds.1440.news) -func (c *Crawler) handleAPIRefreshProfiles(w http.ResponseWriter, r *http.Request) { - password := r.URL.Query().Get("password") - pdsHost := r.URL.Query().Get("pds") - - if password == "" { - http.Error(w, "password parameter required", http.StatusBadRequest) - return - } - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - - publisher := NewPublisher(pdsHost) - - // Run RefreshAllProfiles synchronously - c.RefreshAllProfiles(publisher, password) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "success": true, - "message": "profiles refreshed", - }) -} diff --git a/api_search.go b/api_search.go deleted file mode 100644 index f6945c5..0000000 --- a/api_search.go +++ /dev/null @@ -1,311 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "github.com/jackc/pgx/v5" -) - -// SearchResult represents a search result with feed and matching items -type SearchResult struct { - Feed SearchFeed `json:"feed"` - Items []SearchItem `json:"items"` -} - -type SearchFeed struct { - URL string `json:"url"` - Type string `json:"type"` - Category string `json:"category"` - Title string `json:"title"` - Description string `json:"description"` - Language string `json:"language"` - SiteURL string `json:"site_url"` - DiscoveredAt string `json:"discovered_at"` - LastCheckedAt string `json:"last_checked_at"` - NextCheckAt string `json:"next_check_at"` - LastBuildDate string `json:"last_build_date"` - Status string `json:"status"` - LastError string `json:"last_error"` - LastErrorAt string `json:"last_error_at"` - SourceURL string `json:"source_url"` - SourceHost string `json:"source_host"` - TLD string `json:"tld"` - ItemCount int `json:"item_count"` - OldestItemDate string `json:"oldest_item_date"` - NewestItemDate string `json:"newest_item_date"` - NoUpdate bool `json:"no_update"` -} - -type SearchItem struct { - FeedURL string `json:"feed_url"` - GUID string `json:"guid"` - Title string `json:"title"` - Link string `json:"link"` - Description string `json:"description"` - Content string `json:"content"` - Author string `json:"author"` - PubDate string `json:"pub_date"` - DiscoveredAt string `json:"discovered_at"` - UpdatedAt string `json:"updated_at"` -} - -func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) { - query := r.URL.Query().Get("q") - if query == "" { - http.Error(w, "q parameter required", http.StatusBadRequest) - return - } - - limit := 100 - if l := r.URL.Query().Get("limit"); l != "" { - fmt.Sscanf(l, "%d", &limit) - if limit > 500 { - limit = 500 - } - } - - // Results map: feedURL -> SearchResult - results := make(map[string]*SearchResult) - - // Helper to scan feed row into SearchFeed - scanFeed := func(rows pgx.Rows) (string, SearchFeed, bool) { - var url string - var feedType, category, title, description, language, siteUrl *string - var discoveredAt time.Time - var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time - var itemCount *int - var status, lastError *string - var lastErrorAt *time.Time - var sourceUrl, sourceHost, tld *string - var oldestItemDate, newestItemDate *time.Time - var noUpdate *bool - - if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl, - &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, - &status, &lastError, &lastErrorAt, - &sourceUrl, &sourceHost, &tld, - &itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil { - return "", 
SearchFeed{}, false - } - cat := StringValue(category) - if cat == "" { - cat = "main" - } - sf := SearchFeed{ - URL: url, - Type: StringValue(feedType), - Category: cat, - Title: StringValue(title), - Description: StringValue(description), - Language: StringValue(language), - SiteURL: StringValue(siteUrl), - DiscoveredAt: discoveredAt.Format(time.RFC3339), - Status: StringValue(status), - LastError: StringValue(lastError), - SourceURL: StringValue(sourceUrl), - SourceHost: StringValue(sourceHost), - TLD: StringValue(tld), - } - if lastCheckedAt != nil { - sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) - } - if nextCheckAt != nil { - sf.NextCheckAt = nextCheckAt.Format(time.RFC3339) - } - if lastBuildDate != nil { - sf.LastBuildDate = lastBuildDate.Format(time.RFC3339) - } - if lastErrorAt != nil { - sf.LastErrorAt = lastErrorAt.Format(time.RFC3339) - } - if itemCount != nil { - sf.ItemCount = *itemCount - } - if oldestItemDate != nil { - sf.OldestItemDate = oldestItemDate.Format(time.RFC3339) - } - if newestItemDate != nil { - sf.NewestItemDate = newestItemDate.Format(time.RFC3339) - } - if noUpdate != nil { - sf.NoUpdate = *noUpdate - } - return url, sf, true - } - - // Search feeds by domain_host (LIKE search for domain matching) - // Use LOWER() to leverage trigram index - lowerPattern := "%" + strings.ToLower(query) + "%" - hostRows, err := c.db.Query(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' || domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds - WHERE LOWER(domain_host || '.' || domain_tld) LIKE $1 OR LOWER(url) LIKE $1 - LIMIT $2 - `, lowerPattern, limit) - if err == nil { - defer hostRows.Close() - for hostRows.Next() { - if url, feed, ok := scanFeed(hostRows); ok { - if _, exists := results[url]; !exists { - results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} - } - } - } - } - - // Search feeds via full-text search - tsQuery := ToSearchQuery(query) - feedRows, err := c.db.Query(` - SELECT url, type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds - WHERE search_vector @@ to_tsquery('english', $1) - LIMIT $2 - `, tsQuery, limit) - if err == nil { - defer feedRows.Close() - for feedRows.Next() { - if url, feed, ok := scanFeed(feedRows); ok { - if _, exists := results[url]; !exists { - results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} - } - } - } - } - - // Search items via full-text search - itemRows, err := c.db.Query(` - SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at - FROM items i - WHERE i.search_vector @@ to_tsquery('english', $1) - ORDER BY i.pub_date DESC - LIMIT $2 - `, tsQuery, limit) - if err == nil { - defer itemRows.Close() - for itemRows.Next() { - var feedUrl string - var guid, title, link, description, content, author *string - var pubDate, discoveredAt, updatedAt *time.Time - if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil { - continue - } - - item := SearchItem{ - FeedURL: feedUrl, - GUID: StringValue(guid), - Title: StringValue(title), - Link: StringValue(link), - Description: StringValue(description), - Content: StringValue(content), - Author: StringValue(author), - } - if pubDate != nil { - item.PubDate = pubDate.Format(time.RFC3339) - } - if discoveredAt != nil { - item.DiscoveredAt = discoveredAt.Format(time.RFC3339) - } - if updatedAt != nil { - item.UpdatedAt = updatedAt.Format(time.RFC3339) - } - - // Add to existing result or create new one - if result, exists := results[feedUrl]; exists { - result.Items = append(result.Items, item) - } else { - // Fetch feed info for this item's feed - var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string - var fDiscoveredAt time.Time - var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time - var fItemCount *int - var fStatus, fLastError *string - var fLastErrorAt *time.Time - var fSourceUrl, fSourceHost, fTLD *string - var fOldestItemDate, fNewestItemDate *time.Time - var fNoUpdate *bool - - c.db.QueryRow(` - SELECT type, category, title, description, language, site_url, - discovered_at, last_checked_at, next_check_at, last_build_date, - status, last_error, last_error_at, - source_url, domain_host || '.' 
|| domain_tld as source_host, domain_tld as tld, - item_count, oldest_item_date, newest_item_date, no_update - FROM feeds WHERE url = $1 - `, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl, - &fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate, - &fStatus, &fLastError, &fLastErrorAt, - &fSourceUrl, &fSourceHost, &fTLD, - &fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate) - - fCat := StringValue(fCategory) - if fCat == "" { - fCat = "main" - } - sf := SearchFeed{ - URL: feedUrl, - Type: StringValue(fType), - Category: fCat, - Title: StringValue(fTitle), - Description: StringValue(fDesc), - Language: StringValue(fLang), - SiteURL: StringValue(fSiteUrl), - DiscoveredAt: fDiscoveredAt.Format(time.RFC3339), - Status: StringValue(fStatus), - LastError: StringValue(fLastError), - SourceURL: StringValue(fSourceUrl), - SourceHost: StringValue(fSourceHost), - TLD: StringValue(fTLD), - } - if fLastCheckedAt != nil { - sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339) - } - if fNextCheckAt != nil { - sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339) - } - if fLastBuildDate != nil { - sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339) - } - if fLastErrorAt != nil { - sf.LastErrorAt = fLastErrorAt.Format(time.RFC3339) - } - if fItemCount != nil { - sf.ItemCount = *fItemCount - } - if fOldestItemDate != nil { - sf.OldestItemDate = fOldestItemDate.Format(time.RFC3339) - } - if fNewestItemDate != nil { - sf.NewestItemDate = fNewestItemDate.Format(time.RFC3339) - } - if fNoUpdate != nil { - sf.NoUpdate = *fNoUpdate - } - results[feedUrl] = &SearchResult{ - Feed: sf, - Items: []SearchItem{item}, - } - } - } - } - - // Convert map to slice - var resultList []SearchResult - for _, r := range results { - resultList = append(resultList, *r) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resultList) -} diff --git a/crawler.go b/crawler.go index bc6be12..0be1c46 100644 --- a/crawler.go +++ b/crawler.go @@ -18,23 +18,20 @@ import ( ) type Crawler struct { - MaxDepth int - MaxPagesPerHost int - Timeout time.Duration - UserAgent string - visited sync.Map - feedsMu sync.Mutex - client *http.Client + MaxDepth int + MaxPagesPerHost int + Timeout time.Duration + UserAgent string + visited sync.Map + feedsMu sync.Mutex + client *http.Client domainsCrawled int32 // feed_crawl: domains crawled for feed discovery domainsChecked int32 // domain_check: domains checked for liveness feedsChecked int32 // feed_check: feeds checked for new items startTime time.Time db *DB domainsImported int32 - cachedStats *DashboardStats - cachedAllDomains []DomainStat - statsMu sync.RWMutex - shutdownCh chan struct{} // closed on shutdown to signal goroutines + shutdownCh chan struct{} // closed on shutdown to signal goroutines } func NewCrawler(connString string) (*Crawler, error) { @@ -107,17 +104,6 @@ func (c *Crawler) Close() error { return nil } -// StartStatsLoop updates cached stats every 10 seconds -func (c *Crawler) StartStatsLoop() { - for { - if c.IsShuttingDown() { - return - } - c.UpdateStats() - time.Sleep(10 * time.Second) - } -} - // StartCleanupLoop runs item cleanup once per week func (c *Crawler) StartCleanupLoop() { for { @@ -367,7 +353,7 @@ type FeedInfo struct { func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo { var title, description, siteURL, sourceHost *string err := c.db.QueryRow(` - SELECT title, description, site_url, domain_host || '.' 
|| domain_tld as source_host FROM feeds WHERE url = $1 + SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1 `, feedURL).Scan(&title, &description, &siteURL, &sourceHost) if err != nil { return nil @@ -383,7 +369,7 @@ func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo { // RefreshAllProfiles updates profiles for all existing accounts with feed URLs func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) { rows, err := c.db.Query(` - SELECT url, title, description, site_url, domain_host || '.' || domain_tld as source_host, publish_account + SELECT url, title, description, site_url, domain_host as source_host, publish_account FROM feeds WHERE publish_account IS NOT NULL AND publish_account <> '' `) diff --git a/dashboard.go b/dashboard.go deleted file mode 100644 index 5ff21fa..0000000 --- a/dashboard.go +++ /dev/null @@ -1,265 +0,0 @@ -package main - -import ( - "fmt" - "time" -) - -// DashboardStats holds all statistics for the dashboard -type DashboardStats struct { - // Domain stats - TotalDomains int `json:"total_domains"` - HoldDomains int `json:"hold_domains"` - PassDomains int `json:"pass_domains"` - SkipDomains int `json:"skip_domains"` - DeadDomains int `json:"dead_domains"` - - // Feed stats - TotalFeeds int `json:"total_feeds"` - AliveFeeds int `json:"alive_feeds"` // status='pass' (healthy feeds) - PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing) - SkipFeeds int `json:"skip_feeds"` - HoldFeeds int `json:"hold_feeds"` - DeadFeeds int `json:"dead_feeds"` - EmptyFeeds int `json:"empty_feeds"` - RSSFeeds int `json:"rss_feeds"` - AtomFeeds int `json:"atom_feeds"` - JSONFeeds int `json:"json_feeds"` - UnknownFeeds int `json:"unknown_feeds"` - - // Processing rates (per minute) - DomainsCrawled int32 `json:"domains_crawled"` // feed_crawl count - DomainCheckRate int `json:"domain_check_rate"` // domain_check per minute - FeedCrawlRate int `json:"feed_crawl_rate"` // feed_crawl per minute - FeedCheckRate int `json:"feed_check_rate"` // feed_check per minute - - // Timing - UpdatedAt time.Time `json:"updated_at"` -} - -type TLDStat struct { - TLD string `json:"tld"` - Count int `json:"count"` -} - -type RecentFeed struct { - URL string `json:"url"` - Title string `json:"title"` - Type string `json:"type"` - DiscoveredAt time.Time `json:"discovered_at"` -} - -type DomainStat struct { - Host string `json:"host"` - FeedsFound int `json:"feeds_found"` -} - -// commaFormat formats an integer with comma separators -func commaFormat(n int) string { - s := fmt.Sprintf("%d", n) - if len(s) <= 3 { - return s - } - var result []byte - for i, c := range s { - if i > 0 && (len(s)-i)%3 == 0 { - result = append(result, ',') - } - result = append(result, byte(c)) - } - return string(result) -} - -// UpdateStats recalculates and caches dashboard statistics -func (c *Crawler) UpdateStats() { - fmt.Println("UpdateStats: calculating stats...") - stats, err := c.calculateStats() - if err != nil { - fmt.Printf("UpdateStats: error calculating stats: %v\n", err) - return - } - // Cache all domains with feeds (runs in background, so slow query is OK) - fmt.Println("UpdateStats: fetching all domains...") - allDomains := c.fetchAllDomainsFromDB() - fmt.Printf("UpdateStats: got %d domains\n", len(allDomains)) - - c.statsMu.Lock() - c.cachedStats = stats - c.cachedAllDomains = allDomains - c.statsMu.Unlock() - fmt.Println("UpdateStats: complete") -} - -func (c *Crawler) fetchAllDomainsFromDB() []DomainStat { 
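- // One row per (domain_tld, domain_host) pair with its feed count; called from UpdateStats in the background, so a slow aggregate is acceptable.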
- rows, err := c.db.Query(` - SELECT domain_tld as tld, domain_host || '.' || domain_tld as source_host, COUNT(*) as cnt FROM feeds - GROUP BY domain_tld, domain_host - ORDER BY domain_tld, domain_host - `) - if err != nil { - fmt.Printf("fetchAllDomainsFromDB error: %v\n", err) - return nil - } - defer rows.Close() - - var domains []DomainStat - for rows.Next() { - var ds DomainStat - var tld string - if err := rows.Scan(&tld, &ds.Host, &ds.FeedsFound); err != nil { - continue - } - domains = append(domains, ds) - } - return domains -} - -// GetDashboardStats returns cached statistics (returns empty stats if not yet cached) -func (c *Crawler) GetDashboardStats() (*DashboardStats, error) { - c.statsMu.RLock() - stats := c.cachedStats - c.statsMu.RUnlock() - - if stats != nil { - return stats, nil - } - // Return empty stats while background calculation runs (don't block HTTP requests) - return &DashboardStats{UpdatedAt: time.Now()}, nil -} - -// calculateStats collects all statistics for the dashboard -func (c *Crawler) calculateStats() (*DashboardStats, error) { - stats := &DashboardStats{ - UpdatedAt: time.Now(), - DomainsCrawled: c.domainsCrawled, - } - - // Calculate rates (per minute) - elapsed := time.Since(c.startTime).Minutes() - if elapsed > 0 { - stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed) - stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed) - stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed) - } - - // Get domain stats - if err := c.collectDomainStats(stats); err != nil { - return nil, err - } - - // Get feed stats - if err := c.collectFeedStats(stats); err != nil { - return nil, err - } - - return stats, nil -} - -func (c *Crawler) collectDomainStats(stats *DashboardStats) error { - // Use COUNT(*) for total count - err := c.db.QueryRow("SELECT COUNT(*) FROM domains").Scan(&stats.TotalDomains) - if err != nil { - return err - } - - // Single query to get all status counts (one index scan instead of three) - rows, err := c.db.Query("SELECT status, COUNT(*) FROM domains GROUP BY status") - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var status string - var count int - if err := rows.Scan(&status, &count); err != nil { - continue - } - switch status { - case "hold": - stats.HoldDomains = count - case "pass": - stats.PassDomains = count - case "skip": - stats.SkipDomains = count - case "dead": - stats.DeadDomains = count - } - } - if err := rows.Err(); err != nil { - return err - } - - return rows.Err() -} - -func (c *Crawler) collectFeedStats(stats *DashboardStats) error { - // Use COUNT(*) for total count - err := c.db.QueryRow("SELECT COUNT(*) FROM feeds").Scan(&stats.TotalFeeds) - if err != nil { - return err - } - - // Get status counts - statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status") - if err != nil { - return err - } - defer statusRows.Close() - - for statusRows.Next() { - var status *string - var count int - if err := statusRows.Scan(&status, &count); err != nil { - continue - } - if status != nil { - switch *status { - case "pass": - stats.AliveFeeds = count - case "skip": - stats.SkipFeeds = count - case "hold": - stats.HoldFeeds = count - case "dead": - stats.DeadFeeds = count - } - } - } - - // Count feeds approved for publishing (publish_status='pass') - c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds) - - // Count empty feeds (item_count = 0 or NULL) - c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE 
item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds) - - // Single query to get all type counts (one index scan instead of three) - rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type") - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var feedType *string - var count int - if err := rows.Scan(&feedType, &count); err != nil { - continue - } - if feedType == nil { - stats.UnknownFeeds += count - } else { - switch *feedType { - case "rss": - stats.RSSFeeds = count - case "atom": - stats.AtomFeeds = count - case "json": - stats.JSONFeeds = count - default: - stats.UnknownFeeds += count - } - } - } - return rows.Err() -} diff --git a/main.go b/main.go index 372bcc6..d7c9a54 100644 --- a/main.go +++ b/main.go @@ -19,16 +19,6 @@ func main() { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) - // Start dashboard in background - go func() { - if err := crawler.StartDashboard("0.0.0.0:4321"); err != nil { - fmt.Fprintf(os.Stderr, "Dashboard error: %v\n", err) - } - }() - - // Initialize stats in background (can be slow with large DBs) - go crawler.UpdateStats() - // Start all loops independently fmt.Println("Starting import and processing loops...") @@ -44,9 +34,6 @@ func main() { // feed_check loop (background) - checks feeds for new items go crawler.StartFeedCheckLoop() - // Stats loop (background) - updates once per minute - go crawler.StartStatsLoop() - // Cleanup loop (background) - removes old items once per week go crawler.StartCleanupLoop() diff --git a/oauth.go b/oauth.go deleted file mode 100644 index aeff41b..0000000 --- a/oauth.go +++ /dev/null @@ -1,287 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - neturl "net/url" - "os" - "strings" - "time" - - oauth "github.com/haileyok/atproto-oauth-golang" - "github.com/haileyok/atproto-oauth-golang/helpers" - "github.com/lestrrat-go/jwx/v2/jwk" -) - -// OAuthManager handles OAuth 2.0 authentication for the dashboard -type OAuthManager struct { - client *oauth.Client - clientID string - redirectURI string - privateJWK jwk.Key - publicJWK jwk.Key - sessions *SessionStore - cookieSecret []byte - allowedScope string -} - -// OAuthConfig holds configuration for the OAuth manager -type OAuthConfig struct { - ClientID string // URL to client metadata (e.g., https://app.1440.news/.well-known/oauth-client-metadata) - RedirectURI string // OAuth callback URL (e.g., https://app.1440.news/auth/callback) - CookieSecret string // 32-byte hex string for AES-256-GCM encryption - PrivateJWK string // ES256 private key as JSON -} - -// NewOAuthManager creates a new OAuth manager -func NewOAuthManager(cfg OAuthConfig, db *DB) (*OAuthManager, error) { - // Parse cookie secret (must be 32 bytes for AES-256) - cookieSecret, err := parseHexSecret(cfg.CookieSecret) - if err != nil { - return nil, fmt.Errorf("invalid cookie secret: %v", err) - } - if len(cookieSecret) != 32 { - return nil, fmt.Errorf("cookie secret must be 32 bytes, got %d", len(cookieSecret)) - } - - // Parse private JWK - privateJWK, err := helpers.ParseJWKFromBytes([]byte(cfg.PrivateJWK)) - if err != nil { - return nil, fmt.Errorf("invalid private JWK: %v", err) - } - - // Extract public key - publicJWK, err := privateJWK.PublicKey() - if err != nil { - return nil, fmt.Errorf("failed to extract public key: %v", err) - } - - // Create HTTP client with longer timeout - httpClient := &http.Client{ - Timeout: 30 * time.Second, - } - - // Create OAuth client 
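- // (used below for PAR, DPoP-bound token exchange, and refresh; ClientJwk is the ES256 private key whose public half is published at jwks.json)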
- client, err := oauth.NewClient(oauth.ClientArgs{ - Http: httpClient, - ClientJwk: privateJWK, - ClientId: cfg.ClientID, - RedirectUri: cfg.RedirectURI, - }) - if err != nil { - return nil, fmt.Errorf("failed to create OAuth client: %v", err) - } - - return &OAuthManager{ - client: client, - clientID: cfg.ClientID, - redirectURI: cfg.RedirectURI, - privateJWK: privateJWK, - publicJWK: publicJWK, - sessions: NewSessionStore(db), - cookieSecret: cookieSecret, - allowedScope: "atproto", - }, nil -} - -// LoadOAuthConfig loads OAuth configuration from environment or oauth.env file -func LoadOAuthConfig(baseURL string) (*OAuthConfig, error) { - cfg := &OAuthConfig{ - ClientID: baseURL + "/.well-known/oauth-client-metadata", - RedirectURI: baseURL + "/auth/callback", - } - - // Try environment variables first - cfg.CookieSecret = os.Getenv("OAUTH_COOKIE_SECRET") - cfg.PrivateJWK = os.Getenv("OAUTH_PRIVATE_JWK") - - // Fall back to oauth.env file - if cfg.CookieSecret == "" || cfg.PrivateJWK == "" { - if data, err := os.ReadFile("oauth.env"); err == nil { - for _, line := range strings.Split(string(data), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "#") || line == "" { - continue - } - parts := strings.SplitN(line, "=", 2) - if len(parts) == 2 { - key := strings.TrimSpace(parts[0]) - value := strings.TrimSpace(parts[1]) - switch key { - case "OAUTH_COOKIE_SECRET": - cfg.CookieSecret = value - case "OAUTH_PRIVATE_JWK": - cfg.PrivateJWK = value - } - } - } - } - } - - // Validate required fields - if cfg.CookieSecret == "" { - return nil, fmt.Errorf("OAUTH_COOKIE_SECRET not configured") - } - if cfg.PrivateJWK == "" { - return nil, fmt.Errorf("OAUTH_PRIVATE_JWK not configured") - } - - return cfg, nil -} - -// parseHexSecret converts a hex string to bytes -func parseHexSecret(hex string) ([]byte, error) { - if len(hex)%2 != 0 { - return nil, fmt.Errorf("hex string must have even length") - } - b := make([]byte, len(hex)/2) - for i := 0; i < len(hex); i += 2 { - var val byte - for j := 0; j < 2; j++ { - c := hex[i+j] - switch { - case c >= '0' && c <= '9': - val = val*16 + (c - '0') - case c >= 'a' && c <= 'f': - val = val*16 + (c - 'a' + 10) - case c >= 'A' && c <= 'F': - val = val*16 + (c - 'A' + 10) - default: - return nil, fmt.Errorf("invalid hex character: %c", c) - } - } - b[i/2] = val - } - return b, nil -} - -// resolveHandle resolves a Bluesky handle to a DID -func resolveHandle(ctx context.Context, handle string) (string, error) { - // Normalize handle (remove @ prefix and whitespace) - handle = strings.TrimSpace(handle) - handle = strings.TrimPrefix(handle, "@") - handle = strings.ToLower(handle) - - // Try DNS-based resolution first - url := fmt.Sprintf("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle=%s", neturl.QueryEscape(handle)) - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("resolve handle failed: %s", string(body)) - } - - var result struct { - DID string `json:"did"` - } - if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { - return "", err - } - - return result.DID, nil -} - -// resolveDIDToHandle resolves a DID to the current handle -func resolveDIDToHandle(ctx context.Context, did string) (string, error) { - // Fetch DID document - var docURL string - if 
strings.HasPrefix(did, "did:plc:") { - docURL = fmt.Sprintf("https://plc.directory/%s", did) - } else if strings.HasPrefix(did, "did:web:") { - domain := strings.TrimPrefix(did, "did:web:") - docURL = fmt.Sprintf("https://%s/.well-known/did.json", domain) - } else { - return "", fmt.Errorf("unsupported DID method: %s", did) - } - - req, err := http.NewRequestWithContext(ctx, "GET", docURL, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to fetch DID document: %d", resp.StatusCode) - } - - var doc struct { - AlsoKnownAs []string `json:"alsoKnownAs"` - } - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { - return "", err - } - - // Find the at:// handle - for _, aka := range doc.AlsoKnownAs { - if strings.HasPrefix(aka, "at://") { - return strings.TrimPrefix(aka, "at://"), nil - } - } - - return "", fmt.Errorf("no handle found for DID %s", did) -} - -// resolveDIDToService gets the PDS service URL from a DID -func resolveDIDToService(ctx context.Context, did string) (string, error) { - var docURL string - if strings.HasPrefix(did, "did:plc:") { - docURL = fmt.Sprintf("https://plc.directory/%s", did) - } else if strings.HasPrefix(did, "did:web:") { - domain := strings.TrimPrefix(did, "did:web:") - docURL = fmt.Sprintf("https://%s/.well-known/did.json", domain) - } else { - return "", fmt.Errorf("unsupported DID method: %s", did) - } - - req, err := http.NewRequestWithContext(ctx, "GET", docURL, nil) - if err != nil { - return "", err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to fetch DID document: %d", resp.StatusCode) - } - - var doc struct { - Service []struct { - ID string `json:"id"` - Type string `json:"type"` - ServiceEndpoint string `json:"serviceEndpoint"` - } `json:"service"` - } - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { - return "", err - } - - // Find the atproto_pds service - for _, svc := range doc.Service { - if svc.Type == "AtprotoPersonalDataServer" || svc.ID == "#atproto_pds" { - return svc.ServiceEndpoint, nil - } - } - - return "", fmt.Errorf("no PDS service found for DID %s", did) -} diff --git a/oauth_handlers.go b/oauth_handlers.go deleted file mode 100644 index c351f14..0000000 --- a/oauth_handlers.go +++ /dev/null @@ -1,521 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - "html/template" - "net/http" - "net/url" - "strings" - "time" - - "github.com/haileyok/atproto-oauth-golang/helpers" -) - -var allowedHandles = map[string]bool{ - "1440.news": true, - "wehrv.bsky.social": true, -} - -// HandleClientMetadata serves the OAuth client metadata -func (m *OAuthManager) HandleClientMetadata(w http.ResponseWriter, r *http.Request) { - // Get the JWKS URI from the same host - scheme := "https" - if r.TLS == nil && (r.Host == "localhost" || r.Host == "127.0.0.1" || r.Host == "app.1440.localhost:4321") { - scheme = "http" - } - baseURL := scheme + "://" + r.Host - - metadata := map[string]interface{}{ - "client_id": m.clientID, - "client_name": "1440.news Dashboard", - "client_uri": baseURL, - "redirect_uris": []string{m.redirectURI}, - "grant_types": []string{"authorization_code", "refresh_token"}, - "response_types": []string{"code"}, - "scope": "atproto", - "token_endpoint_auth_method": 
"private_key_jwt", - "token_endpoint_auth_signing_alg": "ES256", - "dpop_bound_access_tokens": true, - "jwks_uri": baseURL + "/.well-known/jwks.json", - "application_type": "web", - "subject_type": "public", - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(metadata) -} - -// HandleJWKS serves the public JWK set -func (m *OAuthManager) HandleJWKS(w http.ResponseWriter, r *http.Request) { - jwks := helpers.CreateJwksResponseObject(m.publicJWK) - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(jwks) -} - -// HandleLogin serves the login page or initiates OAuth flow -func (m *OAuthManager) HandleLogin(w http.ResponseWriter, r *http.Request) { - // Check if already logged in - if session := m.GetSessionFromCookie(r); session != nil { - http.Redirect(w, r, "/dashboard", http.StatusFound) - return - } - - // If handle is provided, start OAuth flow - handle := r.URL.Query().Get("handle") - if handle != "" { - // Save handle to cookie for prefill on next visit - http.SetCookie(w, &http.Cookie{ - Name: "last_handle", - Value: handle, - Path: "/", - MaxAge: 86400 * 365, // 1 year - HttpOnly: true, - SameSite: http.SameSiteLaxMode, - }) - m.startOAuthFlow(w, r, handle) - return - } - - // Get last handle from cookie for prefill - lastHandle := "" - if cookie, err := r.Cookie("last_handle"); err == nil { - lastHandle = cookie.Value - } - - // Serve login page - w.Header().Set("Content-Type", "text/html; charset=utf-8") - tmpl := template.Must(template.New("login").Parse(loginPageHTML)) - tmpl.Execute(w, map[string]string{"LastHandle": lastHandle}) -} - -// startOAuthFlow initiates the OAuth flow for a given handle -func (m *OAuthManager) startOAuthFlow(w http.ResponseWriter, r *http.Request, handle string) { - ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) - defer cancel() - - // Auto-append .bsky.social if handle has no dots - if !strings.Contains(handle, ".") { - handle = handle + ".bsky.social" - } - - fmt.Printf("OAuth: starting flow for handle: %s\n", handle) - - // Resolve handle to DID - did, err := resolveHandle(ctx, handle) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve handle: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: resolved DID: %s\n", did) - - // Resolve DID to PDS service URL - pdsURL, err := resolveDIDToService(ctx, did) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve PDS: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: PDS URL: %s\n", pdsURL) - - // Get auth server from PDS - authServerURL, err := m.client.ResolvePdsAuthServer(ctx, pdsURL) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to resolve auth server: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: auth server: %s\n", authServerURL) - - // Fetch auth server metadata - authMeta, err := m.client.FetchAuthServerMetadata(ctx, authServerURL) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to fetch auth metadata: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: auth endpoint: %s\n", authMeta.AuthorizationEndpoint) - - // Generate DPoP private key for this auth flow - dpopKey, err := helpers.GenerateKey(nil) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to generate DPoP key: %v", err), http.StatusInternalServerError) - return - } - dpopKeyBytes, err := json.Marshal(dpopKey) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to marshal DPoP key: %v", err), http.StatusInternalServerError) - return - } - - // 
Send PAR (Pushed Authorization Request) - fmt.Printf("OAuth: sending PAR to %s\n", authServerURL) - parResp, err := m.client.SendParAuthRequest( - ctx, - authServerURL, - authMeta, - handle, - m.allowedScope, - dpopKey, - ) - if err != nil { - fmt.Printf("OAuth: PAR failed: %v\n", err) - http.Error(w, fmt.Sprintf("PAR request failed: %v", err), http.StatusBadRequest) - return - } - fmt.Printf("OAuth: PAR success, request_uri: %s\n", parResp.RequestUri) - - // Save pending auth state - pending := &PendingAuth{ - State: parResp.State, - PkceVerifier: parResp.PkceVerifier, - DpopPrivateJWK: string(dpopKeyBytes), - DpopNonce: parResp.DpopAuthserverNonce, - DID: did, - PdsURL: pdsURL, - AuthserverIss: authMeta.Issuer, - } - m.sessions.SavePending(parResp.State, pending) - - // Build authorization URL - authURL, err := url.Parse(authMeta.AuthorizationEndpoint) - if err != nil { - http.Error(w, fmt.Sprintf("Invalid auth endpoint: %v", err), http.StatusInternalServerError) - return - } - - q := authURL.Query() - q.Set("client_id", m.clientID) - q.Set("request_uri", parResp.RequestUri) - authURL.RawQuery = q.Encode() - - fmt.Printf("OAuth: redirecting to: %s\n", authURL.String()) - - http.Redirect(w, r, authURL.String(), http.StatusFound) -} - -// HandleCallback handles the OAuth callback -func (m *OAuthManager) HandleCallback(w http.ResponseWriter, r *http.Request) { - fmt.Printf("OAuth callback: received request from %s\n", r.URL.String()) - - ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) - defer cancel() - - // Get callback parameters - code := r.URL.Query().Get("code") - state := r.URL.Query().Get("state") - iss := r.URL.Query().Get("iss") - errorParam := r.URL.Query().Get("error") - errorDesc := r.URL.Query().Get("error_description") - - codePreview := code - if len(codePreview) > 10 { - codePreview = codePreview[:10] - } - statePreview := state - if len(statePreview) > 10 { - statePreview = statePreview[:10] - } - fmt.Printf("OAuth callback: code=%s..., state=%s..., iss=%s, error=%s\n", - codePreview, statePreview, iss, errorParam) - - // Check for errors from auth server - if errorParam != "" { - http.Error(w, fmt.Sprintf("Authorization error: %s - %s", errorParam, errorDesc), http.StatusBadRequest) - return - } - - if code == "" || state == "" { - http.Error(w, "Missing code or state parameter", http.StatusBadRequest) - return - } - - // Retrieve pending auth state - pending := m.sessions.GetPending(state) - if pending == nil { - fmt.Printf("OAuth callback: no pending state found for %s\n", state) - http.Error(w, "Invalid or expired state", http.StatusBadRequest) - return - } - fmt.Printf("OAuth callback: found pending state for DID %s\n", pending.DID) - - // Verify issuer matches - if iss != "" && iss != pending.AuthserverIss { - http.Error(w, "Issuer mismatch", http.StatusBadRequest) - return - } - - // Parse DPoP private key - dpopKey, err := helpers.ParseJWKFromBytes([]byte(pending.DpopPrivateJWK)) - if err != nil { - http.Error(w, fmt.Sprintf("Failed to parse DPoP key: %v", err), http.StatusInternalServerError) - return - } - - // Exchange code for tokens - fmt.Printf("OAuth callback: exchanging code for tokens at %s\n", pending.AuthserverIss) - tokenResp, err := m.client.InitialTokenRequest( - ctx, - code, - pending.AuthserverIss, - pending.PkceVerifier, - pending.DpopNonce, - dpopKey, - ) - if err != nil { - fmt.Printf("OAuth callback: token exchange failed: %v\n", err) - http.Error(w, fmt.Sprintf("Token exchange failed: %v", err), http.StatusBadRequest) - return - 
} - fmt.Printf("OAuth callback: token exchange success, sub=%s, scope=%s\n", tokenResp.Sub, tokenResp.Scope) - - // Verify scope - if tokenResp.Scope != m.allowedScope { - fmt.Printf("OAuth callback: scope mismatch: expected %s, got %s\n", m.allowedScope, tokenResp.Scope) - http.Error(w, fmt.Sprintf("Invalid scope: expected %s, got %s", m.allowedScope, tokenResp.Scope), http.StatusForbidden) - return - } - - // Resolve DID to handle - fmt.Printf("OAuth callback: resolving DID %s to handle\n", tokenResp.Sub) - handle, err := resolveDIDToHandle(ctx, tokenResp.Sub) - if err != nil { - fmt.Printf("OAuth callback: failed to resolve handle: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to resolve handle: %v", err), http.StatusInternalServerError) - return - } - fmt.Printf("OAuth callback: resolved handle: %s\n", handle) - - // CRITICAL: Verify user is allowed - if !allowedHandles[handle] { - fmt.Printf("OAuth callback: access denied for handle: %s (allowed: %v)\n", handle, allowedHandles) - http.Error(w, "Access denied.", http.StatusForbidden) - return - } - fmt.Printf("OAuth callback: handle %s is allowed\n", handle) - - // Create session - fmt.Printf("OAuth callback: creating session for %s\n", handle) - session, err := m.sessions.CreateSession(tokenResp.Sub, handle) - if err != nil { - fmt.Printf("OAuth callback: failed to create session: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to create session: %v", err), http.StatusInternalServerError) - return - } - fmt.Printf("OAuth callback: session created with ID %s\n", session.ID) - - // Store token info in session - session.AccessToken = tokenResp.AccessToken - session.RefreshToken = tokenResp.RefreshToken - session.TokenExpiry = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second) - session.DpopPrivateJWK = pending.DpopPrivateJWK - session.DpopAuthserverNonce = tokenResp.DpopAuthserverNonce - session.PdsURL = pending.PdsURL - session.AuthserverIss = pending.AuthserverIss - m.sessions.UpdateSession(session) - - // Set session cookie - fmt.Printf("OAuth callback: setting session cookie\n") - if err := m.SetSessionCookie(w, r, session.ID); err != nil { - fmt.Printf("OAuth callback: failed to set cookie: %v\n", err) - http.Error(w, fmt.Sprintf("Failed to set cookie: %v", err), http.StatusInternalServerError) - return - } - - // Redirect to dashboard - fmt.Printf("OAuth callback: success! 
redirecting to /dashboard\n") - http.Redirect(w, r, "/dashboard", http.StatusFound) -} - -// HandleLogout clears the session and redirects to login -func (m *OAuthManager) HandleLogout(w http.ResponseWriter, r *http.Request) { - // Get current session - session := m.GetSessionFromCookie(r) - if session != nil { - // Delete session from store - m.sessions.DeleteSession(session.ID) - } - - // Clear cookie - m.ClearSessionCookie(w) - - // Handle API vs browser request - if r.Method == http.MethodPost || isAPIRequest(r) { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "status": "logged out", - }) - return - } - - // Redirect to login for browser requests - http.Redirect(w, r, "/auth/login", http.StatusFound) -} - -// HandleSessionInfo returns current session info (for API calls) -func (m *OAuthManager) HandleSessionInfo(w http.ResponseWriter, r *http.Request) { - session := m.GetSessionFromCookie(r) - if session == nil { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusUnauthorized) - json.NewEncoder(w).Encode(map[string]string{ - "error": "not authenticated", - }) - return - } - - info := &SessionInfo{ - DID: session.DID, - Handle: session.Handle, - ExpiresAt: session.ExpiresAt, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(info) -} - -const loginPageHTML = `
-[loginPageHTML template: page titled "Sign In - 1440.news Dashboard" with a "Dashboard Authentication" header and a "Sign In with Bluesky" card containing a handle input, prefilled from the LastHandle template value, that submits to /auth/login; the markup itself is not recoverable from this extraction]
- - - - -` diff --git a/oauth_middleware.go b/oauth_middleware.go deleted file mode 100644 index 55f7679..0000000 --- a/oauth_middleware.go +++ /dev/null @@ -1,126 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "github.com/haileyok/atproto-oauth-golang/helpers" -) - -// RequireAuth is middleware that protects routes requiring authentication -func (m *OAuthManager) RequireAuth(next http.HandlerFunc) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - session := m.GetSessionFromCookie(r) - if session == nil { - fmt.Printf("RequireAuth: no session found for %s\n", r.URL.Path) - // Check if this is an API call (wants JSON response) - if isAPIRequest(r) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusUnauthorized) - json.NewEncoder(w).Encode(map[string]string{ - "error": "unauthorized", - }) - return - } - // Redirect to login for browser requests - http.Redirect(w, r, "/auth/login", http.StatusFound) - return - } - - // Check if token needs refresh (refresh when within 5 minutes of expiry) - if time.Until(session.TokenExpiry) < 5*time.Minute { - if err := m.refreshToken(r.Context(), session); err != nil { - // Token refresh failed - clear session and redirect to login - m.sessions.DeleteSession(session.ID) - m.ClearSessionCookie(w) - - if isAPIRequest(r) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusUnauthorized) - json.NewEncoder(w).Encode(map[string]string{ - "error": "session expired", - }) - return - } - http.Redirect(w, r, "/auth/login", http.StatusFound) - return - } - } - - // Add session to request context - ctx := context.WithValue(r.Context(), sessionContextKey, session) - next(w, r.WithContext(ctx)) - } -} - -// sessionContextKey is the context key for the OAuth session -type contextKey string - -const sessionContextKey contextKey = "oauth_session" - -// GetSession retrieves the session from request context -func GetSession(r *http.Request) *OAuthSession { - session, _ := r.Context().Value(sessionContextKey).(*OAuthSession) - return session -} - -// isAPIRequest checks if the request expects JSON response -func isAPIRequest(r *http.Request) bool { - // Check Accept header - accept := r.Header.Get("Accept") - if strings.Contains(accept, "application/json") { - return true - } - - // Check URL path - if strings.HasPrefix(r.URL.Path, "/api/") { - return true - } - - // Check X-Requested-With header (for AJAX) - if r.Header.Get("X-Requested-With") == "XMLHttpRequest" { - return true - } - - return false -} - -// refreshToken refreshes the OAuth access token -func (m *OAuthManager) refreshToken(ctx context.Context, session *OAuthSession) error { - if session.RefreshToken == "" { - return nil // No refresh token available - } - - // Parse the DPoP private key - dpopKey, err := helpers.ParseJWKFromBytes([]byte(session.DpopPrivateJWK)) - if err != nil { - return err - } - - // Refresh the token - tokenResp, err := m.client.RefreshTokenRequest( - ctx, - session.RefreshToken, - session.AuthserverIss, - session.DpopAuthserverNonce, - dpopKey, - ) - if err != nil { - return err - } - - // Update session with new tokens - session.AccessToken = tokenResp.AccessToken - session.RefreshToken = tokenResp.RefreshToken - session.TokenExpiry = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second) - session.DpopAuthserverNonce = tokenResp.DpopAuthserverNonce - - // Save updated session - m.sessions.UpdateSession(session) - - return nil -} 
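The deleted `RequireAuth`/`GetSession` pair composes as ordinary `net/http` middleware: `RequireAuth` validates the session cookie, refreshes a near-expiry token, and stashes the session in the request context, where the wrapped handler retrieves it. A minimal sketch of the intended wiring (the `/api/whoami` route and `registerWhoAmI` helper are hypothetical, not part of this codebase):

```go
package main

import (
	"encoding/json"
	"net/http"
)

// registerWhoAmI is a hypothetical route illustrating the pairing:
// RequireAuth rejects or redirects unauthenticated requests and places
// the session in the request context, where GetSession reads it back.
func registerWhoAmI(oauth *OAuthManager) {
	http.HandleFunc("/api/whoami", oauth.RequireAuth(func(w http.ResponseWriter, r *http.Request) {
		session := GetSession(r) // non-nil here: RequireAuth already validated the cookie
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]string{
			"did":    session.DID,
			"handle": session.Handle,
		})
	}))
}
```

Every dashboard `/api/*` route in the deleted `routes.go` below was wrapped this way via its `withAuth` helper.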
diff --git a/oauth_session.go b/oauth_session.go deleted file mode 100644 index 5fbdc8d..0000000 --- a/oauth_session.go +++ /dev/null @@ -1,352 +0,0 @@ -package main - -import ( - "crypto/aes" - "crypto/cipher" - "crypto/rand" - "encoding/base64" - "encoding/json" - "fmt" - "io" - "net/http" - "sync" - "time" -) - -const ( - sessionCookieName = "1440_session" - sessionTTL = 24 * time.Hour -) - -// OAuthSession stores the OAuth session state for a user -type OAuthSession struct { - ID string `json:"id"` - DID string `json:"did"` - Handle string `json:"handle"` - CreatedAt time.Time `json:"created_at"` - ExpiresAt time.Time `json:"expires_at"` - - // OAuth tokens (stored server-side only) - AccessToken string `json:"access_token"` - RefreshToken string `json:"refresh_token"` - TokenExpiry time.Time `json:"token_expiry"` - - // DPoP state - DpopPrivateJWK string `json:"dpop_private_jwk"` - DpopAuthserverNonce string `json:"dpop_authserver_nonce"` - DpopPdsNonce string `json:"dpop_pds_nonce"` - - // Auth server info - PdsURL string `json:"pds_url"` - AuthserverIss string `json:"authserver_iss"` -} - -// PendingAuth stores state during the OAuth flow (before callback) -type PendingAuth struct { - State string `json:"state"` - PkceVerifier string `json:"pkce_verifier"` - DpopPrivateJWK string `json:"dpop_private_jwk"` - DpopNonce string `json:"dpop_nonce"` - DID string `json:"did"` - PdsURL string `json:"pds_url"` - AuthserverIss string `json:"authserver_iss"` - CreatedAt time.Time `json:"created_at"` -} - -// SessionStore manages sessions in the database -type SessionStore struct { - db *DB - pending map[string]*PendingAuth // keyed by state (short-lived, kept in memory) - mu sync.RWMutex - cleanupOnce sync.Once -} - -// NewSessionStore creates a new session store -func NewSessionStore(db *DB) *SessionStore { - s := &SessionStore{ - db: db, - pending: make(map[string]*PendingAuth), - } - s.startCleanup() - return s -} - -// startCleanup starts a background goroutine to clean up expired sessions -func (s *SessionStore) startCleanup() { - s.cleanupOnce.Do(func() { - go func() { - ticker := time.NewTicker(5 * time.Minute) - defer ticker.Stop() - for range ticker.C { - s.cleanup() - } - }() - }) -} - -// cleanup removes expired sessions and pending auths -func (s *SessionStore) cleanup() { - // Clean up expired sessions from database - s.db.Exec("DELETE FROM oauth_sessions WHERE expires_at < NOW()") - - // Clean up old pending auths (10 minute timeout) from memory - s.mu.Lock() - defer s.mu.Unlock() - now := time.Now() - for state, pending := range s.pending { - if now.Sub(pending.CreatedAt) > 10*time.Minute { - delete(s.pending, state) - } - } -} - -// CreateSession creates a new session and returns it -func (s *SessionStore) CreateSession(did, handle string) (*OAuthSession, error) { - id, err := generateRandomID() - if err != nil { - return nil, err - } - - now := time.Now() - session := &OAuthSession{ - ID: id, - DID: did, - Handle: handle, - CreatedAt: now, - ExpiresAt: now.Add(sessionTTL), - } - - _, err = s.db.Exec(` - INSERT INTO oauth_sessions (id, did, handle, created_at, expires_at) - VALUES ($1, $2, $3, $4, $5) - `, session.ID, session.DID, session.Handle, session.CreatedAt, session.ExpiresAt) - if err != nil { - return nil, err - } - - return session, nil -} - -// GetSession retrieves a session by ID -func (s *SessionStore) GetSession(id string) *OAuthSession { - row := s.db.QueryRow(` - SELECT id, did, handle, created_at, expires_at, - access_token, refresh_token, token_expiry, - 
dpop_private_jwk, dpop_authserver_nonce, dpop_pds_nonce, - pds_url, authserver_iss - FROM oauth_sessions - WHERE id = $1 AND expires_at > NOW() - `, id) - - var session OAuthSession - var accessToken, refreshToken, dpopJwk, dpopAuthNonce, dpopPdsNonce, pdsURL, authIss *string - var tokenExpiry *time.Time - - err := row.Scan( - &session.ID, &session.DID, &session.Handle, &session.CreatedAt, &session.ExpiresAt, - &accessToken, &refreshToken, &tokenExpiry, - &dpopJwk, &dpopAuthNonce, &dpopPdsNonce, - &pdsURL, &authIss, - ) - if err != nil { - return nil - } - - session.AccessToken = StringValue(accessToken) - session.RefreshToken = StringValue(refreshToken) - if tokenExpiry != nil { - session.TokenExpiry = *tokenExpiry - } - session.DpopPrivateJWK = StringValue(dpopJwk) - session.DpopAuthserverNonce = StringValue(dpopAuthNonce) - session.DpopPdsNonce = StringValue(dpopPdsNonce) - session.PdsURL = StringValue(pdsURL) - session.AuthserverIss = StringValue(authIss) - - return &session -} - -// UpdateSession updates a session -func (s *SessionStore) UpdateSession(session *OAuthSession) { - s.db.Exec(` - UPDATE oauth_sessions SET - access_token = $2, - refresh_token = $3, - token_expiry = $4, - dpop_private_jwk = $5, - dpop_authserver_nonce = $6, - dpop_pds_nonce = $7, - pds_url = $8, - authserver_iss = $9 - WHERE id = $1 - `, - session.ID, - NullableString(session.AccessToken), - NullableString(session.RefreshToken), - NullableTime(session.TokenExpiry), - NullableString(session.DpopPrivateJWK), - NullableString(session.DpopAuthserverNonce), - NullableString(session.DpopPdsNonce), - NullableString(session.PdsURL), - NullableString(session.AuthserverIss), - ) -} - -// DeleteSession removes a session -func (s *SessionStore) DeleteSession(id string) { - s.db.Exec("DELETE FROM oauth_sessions WHERE id = $1", id) -} - -// SavePending saves pending OAuth state (kept in memory - short lived) -func (s *SessionStore) SavePending(state string, pending *PendingAuth) { - s.mu.Lock() - defer s.mu.Unlock() - pending.CreatedAt = time.Now() - s.pending[state] = pending -} - -// GetPending retrieves and removes pending OAuth state -func (s *SessionStore) GetPending(state string) *PendingAuth { - s.mu.Lock() - defer s.mu.Unlock() - - pending, ok := s.pending[state] - if ok { - delete(s.pending, state) - } - return pending -} - -// generateRandomID generates a random session ID -func generateRandomID() (string, error) { - b := make([]byte, 32) - if _, err := rand.Read(b); err != nil { - return "", err - } - return base64.URLEncoding.EncodeToString(b), nil -} - -// encryptSessionID encrypts a session ID using AES-256-GCM -func encryptSessionID(sessionID string, key []byte) (string, error) { - block, err := aes.NewCipher(key) - if err != nil { - return "", err - } - - gcm, err := cipher.NewGCM(block) - if err != nil { - return "", err - } - - nonce := make([]byte, gcm.NonceSize()) - if _, err := io.ReadFull(rand.Reader, nonce); err != nil { - return "", err - } - - ciphertext := gcm.Seal(nonce, nonce, []byte(sessionID), nil) - return base64.URLEncoding.EncodeToString(ciphertext), nil -} - -// decryptSessionID decrypts a session ID using AES-256-GCM -func decryptSessionID(encrypted string, key []byte) (string, error) { - ciphertext, err := base64.URLEncoding.DecodeString(encrypted) - if err != nil { - return "", err - } - - block, err := aes.NewCipher(key) - if err != nil { - return "", err - } - - gcm, err := cipher.NewGCM(block) - if err != nil { - return "", err - } - - if len(ciphertext) < gcm.NonceSize() { - return 
"", fmt.Errorf("ciphertext too short") - } - - nonce, ciphertext := ciphertext[:gcm.NonceSize()], ciphertext[gcm.NonceSize():] - plaintext, err := gcm.Open(nil, nonce, ciphertext, nil) - if err != nil { - return "", err - } - - return string(plaintext), nil -} - -// SetSessionCookie sets an encrypted session cookie -func (m *OAuthManager) SetSessionCookie(w http.ResponseWriter, r *http.Request, sessionID string) error { - encrypted, err := encryptSessionID(sessionID, m.cookieSecret) - if err != nil { - return err - } - - // Only set Secure flag for HTTPS connections - secure := r.TLS != nil || r.Header.Get("X-Forwarded-Proto") == "https" - - http.SetCookie(w, &http.Cookie{ - Name: sessionCookieName, - Value: encrypted, - Path: "/", - HttpOnly: true, - Secure: secure, - SameSite: http.SameSiteLaxMode, - MaxAge: int(sessionTTL.Seconds()), - }) - - return nil -} - -// GetSessionFromCookie retrieves the session from the request cookie -func (m *OAuthManager) GetSessionFromCookie(r *http.Request) *OAuthSession { - cookie, err := r.Cookie(sessionCookieName) - if err != nil { - fmt.Printf("GetSessionFromCookie: no cookie found: %v\n", err) - return nil - } - fmt.Printf("GetSessionFromCookie: found cookie, length=%d\n", len(cookie.Value)) - - sessionID, err := decryptSessionID(cookie.Value, m.cookieSecret) - if err != nil { - fmt.Printf("GetSessionFromCookie: decrypt failed: %v\n", err) - return nil - } - fmt.Printf("GetSessionFromCookie: decrypted session ID: %s\n", sessionID) - - session := m.sessions.GetSession(sessionID) - if session == nil { - fmt.Printf("GetSessionFromCookie: session not found in store\n") - } else { - fmt.Printf("GetSessionFromCookie: found session for %s\n", session.Handle) - } - return session -} - -// ClearSessionCookie removes the session cookie -func (m *OAuthManager) ClearSessionCookie(w http.ResponseWriter) { - http.SetCookie(w, &http.Cookie{ - Name: sessionCookieName, - Value: "", - Path: "/", - HttpOnly: true, - Secure: true, - SameSite: http.SameSiteLaxMode, - MaxAge: -1, - }) -} - -// SessionInfo is the public session info returned to the client -type SessionInfo struct { - DID string `json:"did"` - Handle string `json:"handle"` - ExpiresAt time.Time `json:"expires_at"` -} - -// MarshalJSON converts SessionInfo to JSON -func (s *SessionInfo) MarshalJSON() ([]byte, error) { - type Alias SessionInfo - return json.Marshal((*Alias)(s)) -} diff --git a/routes.go b/routes.go deleted file mode 100644 index f486094..0000000 --- a/routes.go +++ /dev/null @@ -1,215 +0,0 @@ -package main - -import ( - "fmt" - "net/http" - "os" - "strings" -) - -func (c *Crawler) StartDashboard(addr string) error { - // Determine base URL for OAuth - baseURL := os.Getenv("OAUTH_BASE_URL") - if baseURL == "" { - // Default based on whether we're in production - if strings.Contains(addr, "0.0.0.0") { - baseURL = "https://app.1440.news" - } else { - baseURL = "http://" + addr - } - } - - // Initialize OAuth manager - oauthCfg, err := LoadOAuthConfig(baseURL) - var oauth *OAuthManager - if err != nil { - fmt.Printf("OAuth not configured: %v (dashboard will be unprotected)\n", err) - } else { - oauth, err = NewOAuthManager(*oauthCfg, c.db) - if err != nil { - fmt.Printf("Failed to initialize OAuth: %v (dashboard will be unprotected)\n", err) - oauth = nil - } else { - fmt.Println("OAuth authentication enabled for dashboard") - } - } - - // OAuth endpoints (always public) - if oauth != nil { - http.HandleFunc("/.well-known/oauth-client-metadata", oauth.HandleClientMetadata) - 
http.HandleFunc("/.well-known/jwks.json", oauth.HandleJWKS) - http.HandleFunc("/auth/login", oauth.HandleLogin) - http.HandleFunc("/auth/callback", oauth.HandleCallback) - http.HandleFunc("/auth/logout", oauth.HandleLogout) - http.HandleFunc("/auth/session", oauth.HandleSessionInfo) - } - - // Helper to wrap handlers with auth if OAuth is enabled - withAuth := func(h http.HandlerFunc) http.HandlerFunc { - if oauth != nil { - return oauth.RequireAuth(h) - } - return h - } - - http.HandleFunc("/dashboard", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleDashboard(w, r) - })) - - // Root handler for url.1440.news short URLs and 1440.news accounts directory - http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - host := r.Host - // Strip port if present - if idx := strings.Index(host, ":"); idx != -1 { - host = host[:idx] - } - - // If this is url.1440.news, treat path as short code - if host == "url.1440.news" || host == "url.1440.localhost" { - c.handleRedirect(w, r) - return - } - - // If this is 1440.news (apex), serve accounts directory - if host == "1440.news" || host == "1440.localhost" { - if r.URL.Path == "/" || r.URL.Path == "" { - c.handleAccountsDirectory(w, r) - return - } - } - - // Otherwise, redirect to dashboard for root path - if r.URL.Path == "/" { - http.Redirect(w, r, "/dashboard", http.StatusFound) - return - } - - // Unknown path - http.NotFound(w, r) - }) - - http.HandleFunc("/api/stats", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIStats(w, r) - })) - http.HandleFunc("/api/allDomains", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIAllDomains(w, r) - })) - http.HandleFunc("/api/domainFeeds", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDomainFeeds(w, r) - })) - http.HandleFunc("/api/feedInfo", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIFeedInfo(w, r) - })) - http.HandleFunc("/api/feedItems", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIFeedItems(w, r) - })) - http.HandleFunc("/api/search", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPISearch(w, r) - })) - http.HandleFunc("/api/tlds", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPITLDs(w, r) - })) - http.HandleFunc("/api/searchStats", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPISearchStats(w, r) - })) - http.HandleFunc("/api/tldDomains", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPITLDDomains(w, r) - })) - http.HandleFunc("/api/revisitDomain", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIRevisitDomain(w, r) - })) - http.HandleFunc("/api/priorityCrawl", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPriorityCrawl(w, r) - })) - // Internal crawl endpoint (no auth) - not exposed via traefik - http.HandleFunc("/internal/crawl", func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPriorityCrawl(w, r) - }) - http.HandleFunc("/api/checkFeed", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPICheckFeed(w, r) - })) - http.HandleFunc("/api/domainsByStatus", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDomainsByStatus(w, r) - })) - http.HandleFunc("/api/feedsByStatus", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIFeedsByStatus(w, r) - })) - http.HandleFunc("/api/domains", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDomains(w, r) - })) - 
http.HandleFunc("/api/feeds", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIFeeds(w, r) - })) - http.HandleFunc("/api/setDomainStatus", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPISetDomainStatus(w, r) - })) - http.HandleFunc("/api/filter", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIFilter(w, r) - })) - http.HandleFunc("/api/enablePublish", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIEnablePublish(w, r) - })) - http.HandleFunc("/api/disablePublish", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDisablePublish(w, r) - })) - http.HandleFunc("/api/publishEnabled", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPublishEnabled(w, r) - })) - http.HandleFunc("/api/publishDenied", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPublishDenied(w, r) - })) - http.HandleFunc("/api/publishCandidates", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPublishCandidates(w, r) - })) - http.HandleFunc("/api/setPublishStatus", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPISetPublishStatus(w, r) - })) - http.HandleFunc("/api/unpublishedItems", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIUnpublishedItems(w, r) - })) - http.HandleFunc("/api/testPublish", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPITestPublish(w, r) - })) - http.HandleFunc("/api/deriveHandle", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDeriveHandle(w, r) - })) - http.HandleFunc("/api/publishFeed", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPublishFeed(w, r) - })) - http.HandleFunc("/api/createAccount", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPICreateAccount(w, r) - })) - http.HandleFunc("/api/publishFeedFull", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIPublishFeedFull(w, r) - })) - http.HandleFunc("/api/updateProfile", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIUpdateProfile(w, r) - })) - http.HandleFunc("/api/languages", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPILanguages(w, r) - })) - http.HandleFunc("/api/denyDomain", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDenyDomain(w, r) - })) - http.HandleFunc("/api/undenyDomain", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIUndenyDomain(w, r) - })) - http.HandleFunc("/api/dropDomain", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIDropDomain(w, r) - })) - http.HandleFunc("/api/tldStats", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPITLDStats(w, r) - })) - http.HandleFunc("/api/resetAllPublishing", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIResetAllPublishing(w, r) - })) - http.HandleFunc("/api/refreshProfiles", withAuth(func(w http.ResponseWriter, r *http.Request) { - c.handleAPIRefreshProfiles(w, r) - })) - http.HandleFunc("/static/", func(w http.ResponseWriter, r *http.Request) { - http.StripPrefix("/static/", http.FileServer(http.Dir("static"))).ServeHTTP(w, r) - }) - - fmt.Printf("Dashboard running at http://%s\n", addr) - return http.ListenAndServe(addr, nil) -} diff --git a/static/dashboard.css b/static/dashboard.css deleted file mode 100644 index 7407cbb..0000000 --- a/static/dashboard.css +++ /dev/null @@ -1,185 +0,0 @@ -* { box-sizing: border-box; margin: 0; 
padding: 0; } -body { - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, monospace; - background: #0a0a0a; - color: #ffffff; - padding: 0 10px; - line-height: 1.6; -} -h1 { color: #ffffff; margin-bottom: 20px; font-size: 24px; } -h2 { color: #ffffff; margin: 4px 0; font-size: 14px; text-transform: uppercase; letter-spacing: 1px; } -h2:first-child { margin-top: 0; } -#topSection { - background: #0a0a0a; - padding: 0 0 4px 0; -} -#topSection.fixed { - position: fixed; - top: 0; - left: 0; - right: 0; - z-index: 100; - padding: 0 10px 4px 10px; - transform: translateY(0); - transition: transform 0.3s ease; -} -#topSection.fixed.hidden { - transform: translateY(-100%); -} -#topSectionSpacer { - display: none; -} -#topSectionSpacer.active { - display: block; -} -#inputCard { margin: 10px 0; } -.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-bottom: 20px; } -.grid-narrow { display: inline-grid; grid-template-columns: none; grid-auto-flow: column; grid-auto-columns: 115px; gap: 10px; margin-bottom: 0; } -.card { - background: #151515; - border: 1px solid #252525; - border-radius: 8px; - padding: 15px; -} -.card.clickable { - cursor: pointer; - transition: background 0.2s, border-color 0.2s, transform 0.1s; -} -.card.clickable:hover { - background: #1a1a1a; - border-color: #0af; -} -.card.clickable:active { - transform: scale(0.98); -} -.card.clickable.active { - background: #1a1a1a; - border-color: #0af; - box-shadow: 0 0 10px rgba(0, 170, 255, 0.3); -} -.stat-value { font-weight: bold; color: #ffffff; text-align: center; } -.stat-label { color: #888; text-transform: uppercase; text-align: center; } -.stat-row { display: flex; justify-content: space-between; padding: 5px 0; border-bottom: 1px solid #202020; color: #ffffff; } -.stat-row:last-child { border-bottom: none; } -.progress-bar { - background: #202020; - border-radius: 4px; - height: 8px; - margin-top: 10px; - overflow: hidden; -} -.progress-fill { - background: linear-gradient(90deg, #00aa55, #00cc66); - height: 100%; - transition: width 0.3s; -} -table { width: 100%; border-collapse: collapse; color: #ffffff; } -th, td { text-align: left; padding: 8px; border-bottom: 1px solid #202020; } -th { color: #ffffff; font-size: 11px; text-transform: uppercase; } -td { font-size: 13px; color: #ffffff; } -.type-rss { color: #f90; } -.type-atom { color: #09f; } -.type-unknown { color: #ffffff; } -.url { - max-width: 400px; - overflow: hidden; - text-overflow: ellipsis; - white-space: nowrap; - color: #4a9eff; -} -.time { color: #ffffff; font-size: 12px; } -.updated { color: #ffffff; font-size: 11px; text-align: right; margin-top: 20px; } - -/* Search */ -#searchInput:focus { outline: none; border-color: #0af; } -#searchInput::placeholder { color: #555; } -.search-host { margin-bottom: 10px; } -.search-feed:hover { background: #1a1a1a; } - -/* Command buttons */ -.cmd-btn { - background: #1a1a1a; - border: 1px solid #333; - border-radius: 4px; - color: #0af; - padding: 6px 12px; - margin-right: 8px; - margin-bottom: 4px; - font-size: 13px; - font-family: monospace; - cursor: pointer; - transition: background 0.2s, border-color 0.2s; -} -.cmd-btn:hover { - background: #252525; - border-color: #0af; -} -.cmd-btn:active { - background: #0af; - color: #000; -} - -/* Visit link */ -.visit-link:hover { - color: #0af !important; -} - -/* TLD Grid */ -.domain-list { - display: flex; - flex-wrap: wrap; - gap: 10px; -} -.tld-section { - width: 135px; - background: #151515; - border: 1px 
solid #252525; - border-radius: 8px; - cursor: pointer; - transition: background 0.2s, border-color 0.2s; -} -.tld-section:hover { - background: #1a1a1a; - border-color: #0af; -} -.tld-section.expanded { - background: #1a1a1a; - border-color: #0af; - box-shadow: 0 0 10px rgba(0, 170, 255, 0.3); -} -.tld-section .tld-header { - padding: 4px; - text-align: center; -} -.tld-section .tld-name { - color: #0af; - font-weight: normal; - font-size: 10pt; -} -.tld-section .tld-content { - display: none; -} -/* Expanded content shown in separate container */ -#expandedTLDContent { - margin-top: 10px; - background: #151515; - border: 1px solid #0af; - border-radius: 8px; -} -#expandedTLDContent .tld-header { - display: flex; - align-items: center; - padding: 10px; - background: #1a1a1a; - border-bottom: 1px solid #333; - cursor: pointer; -} -#expandedTLDContent .tld-toggle { - color: #666; - margin-right: 10px; -} -#expandedTLDContent .tld-name { - color: #0af; - font-weight: bold; - font-size: 1.1em; -} diff --git a/static/dashboard.js b/static/dashboard.js deleted file mode 100644 index 9d58a11..0000000 --- a/static/dashboard.js +++ /dev/null @@ -1,874 +0,0 @@ -function initDashboard() { - function commaFormat(n) { - return n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ','); - } - - function escapeHtml(text) { - if (text == null) return ''; - const div = document.createElement('div'); - div.textContent = text; - return div.innerHTML; - } - - // State - let infiniteScrollState = null; - let isLoadingMore = false; - let searchQuery = ''; - let domainFilter = 'all'; // all, pass, skip, hold, dead - // Feed filter: multi-select with ALL as exclusion toggle - // When allSelected=true, selected items are EXCLUDED; when false, selected items are INCLUDED - let feedFilter = { allSelected: false, statuses: [], types: [] }; - let currentOpenTLD = null; // Track which TLD is currently open - - // Smart sticky header - scroll normally, show fixed on scroll up - let lastScrollY = 0; - const topSection = document.getElementById('topSection'); - const spacer = document.getElementById('topSectionSpacer'); - let headerHeight = topSection.offsetHeight; - let isFixed = false; - - window.addEventListener('scroll', () => { - const currentScrollY = window.scrollY; - - // If at top, return to normal flow - if (currentScrollY <= 0) { - topSection.classList.remove('fixed', 'hidden'); - spacer.classList.remove('active'); - isFixed = false; - lastScrollY = currentScrollY; - return; - } - - // Only activate fixed mode after scrolling past the header - if (currentScrollY > headerHeight) { - if (currentScrollY < lastScrollY) { - // Scrolling up - show fixed header - if (!isFixed) { - spacer.style.height = headerHeight + 'px'; - spacer.classList.add('active'); - topSection.classList.add('fixed'); - // Start hidden, then show - topSection.classList.add('hidden'); - requestAnimationFrame(() => { - topSection.classList.remove('hidden'); - }); - isFixed = true; - } else { - topSection.classList.remove('hidden'); - } - } else if (currentScrollY > lastScrollY && isFixed) { - // Scrolling down while fixed - hide it - topSection.classList.add('hidden'); - } - } - - lastScrollY = currentScrollY; - }, { passive: true }); - - // Stat card click handler - document.addEventListener('click', (e) => { - const card = e.target.closest('.card.clickable'); - if (!card) return; - - const filterType = card.dataset.filter; - const status = card.dataset.status; - const type = card.dataset.type; - - if (filterType === 'domain') { - // Remove active 
from domain cards only - document.querySelectorAll('.card.clickable[data-filter="domain"]').forEach(c => c.classList.remove('active')); - card.classList.add('active'); - domainFilter = status || 'all'; - - // Update placeholder - const searchInput = document.getElementById('searchInput'); - searchInput.placeholder = domainFilter === 'all' ? 'Search domains...' : `Showing ${domainFilter} domains...`; - - // Reload TLD list with new filter - loadFeeds(searchQuery); - } else if (filterType === 'feed') { - const wasActive = card.classList.contains('active'); - - if (status === 'all') { - // ALL card toggles exclusion mode - if (wasActive) { - card.classList.remove('active'); - feedFilter.allSelected = false; - } else { - card.classList.add('active'); - feedFilter.allSelected = true; - } - } else if (status) { - // Status card (pass, skip, hold, dead) - multi-select - if (wasActive) { - card.classList.remove('active'); - feedFilter.statuses = feedFilter.statuses.filter(s => s !== status); - } else { - card.classList.add('active'); - feedFilter.statuses.push(status); - } - } else if (type) { - // Type card (rss, atom, json, unknown, empty) - multi-select - if (wasActive) { - card.classList.remove('active'); - feedFilter.types = feedFilter.types.filter(t => t !== type); - } else { - card.classList.add('active'); - feedFilter.types.push(type); - } - } - - // Reload TLD list with feed filter - loadFeeds(searchQuery); - } - }); - - // Refresh only expanded TLD sections with new domain filter - function refreshExpandedTLDs() { - const expandedContainer = document.getElementById('expandedTLDContent'); - if (expandedContainer && expandedContainer.style.display !== 'none' && expandedContainer.dataset.tld) { - // Mark as needing reload and reload - expandedContainer.dataset.loaded = 'false'; - loadTLDDomains(expandedContainer, searchQuery); - } - } - - // Apply feed filter to currently visible feeds - function applyFeedFilter() { - document.querySelectorAll('.inline-feed-block').forEach(block => { - const feedStatus = block.dataset.status || 'hold'; - const feedType = block.dataset.type || 'unknown'; - - let show = true; - if (feedFilter.status !== 'all' && feedStatus !== feedFilter.status) { - show = false; - } - if (feedFilter.type && feedType !== feedFilter.type) { - show = false; - } - - block.style.display = show ? 'block' : 'none'; - }); - } - - // Event delegation for domain-spacer clicks (toggle feeds) - document.addEventListener('click', (e) => { - const spacer = e.target.closest('.domain-spacer'); - if (spacer) { - const block = spacer.closest('.domain-block'); - if (block) { - const feedsDiv = block.querySelector('.domain-feeds'); - if (feedsDiv) { - const isVisible = feedsDiv.style.display !== 'none'; - feedsDiv.style.display = isVisible ? 
'none' : 'block'; - if (!isVisible) { - feedsDiv.querySelectorAll('.inline-feed-block').forEach(feedBlock => { - const itemsDiv = feedBlock.querySelector('.feed-items'); - if (itemsDiv && !itemsDiv.dataset.loaded) { - itemsDiv.dataset.loaded = 'true'; - loadFeedItems(feedBlock.dataset.url, itemsDiv); - } - }); - } - } - } - } - }); - - // Event delegation for feed-url-toggle clicks (toggle feed info) - document.addEventListener('click', (e) => { - const urlToggle = e.target.closest('.feed-url-toggle'); - if (urlToggle) { - const feedBlock = urlToggle.closest('.inline-feed-block'); - if (feedBlock) { - const infoDiv = feedBlock.querySelector('.feed-info'); - if (infoDiv) { - const isVisible = infoDiv.style.display !== 'none'; - infoDiv.style.display = isVisible ? 'none' : 'block'; - if (!isVisible && !infoDiv.dataset.loaded) { - infoDiv.dataset.loaded = 'true'; - loadFeedInfo(feedBlock.dataset.url, infoDiv); - } - } - } - } - }); - - // Event delegation for feed-title-toggle and feed-filler-toggle clicks (toggle items) - document.addEventListener('click', (e) => { - const titleToggle = e.target.closest('.feed-title-toggle'); - const fillerToggle = e.target.closest('.feed-filler-toggle'); - if (titleToggle || fillerToggle) { - const feedBlock = (titleToggle || fillerToggle).closest('.inline-feed-block'); - if (feedBlock) { - const itemsDiv = feedBlock.querySelector('.feed-items'); - if (itemsDiv) { - const isVisible = itemsDiv.style.display !== 'none'; - itemsDiv.style.display = isVisible ? 'none' : 'block'; - } - } - } - }); - - // Load feed info - async function loadFeedInfo(feedUrl, infoDiv) { - infoDiv.innerHTML = 'Loading...'; - try { - const resp = await fetch(`/api/feedInfo?url=${encodeURIComponent(feedUrl)}`); - if (!resp.ok) throw new Error('Failed to load'); - const f = await resp.json(); - - let html = '
'; - const fields = [ - ['URL', f.url], - ['Title', f.title], - ['Description', f.description], - ['Type', f.type], - ['Language', f.language], - ['Site URL', f.siteUrl], - ['Status', f.status], - ['Last Error', f.lastError], - ['Item Count', f.itemCount], - ['Oldest Item', f.oldestItemDate], - ['Newest Item', f.newestItemDate], - ['Discovered', f.discoveredAt], - ['Last Checked', f.lastCheckedAt], - ['Next Check', f.nextCheckAt], - ['Publish Status', f.publishStatus], - ['Publish Account', f.publishAccount], - ]; - fields.forEach(([label, value]) => { - if (value != null && value !== '' && value !== 0) { - html += `${escapeHtml(label)}:${escapeHtml(String(value))}`; - } - }); - html += '
'; - infoDiv.innerHTML = html; - } catch (err) { - infoDiv.innerHTML = `Error: ${escapeHtml(err.message)}`; - } - } - - // Load feed items - async function loadFeedItems(feedUrl, itemsDiv) { - itemsDiv.innerHTML = 'Loading...'; - try { - const resp = await fetch(`/api/feedItems?url=${encodeURIComponent(feedUrl)}&limit=50`); - if (!resp.ok) throw new Error('Failed to load'); - const items = await resp.json(); - - if (!items || items.length === 0) { - // Just clear the items area, keep the feed visible - itemsDiv.innerHTML = ''; - return; - } - - let html = ''; - items.forEach(item => { - const date = item.pub_date ? new Date(item.pub_date).toLocaleDateString() : ''; - html += `
`; - html += `
${escapeHtml(date)} 
`; - if (item.link) { - html += `${escapeHtml(item.title || item.link)}`; - } else { - html += `${escapeHtml(item.title || '(no title)')}`; - } - html += '
'; - }); - itemsDiv.innerHTML = html; - } catch (err) { - itemsDiv.innerHTML = `Error: ${escapeHtml(err.message)}`; - } - } - - // Status colors - const statusConfig = { - hold: { color: '#f90', bg: '#330', border: '#550' }, - skip: { color: '#f66', bg: '#400', border: '#600' }, - pass: { color: '#0f0', bg: '#040', border: '#060' } - }; - - // Render status buttons - function renderStatusBtns(currentStatus, type, id) { - const order = ['pass', 'hold', 'skip']; - let html = '
'; - order.forEach((s, i) => { - const cfg = statusConfig[s]; - const isActive = s === currentStatus; - const bg = isActive ? cfg.bg : '#1a1a1a'; - const border = isActive ? cfg.border : '#333'; - const color = isActive ? cfg.color : '#ccc'; - html += ``; - }); - html += '
'; - return html; - } - - // Render TLD section header - function renderTLDHeader(tld) { - return `
-
- .${escapeHtml(tld)} -
-
`; - } - - function renderTLDFooter(tld) { - return ``; - } - - function closeTLDSection(container, tld) { - const tldContent = container.querySelector(`.tld-section[data-tld="${tld}"] .tld-content`); - if (tldContent) { - tldContent.insertAdjacentHTML('beforeend', renderTLDFooter(tld)); - } - } - - // Event delegation for TLD clicks (toggle section) - document.addEventListener('click', (e) => { - const tldHeader = e.target.closest('.tld-header'); - const tldFooter = e.target.closest('.tld-footer'); - const expandedContainer = document.getElementById('expandedTLDContent'); - - // Handle clicks in expanded container header - if (tldHeader && tldHeader.closest('#expandedTLDContent')) { - // Close the expanded content - const currentSection = document.querySelector('.tld-section.expanded'); - if (currentSection) { - currentSection.classList.remove('expanded'); - } - expandedContainer.style.display = 'none'; - expandedContainer.innerHTML = ''; - currentOpenTLD = null; - // Show TLD list again - const domainList = document.querySelector('.domain-list'); - if (domainList) domainList.style.display = ''; - updateStats(); // Revert to search or all stats - return; - } - - // Handle clicks on TLD cards - if (tldHeader || tldFooter) { - const section = (tldHeader || tldFooter).closest('.tld-section'); - if (section) { - const tld = section.dataset.tld; - const isExpanded = section.classList.contains('expanded'); - - if (isExpanded) { - // Closing this TLD - section.classList.remove('expanded'); - expandedContainer.style.display = 'none'; - expandedContainer.innerHTML = ''; - currentOpenTLD = null; - // Show TLD list again - const domainList = document.querySelector('.domain-list'); - if (domainList) domainList.style.display = ''; - updateStats(); // Revert to search or all stats - } else { - // Close any other open TLD first - document.querySelectorAll('.tld-section.expanded').forEach(s => { - s.classList.remove('expanded'); - }); - - // Opening this TLD - section.classList.add('expanded'); - currentOpenTLD = tld; - // Hide TLD list - const domainList = document.querySelector('.domain-list'); - if (domainList) domainList.style.display = 'none'; - // Show TLD stats (filtered by search if active) - const currentSearch = document.getElementById('searchInput').value.trim(); - updateStatsForTLD(tld, currentSearch); - - // Set up expanded container with header - expandedContainer.innerHTML = ` -
- .${escapeHtml(tld)} -
-
-
Loading...
-
- `; - expandedContainer.style.display = 'block'; - expandedContainer.dataset.tld = tld; - expandedContainer.dataset.loaded = 'false'; - - // Load domains - loadTLDDomains(expandedContainer, searchQuery); - - // Scroll to expanded container - expandedContainer.scrollIntoView({ behavior: 'smooth', block: 'start' }); - } - } - } - }); - - // Update stats for a specific TLD (optionally filtered by search) - async function updateStatsForTLD(tld, search = '') { - try { - let url = `/api/tldStats?tld=${encodeURIComponent(tld)}`; - if (search) { - url += `&search=${encodeURIComponent(search)}`; - } - const resp = await fetch(url); - if (!resp.ok) return; - const stats = await resp.json(); - - document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0); - document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0); - document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0); - document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0); - document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0); - - document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0); - document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0); - document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0); - document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0); - document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0); - document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0); - document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0); - document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0); - document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0); - document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0); - document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0); - - document.getElementById('updatedAt').textContent = search ? 
`Search "${search}" in .${tld}` : `Stats for .${tld}`; - } catch (err) { - console.error('TLD stats update failed:', err); - } - } - - // Update stats for search results - async function updateStatsForSearch(query) { - try { - const resp = await fetch(`/api/searchStats?search=${encodeURIComponent(query)}`); - if (!resp.ok) { - console.error('Search stats failed:', resp.status); - return; - } - const stats = await resp.json(); - - document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0); - document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0); - document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0); - document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0); - document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0); - - document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0); - document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0); - document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0); - document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0); - document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0); - document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0); - document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0); - document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0); - document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0); - document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0); - document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0); - - document.getElementById('updatedAt').textContent = `Search: "${query}"`; - } catch (err) { - console.error('Search stats update failed:', err); - } - } - - // Render domain row with feeds - function renderDomainRow(d) { - const status = d.status || 'hold'; - - const fullDomain = d.tld ? d.host + '.' + d.tld : d.host; - let html = `
`; - html += `
`; - html += renderStatusBtns(status, 'domain', fullDomain); - html += `${escapeHtml(fullDomain)}`; - - if (d.last_error) { - html += `${escapeHtml(d.last_error)}`; - } else { - html += ' '; - } - html += '
'; - - // Feeds (shown by default in this view) - if (d.feeds && d.feeds.length > 0) { - html += '
'; - d.feeds.forEach(f => { - const feedStatus = f.publish_status || 'hold'; - const feedType = f.type || 'unknown'; - html += `
`; - html += `
`; - - html += `${escapeHtml(f.language || '')} `; - html += renderStatusBtns(feedStatus, 'feed', f.url); - - if (f.item_count > 0) { - html += `${commaFormat(f.item_count)}`; - } else { - html += ``; - } - - let feedPath = f.url; - try { - const urlObj = new URL(f.url.startsWith('http') ? f.url : 'https://' + f.url); - feedPath = urlObj.pathname + urlObj.search; - } catch (e) {} - html += `${escapeHtml(feedPath)}`; - - if (f.title) { - html += `${escapeHtml(f.title)}`; - } - html += ' '; - html += '
'; - html += ''; - html += '
'; - html += '
'; - }); - html += '
'; - html += '
'; - } - html += '
'; - return html; - } - - // Attach status button handlers - function attachStatusHandlers(container) { - container.querySelectorAll('.status-btn:not(.btn-handled)').forEach(btn => { - btn.classList.add('btn-handled'); - btn.addEventListener('click', async (e) => { - e.stopPropagation(); - const type = btn.dataset.type; - const id = btn.dataset.id; - const newStatus = btn.dataset.status; - - const endpoint = type === 'domain' ? '/api/setDomainStatus' : '/api/setPublishStatus'; - const param = type === 'domain' ? 'host' : 'url'; - - try { - const resp = await fetch(`${endpoint}?${param}=${encodeURIComponent(id)}&status=${newStatus}`); - if (resp.ok) { - const group = btn.closest('.status-btn-group'); - group.querySelectorAll('.status-btn').forEach(b => { - const s = b.dataset.status; - const cfg = statusConfig[s]; - const isActive = s === newStatus; - b.style.background = isActive ? cfg.bg : '#111'; - b.style.borderColor = isActive ? cfg.border : '#333'; - b.style.color = isActive ? cfg.color : '#444'; - }); - const block = btn.closest(type === 'domain' ? '.domain-block' : '.inline-feed-block'); - if (block) block.dataset.status = newStatus; - } - } catch (err) { - console.error('Status update failed:', err); - } - }); - }); - } - - // Infinite scroll - function setupInfiniteScroll(loadMoreFn) { - infiniteScrollState = { loadMore: loadMoreFn, ended: false }; - } - - function clearInfiniteScroll() { - infiniteScrollState = null; - } - - async function checkInfiniteScroll() { - if (!infiniteScrollState || infiniteScrollState.ended || isLoadingMore) return; - const scrollY = window.scrollY + window.innerHeight; - const docHeight = document.documentElement.scrollHeight; - if (scrollY > docHeight - 500) { - isLoadingMore = true; - await infiniteScrollState.loadMore(); - isLoadingMore = false; - } - } - - window.addEventListener('scroll', checkInfiniteScroll); - - // Load and display feeds with lazy-loading TLD sections - let tldObserver = null; - - async function loadFeeds(query = '') { - const output = document.getElementById('output'); - output.innerHTML = '
Loading TLDs...
'; - - // Disconnect previous observer if any - if (tldObserver) { - tldObserver.disconnect(); - } - - try { - // Fetch TLDs with optional domain status filter, feed filter, and search - let tldsUrl = '/api/tlds'; - const params = []; - if (domainFilter !== 'all') { - params.push(`status=${domainFilter}`); - } - // Add feed filter params if any are selected - if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) { - if (feedFilter.allSelected) { - params.push('feedMode=exclude'); - } else { - params.push('feedMode=include'); - } - if (feedFilter.statuses.length > 0) { - params.push(`feedStatuses=${feedFilter.statuses.join(',')}`); - } - if (feedFilter.types.length > 0) { - params.push(`feedTypes=${feedFilter.types.join(',')}`); - } - } - if (query) { - params.push(`search=${encodeURIComponent(query)}`); - } - if (params.length > 0) { - tldsUrl += '?' + params.join('&'); - } - const tldsResp = await fetch(tldsUrl); - if (!tldsResp.ok) { - const errText = await tldsResp.text(); - throw new Error(`HTTP ${tldsResp.status}: ${errText}`); - } - const tlds = await tldsResp.json(); - - if (!tlds || tlds.length === 0) { - // Update stats for empty results - if (query) { - await updateStatsForSearch(query); - } else { - await updateStats(); - } - document.getElementById('infiniteLoader').textContent = query ? 'No matches found' : 'No feeds found'; - return; - } - - const container = output.querySelector('.domain-list'); - - // Render all TLD sections as card placeholders - tlds.forEach(t => { - const tld = t.tld || 'unknown'; - container.insertAdjacentHTML('beforeend', ` -
-
- .${escapeHtml(tld)} -
- -
- `); - }); - - document.getElementById('infiniteLoader').textContent = ''; - - // Auto-expand if single TLD match, otherwise update stats for search/all - if (tlds.length === 1) { - const tld = tlds[0].tld; - const expandedContainer = document.getElementById('expandedTLDContent'); - const section = output.querySelector('.tld-section'); - - if (section && expandedContainer) { - // Mark as expanded - section.classList.add('expanded'); - currentOpenTLD = tld; - // Hide TLD list - const domainList = document.querySelector('.domain-list'); - if (domainList) domainList.style.display = 'none'; - - // Set up expanded container - expandedContainer.innerHTML = ` -
- .${escapeHtml(tld)} -
-
-
Loading...
-
- `; - expandedContainer.style.display = 'block'; - expandedContainer.dataset.tld = tld; - expandedContainer.dataset.loaded = 'false'; - - // Load domains - loadTLDDomains(expandedContainer, query); - - // Show TLD stats (filtered by search if active) - await updateStatsForTLD(tld, query); - } - } else { - // Multiple TLDs - show search or global stats - if (query) { - await updateStatsForSearch(query); - } else { - await updateStats(); - } - } - - } catch (err) { - document.getElementById('infiniteLoader').textContent = 'Error: ' + err.message; - } - } - - // Load domains for a specific TLD section - async function loadTLDDomains(section, query = '') { - const tld = section.dataset.tld; - section.dataset.loaded = 'loading'; - - try { - let url = `/api/domains?tld=${encodeURIComponent(tld)}&limit=500`; - if (domainFilter !== 'all') { - url += `&status=${domainFilter}`; - } - if (query) { - url += `&search=${encodeURIComponent(query)}`; - } - // Apply feed filter if any feed cards are selected - if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) { - if (feedFilter.allSelected) { - url += '&feedMode=exclude'; - } else { - url += '&feedMode=include'; - } - if (feedFilter.statuses.length > 0) { - url += `&feedStatuses=${feedFilter.statuses.join(',')}`; - } - if (feedFilter.types.length > 0) { - url += `&feedTypes=${feedFilter.types.join(',')}`; - } - } - - const resp = await fetch(url); - if (!resp.ok) throw new Error(`HTTP ${resp.status}`); - const domains = await resp.json(); - - const content = section.querySelector('.tld-content'); - content.innerHTML = ''; - - if (!domains || domains.length === 0) { - content.innerHTML = '
No domains with feeds
'; - } else { - domains.forEach(d => { - content.insertAdjacentHTML('beforeend', renderDomainRow(d)); - }); - // Add footer - content.insertAdjacentHTML('beforeend', renderTLDFooter(tld)); - attachStatusHandlers(content); - - // Load items for all feeds - content.querySelectorAll('.inline-feed-block').forEach(feedBlock => { - const itemsDiv = feedBlock.querySelector('.feed-items'); - if (itemsDiv && !itemsDiv.dataset.loaded) { - itemsDiv.dataset.loaded = 'true'; - loadFeedItems(feedBlock.dataset.url, itemsDiv); - } - }); - } - - section.dataset.loaded = 'true'; - } catch (err) { - const content = section.querySelector('.tld-content'); - content.innerHTML = `
Error: ${escapeHtml(err.message)}
`; - section.dataset.loaded = 'false'; - } - } - - // Search handler - const searchInput = document.getElementById('searchInput'); - function doSearch() { - searchQuery = searchInput.value.trim(); - loadFeeds(searchQuery); - } - - // Search on button click - document.getElementById('searchBtn').addEventListener('click', doSearch); - - // Clear button - clears search and resets all filters - document.getElementById('clearBtn').addEventListener('click', () => { - searchInput.value = ''; - searchQuery = ''; - // Reset filters to default - domainFilter = 'all'; - feedFilter = { allSelected: false, statuses: [], types: [] }; - // Reset active card styling - document.querySelectorAll('.card.clickable.active').forEach(c => c.classList.remove('active')); - document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active'); - searchInput.placeholder = 'Search domains...'; - // Close any expanded TLD - currentOpenTLD = null; - const expandedContainer = document.getElementById('expandedTLDContent'); - if (expandedContainer) { - expandedContainer.style.display = 'none'; - expandedContainer.innerHTML = ''; - } - // Show TLD list if hidden - const domainList = document.querySelector('.domain-list'); - if (domainList) domainList.style.display = ''; - // Reload and update stats - loadFeeds(); - }); - - // Search on Enter key - searchInput.addEventListener('keydown', (e) => { - if (e.key === 'Enter') { - e.preventDefault(); - doSearch(); - } - }); - - // Initial load - set default active cards and load - document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active'); - loadFeeds(); - - // Update stats periodically - async function updateStats() { - // Check actual input value for current search state - const currentSearch = document.getElementById('searchInput')?.value.trim() || ''; - - // Priority: open TLD > search query > all - if (currentOpenTLD) { - updateStatsForTLD(currentOpenTLD, currentSearch); - return; - } - if (currentSearch) { - updateStatsForSearch(currentSearch); - return; - } - - try { - const resp = await fetch('/api/stats'); - if (!resp.ok) throw new Error(`HTTP ${resp.status}`); - const stats = await resp.json(); - document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains); - document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains); - document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains); - document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains); - document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains); - document.getElementById('domainCheckRate').textContent = commaFormat(stats.domain_check_rate); - document.getElementById('feedCrawlRate').textContent = commaFormat(stats.feed_crawl_rate); - document.getElementById('feedCheckRate').textContent = commaFormat(stats.feed_check_rate); - document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds); - document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds); - document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds); - document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds); - document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds); - document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds); - document.getElementById('emptyFeeds').textContent = 
commaFormat(stats.empty_feeds); - document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds); - document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds); - document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds); - document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds); - document.getElementById('updatedAt').textContent = 'All TLDs - ' + new Date().toLocaleTimeString(); - } catch (err) { - console.error('Stats update failed:', err); - } - } - - setInterval(updateStats, 60000); -} - -document.addEventListener('DOMContentLoaded', initDashboard); diff --git a/templates.go b/templates.go deleted file mode 100644 index f46a092..0000000 --- a/templates.go +++ /dev/null @@ -1,552 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "html/template" - "io" - "net/http" - "os" - "strings" - "time" -) - -// PDSAccount represents a Bluesky account on the PDS -type PDSAccount struct { - DID string `json:"did"` - Handle string `json:"handle"` - DisplayName string `json:"displayName"` - Description string `json:"description"` - Avatar string `json:"avatar"` -} - -// handleAccountsDirectory serves the 1440.news accounts directory page -func (c *Crawler) handleAccountsDirectory(w http.ResponseWriter, r *http.Request) { - pdsHost := os.Getenv("PDS_HOST") - if pdsHost == "" { - pdsHost = "https://pds.1440.news" - } - - // Fetch all repos from PDS - listReposURL := pdsHost + "/xrpc/com.atproto.sync.listRepos?limit=1000" - resp, err := http.Get(listReposURL) - if err != nil { - http.Error(w, "Failed to fetch accounts: "+err.Error(), http.StatusInternalServerError) - return - } - defer resp.Body.Close() - - body, _ := io.ReadAll(resp.Body) - var reposResp struct { - Repos []struct { - DID string `json:"did"` - Head string `json:"head"` - Active bool `json:"active"` - } `json:"repos"` - } - if err := json.Unmarshal(body, &reposResp); err != nil { - http.Error(w, "Failed to parse repos: "+err.Error(), http.StatusInternalServerError) - return - } - - // Fetch profile for each account using unauthenticated endpoints - var accounts []PDSAccount - client := &http.Client{Timeout: 5 * time.Second} - - for _, repo := range reposResp.Repos { - if !repo.Active { - continue - } - - // Get handle using describeRepo - describeURL := pdsHost + "/xrpc/com.atproto.repo.describeRepo?repo=" + repo.DID - describeResp, err := client.Get(describeURL) - if err != nil { - continue - } - describeBody, _ := io.ReadAll(describeResp.Body) - describeResp.Body.Close() - - var repoInfo struct { - Handle string `json:"handle"` - DID string `json:"did"` - } - if err := json.Unmarshal(describeBody, &repoInfo); err != nil { - continue - } - - // Skip the main 1440.news account (directory account itself) - if repoInfo.Handle == "1440.news" { - continue - } - - account := PDSAccount{ - DID: repoInfo.DID, - Handle: repoInfo.Handle, - } - - // Get profile record for display name, description, avatar - recordURL := pdsHost + "/xrpc/com.atproto.repo.getRecord?repo=" + repo.DID + "&collection=app.bsky.actor.profile&rkey=self" - recordResp, err := client.Get(recordURL) - if err == nil { - recordBody, _ := io.ReadAll(recordResp.Body) - recordResp.Body.Close() - - var record struct { - Value struct { - DisplayName string `json:"displayName"` - Description string `json:"description"` - Avatar struct { - Ref struct { - Link string `json:"$link"` - } `json:"ref"` - } `json:"avatar"` - } `json:"value"` - } - if json.Unmarshal(recordBody, &record) == 
nil { - account.DisplayName = record.Value.DisplayName - account.Description = record.Value.Description - if record.Value.Avatar.Ref.Link != "" { - account.Avatar = pdsHost + "/xrpc/com.atproto.sync.getBlob?did=" + repo.DID + "&cid=" + record.Value.Avatar.Ref.Link - } - } - } - - accounts = append(accounts, PDSAccount{ - DID: account.DID, - Handle: account.Handle, - DisplayName: account.DisplayName, - Description: account.Description, - Avatar: account.Avatar, - }) - } - - // Render the page - tmpl := template.Must(template.New("accounts").Parse(accountsDirectoryHTML)) - w.Header().Set("Content-Type", "text/html; charset=utf-8") - tmpl.Execute(w, map[string]interface{}{ - "Accounts": accounts, - "Count": len(accounts), - }) -} - -func (c *Crawler) handleDashboard(w http.ResponseWriter, r *http.Request) { - stats, err := c.GetDashboardStats() - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - funcMap := template.FuncMap{ - "pct": func(a, b int) float64 { - if b == 0 { - return 0 - } - return float64(a) * 100.0 / float64(b) - }, - "comma": func(n interface{}) string { - var val int - switch v := n.(type) { - case int: - val = v - case int32: - val = int(v) - case int64: - val = int(v) - default: - return "0" - } - if val < 0 { - return "-" + commaFormat(-val) - } - return commaFormat(val) - }, - } - - tmpl, err := template.New("dashboard").Funcs(funcMap).Parse(dashboardHTML) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "text/html") - tmpl.Execute(w, stats) -} - -func (c *Crawler) handleAPIStats(w http.ResponseWriter, r *http.Request) { - stats, err := c.GetDashboardStats() - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(stats) -} - -// handleRedirect handles short URL redirects for url.1440.news -func (c *Crawler) handleRedirect(w http.ResponseWriter, r *http.Request) { - code := strings.TrimPrefix(r.URL.Path, "/") - if code == "" { - http.NotFound(w, r) - return - } - - // Look up the short URL - shortURL, err := c.GetShortURL(code) - if err != nil { - http.NotFound(w, r) - return - } - - // Record the click asynchronously - go func() { - if err := c.RecordClick(code, r); err != nil { - fmt.Printf("Failed to record click for %s: %v\n", code, err) - } - }() - - // Redirect to original URL - http.Redirect(w, r, shortURL.OriginalURL, http.StatusFound) -} - -const accountsDirectoryHTML = ` - - - 1440.news - News Feed Directory - - - - - -
-
-

1440.news

-

Curated news feeds on Bluesky

-

{{.Count}} feeds available

- -
- -
- {{range .Accounts}} - - {{else}} -

No feeds available yet.

- {{end}} -
-

No feeds match your search.

- - -
- - - -` - -const dashboardHTML = ` - - - 1440.news Feed Crawler - - - - - - -
-

Domains

-
-
-
{{comma .TotalDomains}}
-
All
-
-
-
{{comma .PassDomains}}
-
Pass
-
-
-
{{comma .SkipDomains}}
-
Skip
-
-
-
{{comma .HoldDomains}}
-
Hold
-
-
-
{{comma .DeadDomains}}
-
Dead
-
-
-
{{comma .DomainCheckRate}}
-
alive/min
-
-
-
{{comma .FeedCrawlRate}}
-
crawl/min
-
-
-
{{comma .FeedCheckRate}}
-
check/min
-
-
- -

Feeds

-
-
-
{{comma .TotalFeeds}}
-
All
-
-
-
{{comma .AliveFeeds}}
-
Alive
-
-
-
{{comma .PublishFeeds}}
-
Pass
-
-
-
{{comma .SkipFeeds}}
-
Skip
-
-
-
{{comma .HoldFeeds}}
-
Hold
-
-
-
{{comma .DeadFeeds}}
-
Dead
-
-
-
{{comma .EmptyFeeds}}
-
Empty
-
-
-
{{comma .RSSFeeds}}
-
RSS
-
-
-
{{comma .AtomFeeds}}
-
Atom
-
-
-
{{comma .JSONFeeds}}
-
JSON
-
-
-
{{comma .UnknownFeeds}}
-
Unknown
-
-
- -
- - - -
-
-
- -
-
-
- -
Last updated: {{.UpdatedAt.Format "2006-01-02 15:04:05"}}
- -`