package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"strings"

	"github.com/jackc/pgx/v5"
)

func (c *Crawler) handleAPIAllDomains(w http.ResponseWriter, r *http.Request) {
	offset := 0
	limit := 100
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 100 {
			limit = 100
		}
	}
	// Clamp negative values: a negative offset or limit would panic when
	// slicing the cache below.
	if offset < 0 {
		offset = 0
	}
	if limit < 0 {
		limit = 0
	}

	// Serve from cache (updated once per minute in background)
	c.statsMu.RLock()
	cached := c.cachedAllDomains
	c.statsMu.RUnlock()

	var domains []DomainStat
	if cached != nil && offset < len(cached) {
		end := offset + limit
		if end > len(cached) {
			end = len(cached)
		}
		domains = cached[offset:end]
	}
	if domains == nil {
		domains = []DomainStat{}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}

// handleAPIDomains lists domains with an optional status filter, including their feeds
func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
	status := r.URL.Query().Get("status")
	hasFeeds := r.URL.Query().Get("has_feeds") == "true"
	search := r.URL.Query().Get("search")
	tldFilter := r.URL.Query().Get("tld")
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	// First get domains
	var rows pgx.Rows
	var err error
	if hasFeeds {
		// Only domains with feeds
		searchPattern := "%" + strings.ToLower(search) + "%"
		if tldFilter != "" {
			// Filter by specific TLD
			rows, err = c.db.Query(`
				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					WHERE item_count > 0
					GROUP BY source_host
				) f ON d.host = f.source_host
				WHERE d.status != 'skip' AND d.tld = $1
				ORDER BY d.host ASC
				LIMIT $2 OFFSET $3
			`, tldFilter, limit, offset)
		} else if search != "" {
			// Search in domain host or feed title/url
			rows, err = c.db.Query(`
				SELECT DISTINCT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					WHERE item_count > 0
					GROUP BY source_host
				) f ON d.host = f.source_host
				LEFT JOIN feeds fe ON d.host = fe.source_host
				WHERE d.status != 'skip'
					AND (LOWER(d.host) LIKE $1 OR LOWER(fe.title) LIKE $1 OR LOWER(fe.url) LIKE $1)
				ORDER BY d.tld ASC, d.host ASC
				LIMIT $2 OFFSET $3
			`, searchPattern, limit, offset)
		} else if status != "" {
			rows, err = c.db.Query(`
				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					WHERE item_count > 0
					GROUP BY source_host
				) f ON d.host = f.source_host
				WHERE d.status = $1
				ORDER BY d.tld ASC, d.host ASC
				LIMIT $2 OFFSET $3
			`, status, limit, offset)
		} else {
			// Default: exclude 'skip' status domains
			rows, err = c.db.Query(`
				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					WHERE item_count > 0
					GROUP BY source_host
				) f ON d.host = f.source_host
				WHERE d.status != 'skip'
				ORDER BY d.tld ASC, d.host ASC
				LIMIT $1 OFFSET $2
			`, limit, offset)
		}
	} else if status != "" {
		rows, err = c.db.Query(`
			SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
			FROM domains d
			LEFT JOIN (
				SELECT source_host, COUNT(*) as feed_count
				FROM feeds
				GROUP BY source_host
			) f ON d.host = f.source_host
			WHERE d.status = $1
			ORDER BY d.tld ASC, d.host ASC
			LIMIT $2 OFFSET $3
		`, status, limit, offset)
	} else {
		// Default: exclude 'skip' status domains
		rows, err = c.db.Query(`
			SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
			FROM domains d
			LEFT JOIN (
				SELECT source_host, COUNT(*) as feed_count
				FROM feeds
				GROUP BY source_host
			) f ON d.host = f.source_host
			WHERE d.status != 'skip'
			ORDER BY d.tld ASC, d.host ASC
			LIMIT $1 OFFSET $2
		`, limit, offset)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type FeedInfo struct {
		URL           string `json:"url"`
		Title         string `json:"title,omitempty"`
		Type          string `json:"type,omitempty"`
		Status        string `json:"status,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
		Language      string `json:"language,omitempty"`
		ItemCount     int    `json:"item_count,omitempty"`
	}
	type DomainInfo struct {
		Host      string     `json:"host"`
		TLD       string     `json:"tld"`
		Status    string     `json:"status"`
		LastError string     `json:"last_error,omitempty"`
		FeedCount int        `json:"feed_count"`
		Feeds     []FeedInfo `json:"feeds,omitempty"`
	}

	var domains []DomainInfo
	var hosts []string
	for rows.Next() {
		var d DomainInfo
		var tld, lastError *string
		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tld)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
		hosts = append(hosts, d.Host)
	}

	// Now get feeds for these domains (with actual item count from items table)
	if len(hosts) > 0 {
		feedRows, err := c.db.Query(`
			SELECT f.source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language,
				(SELECT COUNT(*) FROM items WHERE feed_url = f.url) as item_count
			FROM feeds f
			WHERE f.source_host = ANY($1)
			ORDER BY f.source_host, f.url
		`, hosts)
		if err == nil {
			defer feedRows.Close()
			feedsByHost := make(map[string][]FeedInfo)
			for feedRows.Next() {
				var host string
				var f FeedInfo
				var title, feedType, status, publishStatus, language *string
				var itemCount *int
				if err := feedRows.Scan(&host, &f.URL, &title, &feedType, &status, &publishStatus, &language, &itemCount); err != nil {
					continue
				}
				f.Title = StringValue(title)
				f.Type = StringValue(feedType)
				f.Status = StringValue(status)
				f.PublishStatus = StringValue(publishStatus)
				f.Language = StringValue(language)
				if itemCount != nil {
					f.ItemCount = *itemCount
				}
				feedsByHost[host] = append(feedsByHost[host], f)
			}
			// Attach feeds to domains
			for i := range domains {
				if feeds, ok := feedsByHost[domains[i].Host]; ok {
					domains[i].Feeds = feeds
				}
			}
		}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
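// parsePagination is a hypothetical helper, sketched here to consolidate the
// limit/offset parsing that each handler in this file currently inlines. The
// name and signature are illustrative; nothing below calls it yet.
func parsePagination(r *http.Request, defaultLimit, maxLimit int) (limit, offset int) {
	limit = defaultLimit
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > maxLimit {
			limit = maxLimit
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}
	// Negative values would produce SQL errors (or slice panics on the
	// cached path), so clamp to zero.
	if limit < 0 {
		limit = 0
	}
	if offset < 0 {
		offset = 0
	}
	return limit, offset
}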
// handleAPIDomainsByStatus lists domains matching a required status, with feed counts.
func (c *Crawler) handleAPIDomainsByStatus(w http.ResponseWriter, r *http.Request) {
	status := r.URL.Query().Get("status")
	if status == "" {
		http.Error(w, "status parameter required", http.StatusBadRequest)
		return
	}
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE d.status = $1
		ORDER BY d.tld ASC, d.host ASC
		LIMIT $2 OFFSET $3
	`, status, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}
	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var tld, lastError *string
		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tld)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
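// writeJSON is a hypothetical helper for the Content-Type + Encode pair that
// every handler in this file repeats; a consolidation sketch, not currently
// called by the handlers here.
func writeJSON(w http.ResponseWriter, v interface{}) {
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(v); err != nil {
		// The status line has already been written, so the error can only
		// be logged, not reported to the client.
		fmt.Printf("writeJSON: encode error: %v\n", err)
	}
}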
// handleAPIDomainFeeds lists the feeds recorded for a single domain.
func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT url, title, type, status, last_error, item_count, publish_status, language
		FROM feeds
		WHERE source_host = $1
		ORDER BY url ASC
		LIMIT $2 OFFSET $3
	`, host, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type FeedInfo struct {
		URL           string `json:"url"`
		Title         string `json:"title"`
		Type          string `json:"type"`
		Status        string `json:"status,omitempty"`
		LastError     string `json:"last_error,omitempty"`
		ItemCount     int    `json:"item_count,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
		Language      string `json:"language,omitempty"`
	}
	var feeds []FeedInfo
	for rows.Next() {
		var f FeedInfo
		var title, status, lastError, publishStatus, language *string
		var itemCount *int
		if err := rows.Scan(&f.URL, &title, &f.Type, &status, &lastError, &itemCount, &publishStatus, &language); err != nil {
			continue
		}
		f.Title = StringValue(title)
		f.Status = StringValue(status)
		f.LastError = StringValue(lastError)
		f.PublishStatus = StringValue(publishStatus)
		f.Language = StringValue(language)
		if itemCount != nil {
			f.ItemCount = *itemCount
		}
		feeds = append(feeds, f)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(feeds)
}

// handleAPISetDomainStatus sets the status for a domain.
// status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for 'drop').
func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	status := r.URL.Query().Get("status")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	if status != "hold" && status != "pass" && status != "skip" {
		http.Error(w, "status must be 'hold', 'pass', or 'skip' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest)
		return
	}
	host = normalizeHost(host)

	// Setting to 'skip' triggers takedown (hide content but preserve data)
	if status == "skip" {
		result := c.skipDomain(host)
		if result.Error != "" {
			http.Error(w, result.Error, http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(result)
		return
	}

	// When setting to pass, clear any last_error
	var err error
	if status == "pass" {
		_, err = c.db.Exec(`
			UPDATE domains SET status = $1, last_error = NULL WHERE host = $2
		`, status, host)
	} else {
		_, err = c.db.Exec(`
			UPDATE domains SET status = $1 WHERE host = $2
		`, status, host)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"host":   host,
		"status": status,
	})
}
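// Domain status values, as read and written by the handlers in this file:
//
//	pass - eligible for crawling; setting it clears last_error
//	hold - parked; only the status value is written here, so any further
//	       hold semantics are assumed to be enforced by the crawler elsewhere
//	skip - takedown: PDS accounts are taken down and feeds are marked
//	       'skip', but all data is preserved (see skipDomain below)
//	drop - permanent deletion of feeds, items, and PDS accounts; only
//	       reachable from 'skip' (see handleAPIDropDomain below)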
// handleAPIRevisitDomain resets a domain to 'pass' and clears its crawl
// timestamps so the crawler will pick it up again.
func (c *Crawler) handleAPIRevisitDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	_, err := c.db.Exec(`
		UPDATE domains
		SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
		WHERE host = $1
	`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{"status": "queued", "host": host})
}

// handleAPIPriorityCrawl immediately crawls a domain (adding it if it doesn't exist)
func (c *Crawler) handleAPIPriorityCrawl(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	host = normalizeHost(host)

	// Add domain if it doesn't exist, or reset to pass for crawling
	_, err := c.db.Exec(`
		INSERT INTO domains (host, status, discovered_at, tld)
		VALUES ($1, 'pass', NOW(), $2)
		ON CONFLICT(host) DO UPDATE
		SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
	`, host, getTLD(host))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Crawl synchronously
	fmt.Printf("Priority crawl: %s\n", host)
	feedsFound, crawlErr := c.crawlHost(host)
	errStr := ""
	if crawlErr != nil {
		errStr = crawlErr.Error()
	}

	// Mark as crawled
	c.markDomainCrawled(host, feedsFound, errStr)

	// Get the feeds we found
	feeds, _ := c.GetFeedsByHost(host)
	type FeedSummary struct {
		URL      string `json:"url"`
		Title    string `json:"title"`
		Type     string `json:"type"`
		Category string `json:"category"`
		Status   string `json:"status"`
	}
	var feedSummaries []FeedSummary
	for _, f := range feeds {
		feedSummaries = append(feedSummaries, FeedSummary{
			URL:      f.URL,
			Title:    f.Title,
			Type:     f.Type,
			Category: f.Category,
			Status:   f.Status,
		})
	}

	result := map[string]interface{}{
		"host":        host,
		"feeds_found": feedsFound,
		"feeds":       feedSummaries,
	}
	if crawlErr != nil {
		result["error"] = crawlErr.Error()
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}

// handleAPIFilter handles flexible filtering with stackable parameters
func (c *Crawler) handleAPIFilter(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	domain := r.URL.Query().Get("domain")
	feedStatus := r.URL.Query().Get("feedStatus")
	domainStatus := r.URL.Query().Get("domainStatus")
	languages := r.URL.Query().Get("languages") // comma-separated list
	show := r.URL.Query().Get("show")           // "feeds" or "domains"
	sort := r.URL.Query().Get("sort")           // "alpha" or "feeds"
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	// Parse languages into slice
	var langList []string
	if languages != "" {
		for _, lang := range strings.Split(languages, ",") {
			lang = strings.TrimSpace(lang)
			if lang != "" {
				langList = append(langList, lang)
			}
		}
	}

	// Determine what to show based on filters
	if show == "" {
		if feedStatus != "" || domain != "" || len(langList) > 0 {
			show = "feeds"
		} else {
			show = "domains"
		}
	}

	if show == "feeds" {
		c.filterFeeds(w, tld, domain, feedStatus, langList, limit, offset)
	} else {
		c.filterDomains(w, tld, domainStatus, sort, limit, offset)
	}
}
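// Illustrative filter requests (the route path is an assumption; handler
// registration happens elsewhere in this codebase):
//
//	/api/filter?tld=org&show=domains&sort=feeds  -> .org domains, most feeds first
//	/api/filter?languages=en,de&feedStatus=pass  -> "show" defaults to "feeds"
//	/api/filter?domainStatus=hold                -> "show" defaults to "domains"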
// filterDomains queries domains with optional tld/status filters, building
// the WHERE clause and its positional parameters dynamically.
func (c *Crawler) filterDomains(w http.ResponseWriter, tld, status, sort string, limit, offset int) {
	var args []interface{}
	argNum := 1
	query := `
		SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE 1=1`
	if tld != "" {
		query += fmt.Sprintf(" AND d.tld = $%d", argNum)
		args = append(args, tld)
		argNum++
	}
	if status != "" {
		query += fmt.Sprintf(" AND d.status = $%d", argNum)
		args = append(args, status)
		argNum++
	}
	// Sort by feed count descending or alphabetically
	if sort == "feeds" {
		query += fmt.Sprintf(" ORDER BY feed_count DESC, d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
	} else {
		query += fmt.Sprintf(" ORDER BY d.tld ASC, d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
	}
	args = append(args, limit, offset)

	rows, err := c.db.Query(query, args...)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}
	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var tldVal, lastError *string
		if err := rows.Scan(&d.Host, &tldVal, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tldVal)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"type": "domains",
		"data": domains,
	})
}

// handleAPITLDDomains lists domains under a single TLD.
func (c *Crawler) handleAPITLDDomains(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT d.host, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE d.tld = $1
		ORDER BY d.tld ASC, d.host ASC
		LIMIT $2 OFFSET $3
	`, tld, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}
	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var lastError *string
		if err := rows.Scan(&d.Host, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
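// Performance note (assumption: any supporting indexes are defined in the
// schema elsewhere and may already exist): the listing queries above filter
// on domains.tld and domains.status and aggregate feeds by source_host, so
// those three columns are the natural index candidates if these endpoints
// slow down.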
// handleAPITLDs lists TLDs with domain counts, optionally restricted to
// TLDs whose domains have feeds.
func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
	hasFeeds := r.URL.Query().Get("has_feeds") == "true"

	var rows pgx.Rows
	var err error
	if hasFeeds {
		// Only TLDs that have domains with feeds
		rows, err = c.db.Query(`
			SELECT DISTINCT d.tld, COUNT(DISTINCT d.host) as domain_count
			FROM domains d
			INNER JOIN feeds f ON d.host = f.source_host
			WHERE d.tld IS NOT NULL AND d.tld != ''
			GROUP BY d.tld
			ORDER BY d.tld ASC
		`)
	} else {
		// All TLDs
		rows, err = c.db.Query(`
			SELECT tld, COUNT(*) as domain_count
			FROM domains
			WHERE tld IS NOT NULL AND tld != ''
			GROUP BY tld
			ORDER BY tld ASC
		`)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type TLDInfo struct {
		TLD         string `json:"tld"`
		DomainCount int    `json:"domain_count"`
	}
	var tlds []TLDInfo
	for rows.Next() {
		var t TLDInfo
		if err := rows.Scan(&t.TLD, &t.DomainCount); err != nil {
			continue
		}
		tlds = append(tlds, t)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(tlds)
}

// handleAPITLDStats returns domain and feed counts for a single TLD.
func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}

	var domainCount, feedCount int
	err := c.db.QueryRow(`SELECT COUNT(*) FROM domains WHERE tld = $1`, tld).Scan(&domainCount)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	err = c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE tld = $1`, tld).Scan(&feedCount)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"tld":          tld,
		"domain_count": domainCount,
		"feed_count":   feedCount,
	})
}

// handleAPIDenyDomain skips a domain (takes down accounts, preserves data)
func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	result := c.skipDomain(host)
	if result.Error != "" {
		http.Error(w, result.Error, http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}

// DomainActionResult contains the results of a domain action
type DomainActionResult struct {
	Success          bool     `json:"success"`
	Host             string   `json:"host"`
	Action           string   `json:"action"`
	FeedsAffected    int64    `json:"feeds_affected,omitempty"`
	ItemsDeleted     int64    `json:"items_deleted,omitempty"`
	AccountsAffected int      `json:"accounts_affected,omitempty"`
	AccountErrors    []string `json:"account_errors,omitempty"`
	Error            string   `json:"error,omitempty"`
}

// getPDSCredentials loads PDS credentials from the environment or a pds.env file
func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
	pdsHost = os.Getenv("PDS_HOST")
	pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD")
	if pdsHost == "" || pdsAdminPassword == "" {
		if file, err := os.Open("pds.env"); err == nil {
			scanner := bufio.NewScanner(file)
			for scanner.Scan() {
				line := scanner.Text()
				if strings.HasPrefix(line, "PDS_HOST=") {
					pdsHost = strings.TrimPrefix(line, "PDS_HOST=")
				} else if strings.HasPrefix(line, "PDS_ADMIN_PASSWORD=") {
					pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=")
				}
			}
			file.Close()
		}
	}
	return
}
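// Illustrative pds.env contents for getPDSCredentials (placeholder values):
//
//	PDS_HOST=https://pds.example.com
//	PDS_ADMIN_PASSWORD=changeme
//
// The parser above does simple prefix matching: no comments, quoting, or
// whitespace trimming. When either environment variable is unset, values
// from the file override both.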
// getDomainDIDs returns all unique publish_account DIDs for a domain's feeds
func (c *Crawler) getDomainDIDs(host string) []string {
	var dids []string
	rows, err := c.db.Query(`
		SELECT DISTINCT publish_account
		FROM feeds
		WHERE source_host = $1 AND publish_account IS NOT NULL AND publish_account != ''
	`, host)
	if err == nil {
		defer rows.Close()
		for rows.Next() {
			var did string
			if err := rows.Scan(&did); err == nil && did != "" {
				dids = append(dids, did)
			}
		}
	}
	return dids
}

// skipDomain sets a domain to skip, takes down PDS accounts but preserves all data
func (c *Crawler) skipDomain(host string) DomainActionResult {
	result := DomainActionResult{Host: host, Action: "skip"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	dids := c.getDomainDIDs(host)

	// Takedown PDS accounts (hide content but preserve data)
	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
		publisher := NewPublisher(pdsHost)
		for _, did := range dids {
			if err := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip"); err != nil {
				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
			} else {
				result.AccountsAffected++
			}
		}
	}

	// Mark feeds as skipped (but don't delete)
	feedsAffected, err := c.db.Exec(`
		UPDATE feeds SET status = 'skip', publish_status = 'skip' WHERE source_host = $1
	`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update feeds: %v", err)
		return result
	}
	result.FeedsAffected = feedsAffected

	// Update domain status to skip
	_, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return result
	}

	result.Success = true
	return result
}

// handleAPIDropDomain permanently deletes all data for a skipped domain
func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Verify domain is currently skipped
	var status string
	err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
	if err != nil {
		http.Error(w, "domain not found", http.StatusNotFound)
		return
	}
	if status != "skip" {
		http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest)
		return
	}

	result := c.dropDomain(host)
	if result.Error != "" {
		http.Error(w, result.Error, http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}

// dropDomain permanently deletes all data for a domain (feeds, items, PDS accounts)
func (c *Crawler) dropDomain(host string) DomainActionResult {
	result := DomainActionResult{Host: host, Action: "drop"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	dids := c.getDomainDIDs(host)

	// Delete PDS accounts
	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
		publisher := NewPublisher(pdsHost)
		for _, did := range dids {
			if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil {
				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
			} else {
				result.AccountsAffected++
			}
		}
	}

	// Get feed URLs for this domain (needed to delete items)
	var feedURLs []string
	feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE source_host = $1`, host)
	if err == nil {
		defer feedRows.Close()
		for feedRows.Next() {
			var url string
			if err := feedRows.Scan(&url); err == nil {
				feedURLs = append(feedURLs, url)
			}
		}
	}

	// Delete items for all feeds from this domain
	for _, feedURL := range feedURLs {
		deleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url = $1`, feedURL)
		if err == nil {
			result.ItemsDeleted += deleted
		}
	}

	// Delete all feeds from this domain
	feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE source_host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
		return result
	}
	result.FeedsAffected = feedsDeleted

	// Update domain status to drop
	_, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return result
	}

	result.Success = true
	return result
}
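// Note on atomicity: dropDomain runs its item deletes, feed delete, and
// domain update as separate statements, so a failure partway through can
// leave items deleted while their feeds remain. If the underlying db handle
// exposes transactions, the per-feed loop could also collapse into a single
// statement such as:
//
//	DELETE FROM items WHERE feed_url IN
//	    (SELECT url FROM feeds WHERE source_host = $1)
//
// making the cleanup all-or-nothing.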
// handleAPIUndenyDomain removes skip status from a domain (restores accounts)
func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Verify domain is currently skipped
	var status string
	err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
	if err != nil {
		http.Error(w, "domain not found", http.StatusNotFound)
		return
	}
	if status != "skip" {
		http.Error(w, "domain is not skipped", http.StatusBadRequest)
		return
	}

	result := c.restoreDomain(host)
	if result.Error != "" {
		http.Error(w, result.Error, http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}

// restoreDomain removes skip status and restores PDS accounts
func (c *Crawler) restoreDomain(host string) DomainActionResult {
	result := DomainActionResult{Host: host, Action: "restore"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	dids := c.getDomainDIDs(host)

	// Restore PDS accounts (remove takedown)
	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
		publisher := NewPublisher(pdsHost)
		for _, did := range dids {
			if err := publisher.RestoreAccount(pdsAdminPassword, did); err != nil {
				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
			} else {
				result.AccountsAffected++
			}
		}
	}

	// Restore feeds to pass status
	feedsAffected, err := c.db.Exec(`
		UPDATE feeds SET status = 'pass', publish_status = 'pass' WHERE source_host = $1
	`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update feeds: %v", err)
		return result
	}
	result.FeedsAffected = feedsAffected

	// Update domain status back to pass
	_, err = c.db.Exec(`
		UPDATE domains SET status = 'pass', last_error = NULL WHERE host = $1
	`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return result
	}

	result.Success = true
	return result
}
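// registerDomainRoutes is an illustrative sketch of how these handlers might
// be wired into a mux. Every path below is an assumption (actual registration
// happens elsewhere); only /api/dropDomain is named in the comments above.
func (c *Crawler) registerDomainRoutes(mux *http.ServeMux) {
	mux.HandleFunc("/api/allDomains", c.handleAPIAllDomains)
	mux.HandleFunc("/api/domains", c.handleAPIDomains)
	mux.HandleFunc("/api/domainsByStatus", c.handleAPIDomainsByStatus)
	mux.HandleFunc("/api/domainFeeds", c.handleAPIDomainFeeds)
	mux.HandleFunc("/api/setDomainStatus", c.handleAPISetDomainStatus)
	mux.HandleFunc("/api/revisitDomain", c.handleAPIRevisitDomain)
	mux.HandleFunc("/api/priorityCrawl", c.handleAPIPriorityCrawl)
	mux.HandleFunc("/api/filter", c.handleAPIFilter)
	mux.HandleFunc("/api/tldDomains", c.handleAPITLDDomains)
	mux.HandleFunc("/api/tlds", c.handleAPITLDs)
	mux.HandleFunc("/api/tldStats", c.handleAPITLDStats)
	mux.HandleFunc("/api/denyDomain", c.handleAPIDenyDomain)
	mux.HandleFunc("/api/dropDomain", c.handleAPIDropDomain)
	mux.HandleFunc("/api/undenyDomain", c.handleAPIUndenyDomain)
}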