Files
crawler/api_domains.go

1026 lines
28 KiB
Go

package main
import (
"bufio"
"encoding/json"
"fmt"
"net/http"
"os"
"strings"
"github.com/jackc/pgx/v5"
)
// handleAPIAllDomains serves one page of the cached domain statistics as a
// JSON array.
//
// Query parameters:
//   - offset: index of the first entry to return (default 0; negative values
//     are treated as 0)
//   - limit: maximum number of entries (default 100, capped at 100; negative
//     values are treated as 0)
//
// The response is always a JSON array; it is empty when the cache is unset or
// the offset lies past the end of the cache.
func (c *Crawler) handleAPIAllDomains(w http.ResponseWriter, r *http.Request) {
	offset := 0
	limit := 100
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 100 {
			limit = 100
		}
	}
	// Negative values would make the slice expression below panic
	// (negative start index, or end < start), so clamp them first.
	if offset < 0 {
		offset = 0
	}
	if limit < 0 {
		limit = 0
	}

	// Serve from the in-memory cache; only the slice header is copied under
	// the read lock.
	c.statsMu.RLock()
	cached := c.cachedAllDomains
	c.statsMu.RUnlock()

	var domains []DomainStat
	if cached != nil && offset < len(cached) {
		end := offset + limit
		if end > len(cached) {
			end = len(cached)
		}
		domains = cached[offset:end]
	}
	// Encode an empty array rather than null when there is nothing to return.
	if domains == nil {
		domains = []DomainStat{}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
// handleAPIDomains returns one page of domains as a JSON array, each entry
// carrying the feeds registered for that host.
//
// Query parameters:
//   - status: restrict to domains with exactly this status
//   - has_feeds=true: only domains owning at least one feed with item_count > 0
//   - search: only honored together with has_feeds=true; case-insensitive
//     substring match against the host, feed title or feed URL. When search is
//     given, the status parameter is not applied.
//   - limit (default 100, capped at 500) and offset (default 0)
//
// When no status filter applies, domains whose status is 'skip' are left out.
func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()
	status := params.Get("status")
	search := params.Get("search")
	onlyWithFeeds := params.Get("has_feeds") == "true"

	limit, offset := 100, 0
	if raw := params.Get("limit"); raw != "" {
		fmt.Sscanf(raw, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if raw := params.Get("offset"); raw != "" {
		fmt.Sscanf(raw, "%d", &offset)
	}

	// Step 1: pick the domain query matching the combination of filters.
	var (
		rows pgx.Rows
		err  error
	)
	switch {
	case onlyWithFeeds && search != "":
		// Search in domain host or feed title/url.
		pattern := "%" + strings.ToLower(search) + "%"
		rows, err = c.db.Query(`
SELECT DISTINCT d.host, d.tld, d.status, d.last_error, f.feed_count
FROM domains d
INNER JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
WHERE item_count > 0
GROUP BY source_host
) f ON d.host = f.source_host
LEFT JOIN feeds fe ON d.host = fe.source_host
WHERE d.status != 'skip'
AND (LOWER(d.host) LIKE $1 OR LOWER(fe.title) LIKE $1 OR LOWER(fe.url) LIKE $1)
ORDER BY d.tld ASC, d.host ASC
LIMIT $2 OFFSET $3
`, pattern, limit, offset)
	case onlyWithFeeds && status != "":
		rows, err = c.db.Query(`
SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
FROM domains d
INNER JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
WHERE item_count > 0
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.status = $1
ORDER BY d.tld ASC, d.host ASC
LIMIT $2 OFFSET $3
`, status, limit, offset)
	case onlyWithFeeds:
		// No explicit filter: hide 'skip' domains.
		rows, err = c.db.Query(`
SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
FROM domains d
INNER JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
WHERE item_count > 0
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.status != 'skip'
ORDER BY d.tld ASC, d.host ASC
LIMIT $1 OFFSET $2
`, limit, offset)
	case status != "":
		rows, err = c.db.Query(`
SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
FROM domains d
LEFT JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.status = $1
ORDER BY d.tld ASC, d.host ASC
LIMIT $2 OFFSET $3
`, status, limit, offset)
	default:
		// No explicit filter: hide 'skip' domains.
		rows, err = c.db.Query(`
SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
FROM domains d
LEFT JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.status != 'skip'
ORDER BY d.tld ASC, d.host ASC
LIMIT $1 OFFSET $2
`, limit, offset)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// JSON shapes of the response.
	type feedEntry struct {
		URL           string `json:"url"`
		Title         string `json:"title,omitempty"`
		Type          string `json:"type,omitempty"`
		Status        string `json:"status,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
		Language      string `json:"language,omitempty"`
		ItemCount     int    `json:"item_count,omitempty"`
	}
	type domainEntry struct {
		Host      string      `json:"host"`
		TLD       string      `json:"tld"`
		Status    string      `json:"status"`
		LastError string      `json:"last_error,omitempty"`
		FeedCount int         `json:"feed_count"`
		Feeds     []feedEntry `json:"feeds,omitempty"`
	}

	var (
		page     []domainEntry
		hostList []string
	)
	for rows.Next() {
		var (
			entry        domainEntry
			tld, lastErr *string // nullable columns
		)
		if scanErr := rows.Scan(&entry.Host, &tld, &entry.Status, &lastErr, &entry.FeedCount); scanErr != nil {
			// Rows that cannot be scanned are skipped.
			continue
		}
		entry.TLD = StringValue(tld)
		entry.LastError = StringValue(lastErr)
		page = append(page, entry)
		hostList = append(hostList, entry.Host)
	}

	// Step 2: load the feeds of every host on this page, with the item count
	// taken from the items table.
	if len(hostList) > 0 {
		feedRows, feedErr := c.db.Query(`
SELECT f.source_host, f.url, f.title, f.type, f.status, f.publish_status, f.language,
(SELECT COUNT(*) FROM items WHERE feed_url = f.url) as item_count
FROM feeds f
WHERE f.source_host = ANY($1)
ORDER BY f.source_host, f.url
`, hostList)
		// A failed feed lookup is not fatal: domains are returned without feeds.
		if feedErr == nil {
			defer feedRows.Close()

			grouped := make(map[string][]feedEntry)
			for feedRows.Next() {
				var (
					sourceHost                                       string
					feed                                             feedEntry
					title, kind, feedStatus, publishStatus, language *string
					count                                            *int
				)
				if scanErr := feedRows.Scan(&sourceHost, &feed.URL, &title, &kind, &feedStatus, &publishStatus, &language, &count); scanErr != nil {
					continue
				}
				feed.Title = StringValue(title)
				feed.Type = StringValue(kind)
				feed.Status = StringValue(feedStatus)
				feed.PublishStatus = StringValue(publishStatus)
				feed.Language = StringValue(language)
				if count != nil {
					feed.ItemCount = *count
				}
				grouped[sourceHost] = append(grouped[sourceHost], feed)
			}

			// Attach each host's feeds to its domain entry.
			for idx := range page {
				if attached, found := grouped[page[idx].Host]; found {
					page[idx].Feeds = attached
				}
			}
		}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(page)
}
// handleAPIDomainsByStatus returns one page of domains having exactly the
// requested status, together with the number of feeds per domain, as JSON.
//
// Query parameters: status (required; 400 if missing), limit (default 100,
// capped at 500) and offset (default 0).
func (c *Crawler) handleAPIDomainsByStatus(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()

	wanted := params.Get("status")
	if wanted == "" {
		http.Error(w, "status parameter required", http.StatusBadRequest)
		return
	}

	limit, offset := 100, 0
	if raw := params.Get("limit"); raw != "" {
		fmt.Sscanf(raw, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if raw := params.Get("offset"); raw != "" {
		fmt.Sscanf(raw, "%d", &offset)
	}

	rows, err := c.db.Query(`
SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
FROM domains d
LEFT JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.status = $1
ORDER BY d.tld ASC, d.host ASC
LIMIT $2 OFFSET $3
`, wanted, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// JSON shape of one result row.
	type domainEntry struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var page []domainEntry
	for rows.Next() {
		var (
			host, state  string
			tld, lastErr *string // nullable columns
			feedCount    int
		)
		if scanErr := rows.Scan(&host, &tld, &state, &lastErr, &feedCount); scanErr != nil {
			// Rows that cannot be scanned are skipped.
			continue
		}
		page = append(page, domainEntry{
			Host:      host,
			TLD:       StringValue(tld),
			Status:    state,
			LastError: StringValue(lastErr),
			FeedCount: feedCount,
		})
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(page)
}
// handleAPIDomainFeeds returns one page of the feeds belonging to a host as a
// JSON array, ordered by URL.
//
// Query parameters: host (required; 400 if missing), limit (default 100,
// capped at 500) and offset (default 0).
//
// Every column other than url is scanned through a pointer, so a NULL value
// (including a NULL type) yields the zero value in the response instead of
// failing the scan and dropping the feed from the listing.
func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
SELECT url, title, type, status, error_count, last_error, item_count, publish_status, language
FROM feeds
WHERE source_host = $1
ORDER BY url ASC
LIMIT $2 OFFSET $3
`, host, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// FeedInfo is the JSON shape of one feed in the response.
	type FeedInfo struct {
		URL           string `json:"url"`
		Title         string `json:"title"`
		Type          string `json:"type"`
		Status        string `json:"status,omitempty"`
		ErrorCount    int    `json:"error_count,omitempty"`
		LastError     string `json:"last_error,omitempty"`
		ItemCount     int    `json:"item_count,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
		Language      string `json:"language,omitempty"`
	}

	var feeds []FeedInfo
	for rows.Next() {
		var f FeedInfo
		// feedType is nullable like the other text columns; handleAPIDomains
		// already reads feeds.type this way.
		var title, feedType, status, lastError, publishStatus, language *string
		var errorCount, itemCount *int
		if err := rows.Scan(&f.URL, &title, &feedType, &status, &errorCount, &lastError, &itemCount, &publishStatus, &language); err != nil {
			// Rows that still cannot be scanned are skipped.
			continue
		}
		f.Title = StringValue(title)
		f.Type = StringValue(feedType)
		f.Status = StringValue(status)
		f.LastError = StringValue(lastError)
		f.PublishStatus = StringValue(publishStatus)
		f.Language = StringValue(language)
		if errorCount != nil {
			f.ErrorCount = *errorCount
		}
		if itemCount != nil {
			f.ItemCount = *itemCount
		}
		feeds = append(feeds, f)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(feeds)
}
// handleAPISetDomainStatus changes the status of a single domain.
//
// Query parameters: host (required) and status, which must be one of 'hold',
// 'pass', 'skip' or 'fail' ('drop' is handled by /api/dropDomain). The host is
// normalized before use.
//
// 'skip' is delegated to skipDomain and its result is returned as JSON. For
// the other values the domains row is updated directly; 'pass' additionally
// clears last_error. The JSON response then echoes host and status.
func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()
	host := params.Get("host")
	status := params.Get("status")

	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	switch status {
	case "hold", "pass", "skip", "fail":
		// accepted
	default:
		http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest)
		return
	}

	host = normalizeHost(host)

	// 'skip' triggers a takedown (content hidden, data preserved).
	if status == "skip" {
		outcome := c.skipDomain(host)
		if outcome.Error != "" {
			http.Error(w, outcome.Error, http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(outcome)
		return
	}

	// Moving to 'pass' also wipes any recorded error.
	stmt := `
UPDATE domains SET status = $1
WHERE host = $2
`
	if status == "pass" {
		stmt = `
UPDATE domains SET status = $1, last_error = NULL
WHERE host = $2
`
	}
	if _, err := c.db.Exec(stmt, status, host); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"host":   host,
		"status": status,
	})
}
// handleAPIRevisitDomain resets a domain so that it looks never-checked: the
// status becomes 'pass' and last_checked_at, last_crawled_at and last_error
// are cleared (presumably so the crawl loop picks it up again — the crawl
// itself is not triggered here).
//
// Query parameters: host (required; 400 if missing). The host is normalized
// with normalizeHost, as handleAPISetDomainStatus and handleAPIPriorityCrawl
// do, so that a non-canonical spelling still matches the stored row. The JSON
// response reports the normalized host with status "queued".
func (c *Crawler) handleAPIRevisitDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	host = normalizeHost(host)

	_, err := c.db.Exec(`
UPDATE domains SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
WHERE host = $1
`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{"status": "queued", "host": host})
}
// handleAPIPriorityCrawl crawls a single host synchronously within the request.
//
// The host query parameter is required and is normalized. The domain row is
// inserted with status 'pass' if it does not exist; otherwise it is reset to
// 'pass' with its check/crawl timestamps and last error cleared. After the
// crawl the domain is marked as crawled and the feeds known for the host are
// returned as JSON, together with the crawl error if there was one.
func (c *Crawler) handleAPIPriorityCrawl(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	host = normalizeHost(host)

	// Upsert the domain so that it is eligible for crawling.
	if _, err := c.db.Exec(`
INSERT INTO domains (host, status, discovered_at, tld)
VALUES ($1, 'pass', NOW(), $2)
ON CONFLICT(host) DO UPDATE SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
`, host, getTLD(host)); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// The crawl runs inline; the response is only written once it finishes.
	fmt.Printf("Priority crawl: %s\n", host)
	feedsFound, crawlErr := c.crawlHost(host)

	response := map[string]interface{}{
		"host":        host,
		"feeds_found": feedsFound,
	}
	crawlMsg := ""
	if crawlErr != nil {
		crawlMsg = crawlErr.Error()
		response["error"] = crawlMsg
	}

	// Record the crawl outcome before reading the feeds back.
	c.markDomainCrawled(host, feedsFound, crawlMsg)

	// Compact view of a feed for the response.
	type feedSummary struct {
		URL      string `json:"url"`
		Title    string `json:"title"`
		Type     string `json:"type"`
		Category string `json:"category"`
		Status   string `json:"status"`
	}

	// A lookup error is ignored here; the response then carries no feeds.
	known, _ := c.GetFeedsByHost(host)
	var summaries []feedSummary
	for _, feed := range known {
		summaries = append(summaries, feedSummary{
			URL:      feed.URL,
			Title:    feed.Title,
			Type:     feed.Type,
			Category: feed.Category,
			Status:   feed.Status,
		})
	}
	response["feeds"] = summaries

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}
// handleAPIFilter is the entry point for stackable filtering. It reads the
// filter parameters and hands off to filterFeeds or filterDomains.
//
// Query parameters: tld, domain, feedStatus, domainStatus, languages
// (comma-separated), show ("feeds" or "domains"), sort ("alpha" or "feeds"),
// limit (default 100, capped at 500) and offset (default 0).
//
// When show is omitted, feeds are listed if any feed-level filter
// (feedStatus, domain, languages) is present; otherwise domains are listed.
func (c *Crawler) handleAPIFilter(w http.ResponseWriter, r *http.Request) {
	q := r.URL.Query()
	var (
		tld          = q.Get("tld")
		domain       = q.Get("domain")
		feedStatus   = q.Get("feedStatus")
		domainStatus = q.Get("domainStatus")
		rawLanguages = q.Get("languages")
		view         = q.Get("show")
		sortMode     = q.Get("sort")
	)

	limit, offset := 100, 0
	if raw := q.Get("limit"); raw != "" {
		fmt.Sscanf(raw, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if raw := q.Get("offset"); raw != "" {
		fmt.Sscanf(raw, "%d", &offset)
	}

	// Split the language list, dropping blanks; an empty parameter yields a
	// single blank piece and therefore an empty (nil) list.
	var langs []string
	for _, piece := range strings.Split(rawLanguages, ",") {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			langs = append(langs, trimmed)
		}
	}

	// Infer the view from the filters when it was not requested explicitly.
	if view == "" {
		switch {
		case feedStatus != "", domain != "", len(langs) > 0:
			view = "feeds"
		default:
			view = "domains"
		}
	}

	if view != "feeds" {
		c.filterDomains(w, tld, domainStatus, sortMode, limit, offset)
		return
	}
	c.filterFeeds(w, tld, domain, feedStatus, langs, limit, offset)
}
// filterDomains writes a JSON object {"type": "domains", "data": [...]}
// listing domains with their feed counts.
//
// tld and status are optional equality filters (empty means "any"). sort
// selects the ordering: "feeds" orders by feed count descending and then by
// host; anything else orders by TLD and then host. limit and offset page the
// result. A query error is reported as HTTP 500.
func (c *Crawler) filterDomains(w http.ResponseWriter, tld, status, sort string, limit, offset int) {
	var (
		sql    strings.Builder
		params []interface{}
	)
	sql.WriteString(`
SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
FROM domains d
LEFT JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
GROUP BY source_host
) f ON d.host = f.source_host
WHERE 1=1`)

	// Each optional filter takes the next positional placeholder.
	if tld != "" {
		params = append(params, tld)
		sql.WriteString(fmt.Sprintf(" AND d.tld = $%d", len(params)))
	}
	if status != "" {
		params = append(params, status)
		sql.WriteString(fmt.Sprintf(" AND d.status = $%d", len(params)))
	}

	// Ordering: by feed count when requested, alphabetical otherwise.
	limitPos, offsetPos := len(params)+1, len(params)+2
	switch sort {
	case "feeds":
		sql.WriteString(fmt.Sprintf(" ORDER BY feed_count DESC, d.host ASC LIMIT $%d OFFSET $%d", limitPos, offsetPos))
	default:
		sql.WriteString(fmt.Sprintf(" ORDER BY d.tld ASC, d.host ASC LIMIT $%d OFFSET $%d", limitPos, offsetPos))
	}
	params = append(params, limit, offset)

	rows, err := c.db.Query(sql.String(), params...)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// JSON shape of one result row.
	type domainEntry struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var found []domainEntry
	for rows.Next() {
		var (
			entry           domainEntry
			rowTLD, lastErr *string // nullable columns
		)
		if scanErr := rows.Scan(&entry.Host, &rowTLD, &entry.Status, &lastErr, &entry.FeedCount); scanErr != nil {
			// Rows that cannot be scanned are skipped.
			continue
		}
		entry.TLD = StringValue(rowTLD)
		entry.LastError = StringValue(lastErr)
		found = append(found, entry)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"type": "domains",
		"data": found,
	})
}
// handleAPITLDDomains returns one page of the domains under a TLD, each with
// its status, last error and feed count, as a JSON array.
//
// Query parameters: tld (required; 400 if missing), limit (default 100,
// capped at 500) and offset (default 0).
func (c *Crawler) handleAPITLDDomains(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()

	tld := params.Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}

	limit, offset := 100, 0
	if raw := params.Get("limit"); raw != "" {
		fmt.Sscanf(raw, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if raw := params.Get("offset"); raw != "" {
		fmt.Sscanf(raw, "%d", &offset)
	}

	rows, err := c.db.Query(`
SELECT d.host, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
FROM domains d
LEFT JOIN (
SELECT source_host, COUNT(*) as feed_count
FROM feeds
GROUP BY source_host
) f ON d.host = f.source_host
WHERE d.tld = $1
ORDER BY d.tld ASC, d.host ASC
LIMIT $2 OFFSET $3
`, tld, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// JSON shape of one result row.
	type domainEntry struct {
		Host      string `json:"host"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var page []domainEntry
	for rows.Next() {
		var (
			host, state string
			lastErr     *string // nullable column
			feedCount   int
		)
		if scanErr := rows.Scan(&host, &state, &lastErr, &feedCount); scanErr != nil {
			// Rows that cannot be scanned are skipped.
			continue
		}
		page = append(page, domainEntry{
			Host:      host,
			Status:    state,
			LastError: StringValue(lastErr),
			FeedCount: feedCount,
		})
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(page)
}
// handleAPITLDs lists every non-empty TLD with its number of domains as a
// JSON array ordered by TLD.
//
// With has_feeds=true only domains that have at least one row in feeds are
// counted, so TLDs without any such domain do not appear.
func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
	// Default: all TLDs.
	stmt := `
SELECT tld, COUNT(*) as domain_count
FROM domains
WHERE tld IS NOT NULL AND tld != ''
GROUP BY tld
ORDER BY tld ASC
`
	if r.URL.Query().Get("has_feeds") == "true" {
		// Only TLDs that have domains with feeds.
		stmt = `
SELECT DISTINCT d.tld, COUNT(DISTINCT d.host) as domain_count
FROM domains d
INNER JOIN feeds f ON d.host = f.source_host
WHERE d.tld IS NOT NULL AND d.tld != ''
GROUP BY d.tld
ORDER BY d.tld ASC
`
	}

	rows, err := c.db.Query(stmt)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	// JSON shape of one result row.
	type tldEntry struct {
		TLD         string `json:"tld"`
		DomainCount int    `json:"domain_count"`
	}

	var listing []tldEntry
	for rows.Next() {
		var entry tldEntry
		if scanErr := rows.Scan(&entry.TLD, &entry.DomainCount); scanErr == nil {
			listing = append(listing, entry)
		}
		// Rows that cannot be scanned are skipped.
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(listing)
}
// handleAPITLDStats reports how many domains and how many feeds carry the
// given TLD, as a JSON object with the keys tld, domain_count and feed_count.
//
// The tld query parameter is required (400 if missing); a failing count query
// produces HTTP 500.
func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}

	// countFor runs a single-value COUNT query parameterized by the TLD.
	countFor := func(stmt string) (int, error) {
		var n int
		err := c.db.QueryRow(stmt, tld).Scan(&n)
		return n, err
	}

	domains, err := countFor(`SELECT COUNT(*) FROM domains WHERE tld = $1`)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	feeds, err := countFor(`SELECT COUNT(*) FROM feeds WHERE tld = $1`)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	payload := map[string]interface{}{
		"tld":          tld,
		"domain_count": domains,
		"feed_count":   feeds,
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(payload)
}
// handleAPIDenyDomain skips a domain (takes down its accounts, preserves data).
//
// The host query parameter is required (400 if missing). It is normalized with
// normalizeHost before being passed to skipDomain, matching the 'skip' path of
// handleAPISetDomainStatus; without this a non-canonical spelling would match
// no rows and still be reported as a success. The skipDomain result is
// returned as JSON, or as HTTP 500 when it carries an error.
func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	host = normalizeHost(host)

	result := c.skipDomain(host)
	if result.Error != "" {
		http.Error(w, result.Error, http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}
// DomainActionResult contains the results of a domain action
// (skip, drop or restore) and is serialized as the JSON response of the
// corresponding API handlers.
type DomainActionResult struct {
	// Success is set to true only after all database updates of the action succeeded.
	Success bool `json:"success"`
	// Host is the domain the action was applied to.
	Host string `json:"host"`
	// Action names the operation: "skip", "drop" or "restore".
	Action string `json:"action"`
	// FeedsAffected is the row count reported by the feed UPDATE/DELETE statement.
	FeedsAffected int64 `json:"feeds_affected,omitempty"`
	// ItemsDeleted is the number of item rows removed (only set by dropDomain).
	ItemsDeleted int64 `json:"items_deleted,omitempty"`
	// AccountsAffected counts the PDS accounts for which the publisher call succeeded.
	AccountsAffected int `json:"accounts_affected,omitempty"`
	// AccountErrors holds one "<did>: <error>" entry per failed publisher call;
	// these failures do not abort the action.
	AccountErrors []string `json:"account_errors,omitempty"`
	// Error is the message of the database failure that aborted the action, if any.
	Error string `json:"error,omitempty"`
}
// getPDSCredentials loads the PDS host and admin password.
//
// The PDS_HOST and PDS_ADMIN_PASSWORD environment variables take precedence.
// Only when at least one of them is empty is the file "pds.env" (in the
// current working directory) consulted, and it fills in only the values the
// environment did not provide. The file is read line by line for
// "PDS_HOST=..." and "PDS_ADMIN_PASSWORD=..." entries; surrounding whitespace
// on a line is ignored and, when a key appears more than once, the last
// occurrence wins.
//
// A missing or unreadable file is not an error: whatever was found so far is
// returned, so either result may be empty.
func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
	pdsHost = os.Getenv("PDS_HOST")
	pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD")
	if pdsHost != "" && pdsAdminPassword != "" {
		return pdsHost, pdsAdminPassword
	}

	file, err := os.Open("pds.env")
	if err != nil {
		// No fallback file available; return what the environment gave us.
		return pdsHost, pdsAdminPassword
	}
	defer file.Close()

	// Remember which values came from the environment so the file cannot
	// override them.
	hostFromEnv := pdsHost != ""
	passwordFromEnv := pdsAdminPassword != ""

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		switch {
		case strings.HasPrefix(line, "PDS_HOST="):
			if !hostFromEnv {
				pdsHost = strings.TrimPrefix(line, "PDS_HOST=")
			}
		case strings.HasPrefix(line, "PDS_ADMIN_PASSWORD="):
			if !passwordFromEnv {
				pdsAdminPassword = strings.TrimPrefix(line, "PDS_ADMIN_PASSWORD=")
			}
		}
	}
	// A read error mid-file is deliberately ignored (best effort): values
	// parsed before the failure are still returned.
	return pdsHost, pdsAdminPassword
}
// getDomainDIDs collects the distinct, non-empty publish_account values of
// all feeds whose source_host equals host.
//
// A query failure yields a nil slice, and rows that fail to scan are skipped;
// no error is reported to the caller.
func (c *Crawler) getDomainDIDs(host string) []string {
	rows, err := c.db.Query(`
SELECT DISTINCT publish_account FROM feeds
WHERE source_host = $1 AND publish_account IS NOT NULL AND publish_account != ''
`, host)
	if err != nil {
		return nil
	}
	defer rows.Close()

	var accounts []string
	for rows.Next() {
		var account string
		if scanErr := rows.Scan(&account); scanErr != nil || account == "" {
			continue
		}
		accounts = append(accounts, account)
	}
	return accounts
}
// skipDomain puts a domain into the 'skip' state without deleting any data.
//
// Steps, in order:
//  1. If PDS credentials are available and the domain's feeds have publish
//     accounts, each account is taken down with reason "domain-skip";
//     per-account failures are collected in AccountErrors and do not abort.
//  2. All feeds of the host get status 'inactive' and publish_status 'skip'.
//  3. The domain row gets status 'skip'.
//
// A database failure in step 2 or 3 is reported through result.Error and
// leaves Success false.
func (c *Crawler) skipDomain(host string) DomainActionResult {
	outcome := DomainActionResult{Host: host, Action: "skip"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	accounts := c.getDomainDIDs(host)

	// Takedown hides the published content but keeps the account data.
	canTakedown := pdsHost != "" && pdsAdminPassword != "" && len(accounts) > 0
	if canTakedown {
		publisher := NewPublisher(pdsHost)
		for _, did := range accounts {
			takedownErr := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip")
			if takedownErr == nil {
				outcome.AccountsAffected++
				continue
			}
			outcome.AccountErrors = append(outcome.AccountErrors, fmt.Sprintf("%s: %v", did, takedownErr))
		}
	}

	// Feeds are deactivated, not deleted.
	updated, err := c.db.Exec(`
UPDATE feeds SET status = 'inactive', publish_status = 'skip'
WHERE source_host = $1
`, host)
	if err != nil {
		outcome.Error = fmt.Sprintf("failed to update feeds: %v", err)
		return outcome
	}
	outcome.FeedsAffected = updated

	if _, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host); err != nil {
		outcome.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return outcome
	}

	outcome.Success = true
	return outcome
}
// handleAPIDropDomain permanently deletes the data of a domain, but only if
// the domain is currently in the 'skip' state.
//
// The host query parameter is required (400 if missing). If the status lookup
// fails the handler answers 404 "domain not found"; if the domain is not
// skipped it answers 400. Otherwise dropDomain runs and its result is returned
// as JSON, or as HTTP 500 when it carries an error.
func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Dropping is only allowed as a second step after a skip.
	var current string
	if err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&current); err != nil {
		http.Error(w, "domain not found", http.StatusNotFound)
		return
	}
	if current != "skip" {
		http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest)
		return
	}

	outcome := c.dropDomain(host)
	if outcome.Error != "" {
		http.Error(w, outcome.Error, http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(outcome)
}
// dropDomain permanently deletes all data for a domain (feeds, items, PDS
// accounts) and marks the domain row as 'drop'.
//
// Steps, in order:
//  1. If PDS credentials are available and the domain's feeds have publish
//     accounts, each account is deleted; per-account failures are collected
//     in AccountErrors and do not abort.
//  2. All items belonging to the domain's feeds are deleted in a single
//     statement.
//  3. All feeds of the host are deleted.
//  4. The domain row gets status 'drop'.
//
// Items are removed before feeds, and a failure to remove them aborts the
// action: deleting the feeds anyway would leave item rows that can no longer
// be associated with the domain. Any database failure is reported through
// result.Error and leaves Success false.
func (c *Crawler) dropDomain(host string) DomainActionResult {
	result := DomainActionResult{Host: host, Action: "drop"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	dids := c.getDomainDIDs(host)

	// Delete PDS accounts.
	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
		publisher := NewPublisher(pdsHost)
		for _, did := range dids {
			if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil {
				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
			} else {
				result.AccountsAffected++
			}
		}
	}

	// Delete the items of every feed of this domain. The subquery selects the
	// same feed URLs a separate lookup would, without a round trip per feed.
	itemsDeleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url IN (SELECT url FROM feeds WHERE source_host = $1)`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to delete items: %v", err)
		return result
	}
	result.ItemsDeleted = itemsDeleted

	// Delete all feeds from this domain.
	feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE source_host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
		return result
	}
	result.FeedsAffected = feedsDeleted

	// Update domain status to drop.
	_, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return result
	}

	result.Success = true
	return result
}
// handleAPIUndenyDomain lifts the 'skip' state from a domain and restores its
// accounts via restoreDomain.
//
// The host query parameter is required (400 if missing). If the status lookup
// fails the handler answers 404 "domain not found"; if the domain is not
// currently skipped it answers 400. The restoreDomain result is returned as
// JSON, or as HTTP 500 when it carries an error.
func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Only skipped domains can be restored.
	var current string
	if err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&current); err != nil {
		http.Error(w, "domain not found", http.StatusNotFound)
		return
	}
	if current != "skip" {
		http.Error(w, "domain is not skipped", http.StatusBadRequest)
		return
	}

	outcome := c.restoreDomain(host)
	if outcome.Error != "" {
		http.Error(w, outcome.Error, http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(outcome)
}
// restoreDomain reverses skipDomain for a host.
//
// Steps, in order:
//  1. If PDS credentials are available and the domain's feeds have publish
//     accounts, each account is restored (takedown removed); per-account
//     failures are collected in AccountErrors and do not abort.
//  2. All feeds of the host get status 'active' and publish_status 'pass'.
//  3. The domain row gets status 'pass' and its last_error is cleared.
//
// A database failure in step 2 or 3 is reported through result.Error and
// leaves Success false.
func (c *Crawler) restoreDomain(host string) DomainActionResult {
	outcome := DomainActionResult{Host: host, Action: "restore"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	accounts := c.getDomainDIDs(host)

	canRestore := pdsHost != "" && pdsAdminPassword != "" && len(accounts) > 0
	if canRestore {
		publisher := NewPublisher(pdsHost)
		for _, did := range accounts {
			restoreErr := publisher.RestoreAccount(pdsAdminPassword, did)
			if restoreErr == nil {
				outcome.AccountsAffected++
				continue
			}
			outcome.AccountErrors = append(outcome.AccountErrors, fmt.Sprintf("%s: %v", did, restoreErr))
		}
	}

	// Every feed of the host is reactivated.
	updated, err := c.db.Exec(`
UPDATE feeds SET status = 'active', publish_status = 'pass'
WHERE source_host = $1
`, host)
	if err != nil {
		outcome.Error = fmt.Sprintf("failed to update feeds: %v", err)
		return outcome
	}
	outcome.FeedsAffected = updated

	// The domain itself goes back to 'pass' with a clean error slate.
	if _, err = c.db.Exec(`
UPDATE domains SET status = 'pass', last_error = NULL
WHERE host = $1
`, host); err != nil {
		outcome.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return outcome
	}

	outcome.Success = true
	return outcome
}