Revise domain status flow: skip uses takedown, add drop for permanent deletion
- Import default changed from 'hold' to 'pass' (auto-crawl)
- Skip now uses PDS takedown (hides posts but preserves data)
- Added 'drop' status for permanent deletion (requires skip first)
- Added TakedownAccount/RestoreAccount PDS functions
- Un-skip restores PDS accounts and reactivates feeds
- Dashboard shows 'drop' button only for skipped domains

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+260
-32
@@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
@@ -326,7 +328,7 @@ func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// handleAPISetDomainStatus sets the status for a domain
|
||||
// status must be 'hold', 'pass', 'skip', or 'fail'
|
||||
// status must be 'hold', 'pass', 'skip', or 'fail' (use /api/dropDomain for 'drop')
|
||||
func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
|
||||
host := r.URL.Query().Get("host")
|
||||
status := r.URL.Query().Get("status")
|
||||
@@ -336,12 +338,24 @@ func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Reques
|
||||
return
|
||||
}
|
||||
if status != "hold" && status != "pass" && status != "skip" && status != "fail" {
|
||||
http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail'", http.StatusBadRequest)
|
||||
http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
host = normalizeHost(host)
|
||||
|
||||
// Setting to 'skip' triggers takedown (hide content but preserve data)
|
||||
if status == "skip" {
|
||||
result := c.skipDomain(host)
|
||||
if result.Error != "" {
|
||||
http.Error(w, result.Error, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
return
|
||||
}
|
||||
|
||||
// When setting to pass, clear any last_error
|
||||
var err error
|
||||
if status == "pass" {
|
||||
@@ -707,7 +721,7 @@ func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
}
|
||||
|
||||
// handleAPIDenyDomain skips a domain and all its feeds
|
||||
// handleAPIDenyDomain skips a domain (takedown accounts, preserve data)
|
||||
func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
|
||||
host := r.URL.Query().Get("host")
|
||||
if host == "" {
|
||||
@@ -715,29 +729,199 @@ func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Update domain status to skip
|
||||
_, err := c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Skip all feeds from this domain
|
||||
feedsAffected, err := c.db.Exec(`UPDATE feeds SET publish_status = 'skip', status = 'dead' WHERE source_host = $1`, host)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
result := c.skipDomain(host)
|
||||
if result.Error != "" {
|
||||
http.Error(w, result.Error, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"success": true,
|
||||
"host": host,
|
||||
"feeds_skipped": feedsAffected,
|
||||
})
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
// handleAPIUndenyDomain removes skip status from a domain
|
||||
// DomainActionResult is the JSON payload describing the outcome of a
// domain-level action (the code in this file produces the actions "skip",
// "drop" and "restore"). Every field except Success, Host and Action is
// omitted from the JSON output when it holds its zero value.
type DomainActionResult struct {
	// Success is set to true only once every database step of the action
	// has completed without error.
	Success bool `json:"success"`

	// Host is the domain the action was applied to.
	Host string `json:"host"`

	// Action names the operation that was performed.
	Action string `json:"action"`

	// FeedsAffected is the row count reported by the feeds UPDATE/DELETE.
	FeedsAffected int64 `json:"feeds_affected,omitempty"`

	// ItemsDeleted is the number of item rows removed by a drop.
	ItemsDeleted int64 `json:"items_deleted,omitempty"`

	// AccountsAffected counts the PDS accounts for which the PDS call
	// succeeded.
	AccountsAffected int `json:"accounts_affected,omitempty"`

	// AccountErrors holds one "<did>: <error>" entry per PDS account whose
	// call failed; these failures do not abort the action.
	AccountErrors []string `json:"account_errors,omitempty"`

	// Error is the message of the database failure that aborted the
	// action, or empty when there was none.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// getPDSCredentials returns the PDS host and the PDS admin password.
//
// Both values are read from the PDS_HOST and PDS_ADMIN_PASSWORD environment
// variables first. If either is still empty, a "pds.env" file in the current
// working directory is read as KEY=VALUE lines and used to fill in only the
// values the environment did not provide; a value that came from the
// environment is never overwritten by the file. Whitespace around keys and
// values (including the trailing "\r" of CRLF files) is stripped. When a key
// appears several times in the file, the last occurrence wins.
//
// A missing or unreadable file is not an error: whatever could be resolved is
// returned and anything unresolved stays empty, so callers must check for
// empty strings.
func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
	pdsHost = os.Getenv("PDS_HOST")
	pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD")

	hostFromEnv := pdsHost != ""
	passwordFromEnv := pdsAdminPassword != ""
	if hostFromEnv && passwordFromEnv {
		return pdsHost, pdsAdminPassword
	}

	file, err := os.Open("pds.env")
	if err != nil {
		// The file is optional; fall back to whatever the environment gave us.
		return pdsHost, pdsAdminPassword
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		key, value, found := strings.Cut(strings.TrimSpace(scanner.Text()), "=")
		if !found {
			continue
		}
		value = strings.TrimSpace(value)

		switch strings.TrimSpace(key) {
		case "PDS_HOST":
			if !hostFromEnv {
				pdsHost = value
			}
		case "PDS_ADMIN_PASSWORD":
			if !passwordFromEnv {
				pdsAdminPassword = value
			}
		}
	}
	// scanner.Err() is deliberately not surfaced: this function has no error
	// return, and a partial read simply leaves the remaining values empty.
	return pdsHost, pdsAdminPassword
}
|
||||
|
||||
// getDomainDIDs returns all unique publish_account DIDs for a domain's feeds
|
||||
func (c *Crawler) getDomainDIDs(host string) []string {
|
||||
var dids []string
|
||||
rows, err := c.db.Query(`
|
||||
SELECT DISTINCT publish_account FROM feeds
|
||||
WHERE source_host = $1 AND publish_account IS NOT NULL AND publish_account != ''
|
||||
`, host)
|
||||
if err == nil {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var did string
|
||||
if err := rows.Scan(&did); err == nil && did != "" {
|
||||
dids = append(dids, did)
|
||||
}
|
||||
}
|
||||
}
|
||||
return dids
|
||||
}
|
||||
|
||||
// skipDomain sets a domain to skip, takes down PDS accounts but preserves all data
|
||||
func (c *Crawler) skipDomain(host string) DomainActionResult {
|
||||
result := DomainActionResult{Host: host, Action: "skip"}
|
||||
|
||||
pdsHost, pdsAdminPassword := getPDSCredentials()
|
||||
dids := c.getDomainDIDs(host)
|
||||
|
||||
// Takedown PDS accounts (hide content but preserve data)
|
||||
if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
|
||||
publisher := NewPublisher(pdsHost)
|
||||
for _, did := range dids {
|
||||
if err := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip"); err != nil {
|
||||
result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
|
||||
} else {
|
||||
result.AccountsAffected++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mark feeds as inactive (but don't delete)
|
||||
feedsAffected, err := c.db.Exec(`
|
||||
UPDATE feeds SET status = 'inactive', publish_status = 'skip'
|
||||
WHERE source_host = $1
|
||||
`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to update feeds: %v", err)
|
||||
return result
|
||||
}
|
||||
result.FeedsAffected = feedsAffected
|
||||
|
||||
// Update domain status to skip
|
||||
_, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to update domain status: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
result.Success = true
|
||||
return result
|
||||
}
|
||||
|
||||
// handleAPIDropDomain permanently deletes all data for a skipped domain
|
||||
func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) {
|
||||
host := r.URL.Query().Get("host")
|
||||
if host == "" {
|
||||
http.Error(w, "host parameter required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Verify domain is currently skipped
|
||||
var status string
|
||||
err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
|
||||
if err != nil {
|
||||
http.Error(w, "domain not found", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
if status != "skip" {
|
||||
http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
result := c.dropDomain(host)
|
||||
if result.Error != "" {
|
||||
http.Error(w, result.Error, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
// dropDomain permanently deletes all data for a domain (feeds, items, PDS accounts)
|
||||
func (c *Crawler) dropDomain(host string) DomainActionResult {
|
||||
result := DomainActionResult{Host: host, Action: "drop"}
|
||||
|
||||
pdsHost, pdsAdminPassword := getPDSCredentials()
|
||||
dids := c.getDomainDIDs(host)
|
||||
|
||||
// Delete PDS accounts
|
||||
if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
|
||||
publisher := NewPublisher(pdsHost)
|
||||
for _, did := range dids {
|
||||
if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil {
|
||||
result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
|
||||
} else {
|
||||
result.AccountsAffected++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get feed URLs for this domain (needed to delete items)
|
||||
var feedURLs []string
|
||||
feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE source_host = $1`, host)
|
||||
if err == nil {
|
||||
defer feedRows.Close()
|
||||
for feedRows.Next() {
|
||||
var url string
|
||||
if err := feedRows.Scan(&url); err == nil {
|
||||
feedURLs = append(feedURLs, url)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Delete items for all feeds from this domain
|
||||
for _, feedURL := range feedURLs {
|
||||
deleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url = $1`, feedURL)
|
||||
if err == nil {
|
||||
result.ItemsDeleted += deleted
|
||||
}
|
||||
}
|
||||
|
||||
// Delete all feeds from this domain
|
||||
feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE source_host = $1`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
|
||||
return result
|
||||
}
|
||||
result.FeedsAffected = feedsDeleted
|
||||
|
||||
// Update domain status to drop
|
||||
_, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to update domain status: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
result.Success = true
|
||||
return result
|
||||
}
|
||||
|
||||
// handleAPIUndenyDomain removes skip status from a domain (restores accounts)
|
||||
func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
|
||||
host := r.URL.Query().Get("host")
|
||||
if host == "" {
|
||||
@@ -745,24 +929,68 @@ func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
// Update domain status back to pass
|
||||
_, err := c.db.Exec(`UPDATE domains SET status = 'pass' WHERE host = $1 AND status = 'skip'`, host)
|
||||
// Verify domain is currently skipped
|
||||
var status string
|
||||
err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
http.Error(w, "domain not found", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
if status != "skip" {
|
||||
http.Error(w, "domain is not skipped", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Restore feeds to hold status and active
|
||||
feedsRestored, err := c.db.Exec(`UPDATE feeds SET publish_status = 'hold', status = 'active' WHERE source_host = $1 AND status = 'dead'`, host)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
result := c.restoreDomain(host)
|
||||
if result.Error != "" {
|
||||
http.Error(w, result.Error, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"success": true,
|
||||
"host": host,
|
||||
"feeds_restored": feedsRestored,
|
||||
})
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
// restoreDomain removes skip status and restores PDS accounts
|
||||
func (c *Crawler) restoreDomain(host string) DomainActionResult {
|
||||
result := DomainActionResult{Host: host, Action: "restore"}
|
||||
|
||||
pdsHost, pdsAdminPassword := getPDSCredentials()
|
||||
dids := c.getDomainDIDs(host)
|
||||
|
||||
// Restore PDS accounts (remove takedown)
|
||||
if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
|
||||
publisher := NewPublisher(pdsHost)
|
||||
for _, did := range dids {
|
||||
if err := publisher.RestoreAccount(pdsAdminPassword, did); err != nil {
|
||||
result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
|
||||
} else {
|
||||
result.AccountsAffected++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restore feeds to active status
|
||||
feedsAffected, err := c.db.Exec(`
|
||||
UPDATE feeds SET status = 'active', publish_status = 'pass'
|
||||
WHERE source_host = $1
|
||||
`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to update feeds: %v", err)
|
||||
return result
|
||||
}
|
||||
result.FeedsAffected = feedsAffected
|
||||
|
||||
// Update domain status back to pass
|
||||
_, err = c.db.Exec(`
|
||||
UPDATE domains SET status = 'pass', last_error = NULL
|
||||
WHERE host = $1
|
||||
`, host)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("failed to update domain status: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
result.Success = true
|
||||
return result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user