diff --git a/CLAUDE.md b/CLAUDE.md
index c35196e..5f788cb 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -80,7 +80,7 @@ Column naming: snake_case (e.g., `source_host`, `pub_date`, `item_count`)
### Crawl Logic
-1. Domain manually approved (status set to 'pass')
+1. Domains are imported as `pass` by default and are auto-crawled
2. Check stage: HEAD request verifies domain is reachable, sets last_checked_at
3. Crawl stage: Full recursive crawl (HTTPS, fallback HTTP)
4. Recursive crawl up to MaxDepth=10, MaxPagesPerHost=10
@@ -101,8 +101,17 @@ Status values: `hold` (default/pending review), `pass` (approved), `skip` (rejec
1. **Check stage** - HEAD request to verify domain is reachable
2. **Crawl stage** - Full recursive crawl for feed discovery
-Domain status values: `hold` (pending), `pass` (approved), `skip` (rejected), `fail` (error).
-Domains starting with a digit (except 1440.news) are auto-skipped.
+Domain status values:
+- `pass` (default on import) - Domain is crawled and checked automatically
+- `hold` (manual) - Pauses crawling, keeps existing feeds and items
+- `skip` (manual) - Takes down PDS accounts (hides posts), marks feeds inactive, preserves all data
+- `drop` (manual, via button) - Permanently **deletes** all feeds, items, and PDS accounts (requires skip first)
+- `fail` (automatic) - Set when check/crawl fails, keeps existing feeds and items
+
+Skip vs Drop:
+- `skip` is reversible - use "un-skip" to restore accounts and resume publishing
+- `drop` is permanent - all data is deleted, cannot be recovered
+Auto-skip patterns (imported as `skip`): bare TLDs, domains starting with a digit, and domains starting with a letter followed by a dash.
Non-English feeds are auto-skipped.
## AT Protocol Integration
diff --git a/api_domains.go b/api_domains.go
index dba0eb6..f2f0b8c 100644
--- a/api_domains.go
+++ b/api_domains.go
@@ -1,9 +1,11 @@
package main
import (
+ "bufio"
"encoding/json"
"fmt"
"net/http"
+ "os"
"strings"
"github.com/jackc/pgx/v5"
@@ -326,7 +328,7 @@ func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
}
// handleAPISetDomainStatus sets the status for a domain
-// status must be 'hold', 'pass', 'skip', or 'fail'
+// status must be 'hold', 'pass', 'skip', or 'fail' (use /api/dropDomain for 'drop')
func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
host := r.URL.Query().Get("host")
status := r.URL.Query().Get("status")
@@ -336,12 +338,24 @@ func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Reques
return
}
if status != "hold" && status != "pass" && status != "skip" && status != "fail" {
- http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail'", http.StatusBadRequest)
+ http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail' (use /api/dropDomain for permanent deletion)", http.StatusBadRequest)
return
}
host = normalizeHost(host)
+ // Setting to 'skip' triggers takedown (hide content but preserve data)
+ if status == "skip" {
+ result := c.skipDomain(host)
+ if result.Error != "" {
+ http.Error(w, result.Error, http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(result)
+ return
+ }
+
// When setting to pass, clear any last_error
var err error
if status == "pass" {
@@ -707,7 +721,7 @@ func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
})
}
-// handleAPIDenyDomain skips a domain and all its feeds
+// handleAPIDenyDomain skips a domain (takedown accounts, preserve data)
func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
host := r.URL.Query().Get("host")
if host == "" {
@@ -715,29 +729,199 @@ func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
return
}
- // Update domain status to skip
- _, err := c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
- if err != nil {
- http.Error(w, err.Error(), http.StatusInternalServerError)
- return
- }
-
- // Skip all feeds from this domain
- feedsAffected, err := c.db.Exec(`UPDATE feeds SET publish_status = 'skip', status = 'dead' WHERE source_host = $1`, host)
- if err != nil {
- http.Error(w, err.Error(), http.StatusInternalServerError)
+ result := c.skipDomain(host)
+ if result.Error != "" {
+ http.Error(w, result.Error, http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
- json.NewEncoder(w).Encode(map[string]interface{}{
- "success": true,
- "host": host,
- "feeds_skipped": feedsAffected,
- })
+ json.NewEncoder(w).Encode(result)
}
-// handleAPIUndenyDomain removes skip status from a domain
// DomainActionResult contains the results of a domain action
// (skip, drop, or restore) and is serialized as the JSON response
// body of the domain API handlers.
type DomainActionResult struct {
	Success          bool     `json:"success"`                     // true when all database updates completed
	Host             string   `json:"host"`                        // domain host the action was applied to
	Action           string   `json:"action"`                      // "skip", "drop", or "restore"
	FeedsAffected    int64    `json:"feeds_affected,omitempty"`    // feed rows updated or deleted
	ItemsDeleted     int64    `json:"items_deleted,omitempty"`     // item rows deleted (drop only)
	AccountsAffected int      `json:"accounts_affected,omitempty"` // PDS accounts successfully processed
	AccountErrors    []string `json:"account_errors,omitempty"`    // per-DID PDS failures, "did: err"
	Error            string   `json:"error,omitempty"`             // set when the action failed part-way
}
+
// getPDSCredentials loads PDS credentials from the environment, falling
// back to a local pds.env file (KEY=VALUE lines) for any value the
// environment does not provide. Environment values take precedence over
// file values; previously a single set env var could be clobbered by the
// file because both keys were always re-read when either was empty.
//
// File values are trimmed of surrounding whitespace and quotes, which also
// handles CRLF line endings (bufio.Scanner strips the \n but not the \r).
// Missing credentials are returned as empty strings; callers treat that
// as "PDS integration disabled".
func getPDSCredentials() (pdsHost, pdsAdminPassword string) {
	pdsHost = os.Getenv("PDS_HOST")
	pdsAdminPassword = os.Getenv("PDS_ADMIN_PASSWORD")
	if pdsHost != "" && pdsAdminPassword != "" {
		return
	}

	file, err := os.Open("pds.env")
	if err != nil {
		// Best-effort fallback: no file means whatever env provided stands.
		return
	}
	defer file.Close()

	// clean strips whitespace (including a trailing \r) and optional quotes.
	clean := func(v string) string {
		return strings.Trim(strings.TrimSpace(v), `"'`)
	}

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue // skip blanks and comments
		}
		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		switch strings.TrimSpace(key) {
		case "PDS_HOST":
			if pdsHost == "" {
				pdsHost = clean(value)
			}
		case "PDS_ADMIN_PASSWORD":
			if pdsAdminPassword == "" {
				pdsAdminPassword = clean(value)
			}
		}
	}
	return
}
+
+// getDomainDIDs returns all unique publish_account DIDs for a domain's feeds
+func (c *Crawler) getDomainDIDs(host string) []string {
+ var dids []string
+ rows, err := c.db.Query(`
+ SELECT DISTINCT publish_account FROM feeds
+ WHERE source_host = $1 AND publish_account IS NOT NULL AND publish_account != ''
+ `, host)
+ if err == nil {
+ defer rows.Close()
+ for rows.Next() {
+ var did string
+ if err := rows.Scan(&did); err == nil && did != "" {
+ dids = append(dids, did)
+ }
+ }
+ }
+ return dids
+}
+
// skipDomain sets a domain's status to 'skip': the domain's PDS accounts
// are taken down (content hidden) and its feeds are marked inactive, but
// no rows are deleted, so the action is reversible via restoreDomain.
//
// Side-effect order matters: account takedowns run first (best-effort,
// failures collected in result.AccountErrors), then feeds, then the
// domain row. NOTE(review): if a database update fails after takedowns
// succeeded, accounts remain taken down while the domain keeps its old
// status — confirm this partial state is acceptable.
func (c *Crawler) skipDomain(host string) DomainActionResult {
	result := DomainActionResult{Host: host, Action: "skip"}

	pdsHost, pdsAdminPassword := getPDSCredentials()
	dids := c.getDomainDIDs(host)

	// Takedown PDS accounts (hide content but preserve data).
	// Skipped entirely when PDS credentials are unavailable.
	if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
		publisher := NewPublisher(pdsHost)
		for _, did := range dids {
			if err := publisher.TakedownAccount(pdsAdminPassword, did, "domain-skip"); err != nil {
				result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
			} else {
				result.AccountsAffected++
			}
		}
	}

	// Mark feeds as inactive (but don't delete) so they stop being crawled/published.
	feedsAffected, err := c.db.Exec(`
		UPDATE feeds SET status = 'inactive', publish_status = 'skip'
		WHERE source_host = $1
	`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update feeds: %v", err)
		return result
	}
	result.FeedsAffected = feedsAffected

	// Update domain status to skip last, after feed state is consistent.
	_, err = c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
	if err != nil {
		result.Error = fmt.Sprintf("failed to update domain status: %v", err)
		return result
	}

	result.Success = true
	return result
}
+
+// handleAPIDropDomain permanently deletes all data for a skipped domain
+func (c *Crawler) handleAPIDropDomain(w http.ResponseWriter, r *http.Request) {
+ host := r.URL.Query().Get("host")
+ if host == "" {
+ http.Error(w, "host parameter required", http.StatusBadRequest)
+ return
+ }
+
+ // Verify domain is currently skipped
+ var status string
+ err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
+ if err != nil {
+ http.Error(w, "domain not found", http.StatusNotFound)
+ return
+ }
+ if status != "skip" {
+ http.Error(w, "domain must be skipped before dropping", http.StatusBadRequest)
+ return
+ }
+
+ result := c.dropDomain(host)
+ if result.Error != "" {
+ http.Error(w, result.Error, http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(result)
+}
+
+// dropDomain permanently deletes all data for a domain (feeds, items, PDS accounts)
+func (c *Crawler) dropDomain(host string) DomainActionResult {
+ result := DomainActionResult{Host: host, Action: "drop"}
+
+ pdsHost, pdsAdminPassword := getPDSCredentials()
+ dids := c.getDomainDIDs(host)
+
+ // Delete PDS accounts
+ if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
+ publisher := NewPublisher(pdsHost)
+ for _, did := range dids {
+ if err := publisher.DeleteAccount(pdsAdminPassword, did); err != nil {
+ result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
+ } else {
+ result.AccountsAffected++
+ }
+ }
+ }
+
+ // Get feed URLs for this domain (needed to delete items)
+ var feedURLs []string
+ feedRows, err := c.db.Query(`SELECT url FROM feeds WHERE source_host = $1`, host)
+ if err == nil {
+ defer feedRows.Close()
+ for feedRows.Next() {
+ var url string
+ if err := feedRows.Scan(&url); err == nil {
+ feedURLs = append(feedURLs, url)
+ }
+ }
+ }
+
+ // Delete items for all feeds from this domain
+ for _, feedURL := range feedURLs {
+ deleted, err := c.db.Exec(`DELETE FROM items WHERE feed_url = $1`, feedURL)
+ if err == nil {
+ result.ItemsDeleted += deleted
+ }
+ }
+
+ // Delete all feeds from this domain
+ feedsDeleted, err := c.db.Exec(`DELETE FROM feeds WHERE source_host = $1`, host)
+ if err != nil {
+ result.Error = fmt.Sprintf("failed to delete feeds: %v", err)
+ return result
+ }
+ result.FeedsAffected = feedsDeleted
+
+ // Update domain status to drop
+ _, err = c.db.Exec(`UPDATE domains SET status = 'drop' WHERE host = $1`, host)
+ if err != nil {
+ result.Error = fmt.Sprintf("failed to update domain status: %v", err)
+ return result
+ }
+
+ result.Success = true
+ return result
+}
+
+// handleAPIUndenyDomain removes skip status from a domain (restores accounts)
func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
host := r.URL.Query().Get("host")
if host == "" {
@@ -745,24 +929,68 @@ func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request)
return
}
- // Update domain status back to pass
- _, err := c.db.Exec(`UPDATE domains SET status = 'pass' WHERE host = $1 AND status = 'skip'`, host)
+ // Verify domain is currently skipped
+ var status string
+ err := c.db.QueryRow(`SELECT status FROM domains WHERE host = $1`, host).Scan(&status)
if err != nil {
- http.Error(w, err.Error(), http.StatusInternalServerError)
+ http.Error(w, "domain not found", http.StatusNotFound)
+ return
+ }
+ if status != "skip" {
+ http.Error(w, "domain is not skipped", http.StatusBadRequest)
return
}
- // Restore feeds to hold status and active
- feedsRestored, err := c.db.Exec(`UPDATE feeds SET publish_status = 'hold', status = 'active' WHERE source_host = $1 AND status = 'dead'`, host)
- if err != nil {
- http.Error(w, err.Error(), http.StatusInternalServerError)
+ result := c.restoreDomain(host)
+ if result.Error != "" {
+ http.Error(w, result.Error, http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
- json.NewEncoder(w).Encode(map[string]interface{}{
- "success": true,
- "host": host,
- "feeds_restored": feedsRestored,
- })
+ json.NewEncoder(w).Encode(result)
+}
+
+// restoreDomain removes skip status and restores PDS accounts
+func (c *Crawler) restoreDomain(host string) DomainActionResult {
+ result := DomainActionResult{Host: host, Action: "restore"}
+
+ pdsHost, pdsAdminPassword := getPDSCredentials()
+ dids := c.getDomainDIDs(host)
+
+ // Restore PDS accounts (remove takedown)
+ if pdsHost != "" && pdsAdminPassword != "" && len(dids) > 0 {
+ publisher := NewPublisher(pdsHost)
+ for _, did := range dids {
+ if err := publisher.RestoreAccount(pdsAdminPassword, did); err != nil {
+ result.AccountErrors = append(result.AccountErrors, fmt.Sprintf("%s: %v", did, err))
+ } else {
+ result.AccountsAffected++
+ }
+ }
+ }
+
+ // Restore feeds to active status
+ feedsAffected, err := c.db.Exec(`
+ UPDATE feeds SET status = 'active', publish_status = 'pass'
+ WHERE source_host = $1
+ `, host)
+ if err != nil {
+ result.Error = fmt.Sprintf("failed to update feeds: %v", err)
+ return result
+ }
+ result.FeedsAffected = feedsAffected
+
+ // Update domain status back to pass
+ _, err = c.db.Exec(`
+ UPDATE domains SET status = 'pass', last_error = NULL
+ WHERE host = $1
+ `, host)
+ if err != nil {
+ result.Error = fmt.Sprintf("failed to update domain status: %v", err)
+ return result
+ }
+
+ result.Success = true
+ return result
}
diff --git a/domain.go b/domain.go
index 0c044d2..a15a907 100644
--- a/domain.go
+++ b/domain.go
@@ -230,7 +230,7 @@ func (c *Crawler) ImportTestDomains(domains []string) {
for _, host := range domains {
_, err := c.db.Exec(`
INSERT INTO domains (host, status, discovered_at, tld)
- VALUES ($1, 'hold', $2, $3)
+ VALUES ($1, 'pass', $2, $3)
ON CONFLICT(host) DO NOTHING
`, host, now, getTLD(host))
if err != nil {
@@ -241,7 +241,7 @@ func (c *Crawler) ImportTestDomains(domains []string) {
}
}
-// ImportDomainsFromFile reads a vertices file and stores new domains as "hold"
+// ImportDomainsFromFile reads a vertices file and stores new domains as "pass"
func (c *Crawler) ImportDomainsFromFile(filename string, limit int) (imported int, skipped int, err error) {
file, err := os.Open(filename)
if err != nil {
@@ -328,7 +328,7 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
// Build rows for copy, applying auto-skip for spam patterns
rows := make([][]interface{}, len(domains))
for i, d := range domains {
- status := "hold"
+ status := "pass"
if shouldAutoSkipDomain(d.host) {
status = "skip"
}
@@ -347,7 +347,7 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
if err != nil {
// Fall back to individual inserts with ON CONFLICT
for _, d := range domains {
- status := "hold"
+ status := "pass"
if shouldAutoSkipDomain(d.host) {
status = "skip"
}
@@ -436,7 +436,7 @@ func (c *Crawler) parseAndStoreDomains(reader io.Reader, limit int) (imported in
// Insert with ON CONFLICT, applying auto-skip for spam patterns
for _, d := range domains {
- status := "hold"
+ status := "pass"
if shouldAutoSkipDomain(d.host) {
status = "skip"
}
diff --git a/pds_records.go b/pds_records.go
index 5a4f62d..97adae7 100644
--- a/pds_records.go
+++ b/pds_records.go
@@ -270,3 +270,80 @@ func (p *Publisher) DeleteAccount(adminPassword, did string) error {
return nil
}
+
+// TakedownAccount applies a takedown to an account (hides content, preserves data)
+func (p *Publisher) TakedownAccount(adminPassword, did, reason string) error {
+ payload := map[string]interface{}{
+ "subject": map[string]interface{}{
+ "$type": "com.atproto.admin.defs#repoRef",
+ "did": did,
+ },
+ "takedown": map[string]interface{}{
+ "applied": true,
+ "ref": reason,
+ },
+ }
+
+ body, err := json.Marshal(payload)
+ if err != nil {
+ return err
+ }
+
+ req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
+ if err != nil {
+ return err
+ }
+ req.Header.Set("Content-Type", "application/json")
+ req.SetBasicAuth("admin", adminPassword)
+
+ resp, err := p.httpClient.Do(req)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ respBody, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("takedown account failed: %s - %s", resp.Status, string(respBody))
+ }
+
+ return nil
+}
+
+// RestoreAccount removes a takedown from an account (makes content visible again)
+func (p *Publisher) RestoreAccount(adminPassword, did string) error {
+ payload := map[string]interface{}{
+ "subject": map[string]interface{}{
+ "$type": "com.atproto.admin.defs#repoRef",
+ "did": did,
+ },
+ "takedown": map[string]interface{}{
+ "applied": false,
+ },
+ }
+
+ body, err := json.Marshal(payload)
+ if err != nil {
+ return err
+ }
+
+ req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
+ if err != nil {
+ return err
+ }
+ req.Header.Set("Content-Type", "application/json")
+ req.SetBasicAuth("admin", adminPassword)
+
+ resp, err := p.httpClient.Do(req)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ respBody, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("restore account failed: %s - %s", resp.Status, string(respBody))
+ }
+
+ return nil
+}
diff --git a/routes.go b/routes.go
index bb63414..ee4beb3 100644
--- a/routes.go
+++ b/routes.go
@@ -142,6 +142,9 @@ func (c *Crawler) StartDashboard(addr string) error {
http.HandleFunc("/api/undenyDomain", func(w http.ResponseWriter, r *http.Request) {
c.handleAPIUndenyDomain(w, r)
})
+ http.HandleFunc("/api/dropDomain", func(w http.ResponseWriter, r *http.Request) {
+ c.handleAPIDropDomain(w, r)
+ })
http.HandleFunc("/api/tldStats", func(w http.ResponseWriter, r *http.Request) {
c.handleAPITLDStats(w, r)
})
diff --git a/static/dashboard.js b/static/dashboard.js
index 5e18491..14b0205 100644
--- a/static/dashboard.js
+++ b/static/dashboard.js
@@ -108,6 +108,14 @@ function initDashboard() {
// External link
html += `↗`;
+ // Drop button (only for skipped domains)
+ if (status === 'skip') {
+ html += ``;
+ }
+
html += '';
// Feeds under this domain
@@ -270,6 +278,43 @@ function initDashboard() {
row.addEventListener('mouseenter', () => row.style.background = '#1a1a1a');
row.addEventListener('mouseleave', () => row.style.background = 'transparent');
+ // Drop button handler (for skipped domains)
+ const dropBtn = row.querySelector('.drop-btn');
+ if (dropBtn) {
+ dropBtn.addEventListener('click', async (e) => {
+ e.stopPropagation();
+ const host = dropBtn.dataset.host;
+ if (!confirm(`Permanently delete all data for ${host}?\n\nThis will:\n- Delete all PDS accounts\n- Delete all feed items\n- Delete all feeds\n\nThis cannot be undone.`)) {
+ return;
+ }
+ dropBtn.disabled = true;
+ dropBtn.textContent = '...';
+ try {
+ const resp = await fetch(`/api/dropDomain?host=${encodeURIComponent(host)}`);
+ if (resp.ok) {
+ const result = await resp.json();
+ // Update status to "drop" visually
+ block.dataset.status = 'drop';
+ const statusGroup = row.querySelector('.status-btn-group');
+ if (statusGroup) {
+ statusGroup.innerHTML = 'dropped';
+ }
+ dropBtn.remove();
+ console.log('Drop result:', result);
+ } else {
+ alert('Drop failed: ' + await resp.text());
+ dropBtn.disabled = false;
+ dropBtn.textContent = 'drop';
+ }
+ } catch (err) {
+ console.error('Drop failed:', err);
+ alert('Drop failed: ' + err.message);
+ dropBtn.disabled = false;
+ dropBtn.textContent = 'drop';
+ }
+ });
+ }
+
// Handle inline feed clicks - toggle detail
block.querySelectorAll('.inline-feed-block').forEach(feedBlock => {
const title = feedBlock.querySelector('.feed-title');