Refactor large Go files into focused modules
Split dashboard.go (3,528 lines) into:
- routes.go: HTTP route registration
- api_domains.go: Domain API handlers
- api_feeds.go: Feed API handlers
- api_publish.go: Publishing API handlers
- api_search.go: Search API handlers
- templates.go: HTML templates
- dashboard.go: Stats functions only (235 lines)

Split publisher.go (1,502 lines) into:
- pds_auth.go: Authentication and account management
- pds_records.go: Record operations (upload, update, delete)
- handle.go: Handle derivation from feed URLs
- image.go: Image processing and favicon fetching
- publisher.go: Core types and PublishItem (439 lines)

Split feed.go (1,137 lines) into:
- item.go: Item struct and DB operations
- feed_check.go: Feed checking and processing
- feed.go: Feed struct and DB operations (565 lines)

Also includes domain import batch size increase (1k -> 100k).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
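The batch-size change lands outside this file, so it is not visible in the hunk below (the new api_domains.go). As a rough sketch only of what a 100k-row batched domain import can look like with pgx — the importDomainBatch helper, the batchSize constant, the column list, and the 'hold' status are assumptions, not code from this commit:

// Hypothetical sketch (not from this commit): bulk-inserting discovered
// domains via the PostgreSQL COPY protocol instead of row-at-a-time INSERTs.
// Assumes github.com/jackc/pgx/v5 and github.com/jackc/pgx/v5/pgxpool.
const domainImportBatchSize = 100_000 // raised from 1_000 per the commit message

func importDomainBatch(ctx context.Context, db *pgxpool.Pool, hosts []string) error {
	rows := make([][]interface{}, 0, len(hosts))
	for _, h := range hosts {
		rows = append(rows, []interface{}{h, getTLD(h), "hold"})
	}
	// CopyFrom streams the whole batch in one round trip. A real import
	// would also need conflict handling (e.g. COPY into a staging table).
	_, err := db.CopyFrom(ctx,
		pgx.Identifier{"domains"},
		[]string{"host", "tld", "status"},
		pgx.CopyFromRows(rows),
	)
	return err
}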
@@ -0,0 +1,764 @@
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"strings"

	"github.com/jackc/pgx/v5"
)
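
// handleAPIAllDomains serves the background-refreshed cache of per-domain stats, paginated via offset/limit (limit capped at 100).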
func (c *Crawler) handleAPIAllDomains(w http.ResponseWriter, r *http.Request) {
	offset := 0
	limit := 100
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}
	if offset < 0 {
		// Guard against negative offsets, which would panic when slicing the cache below.
		offset = 0
	}
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit < 0 {
			limit = 0
		}
		if limit > 100 {
			limit = 100
		}
	}

	// Serve from cache (updated once per minute in background)
	c.statsMu.RLock()
	cached := c.cachedAllDomains
	c.statsMu.RUnlock()

	var domains []DomainStat
	if cached != nil && offset < len(cached) {
		end := offset + limit
		if end > len(cached) {
			end = len(cached)
		}
		domains = cached[offset:end]
	}
	if domains == nil {
		domains = []DomainStat{}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}

// handleAPIDomains lists domains with optional status filter, including their feeds
func (c *Crawler) handleAPIDomains(w http.ResponseWriter, r *http.Request) {
	status := r.URL.Query().Get("status")
	hasFeeds := r.URL.Query().Get("has_feeds") == "true"
	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	// First get domains
	var rows pgx.Rows
	var err error
	if hasFeeds {
		// Only domains with feeds
		if status != "" {
			rows, err = c.db.Query(`
				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					GROUP BY source_host
				) f ON d.host = f.source_host
				WHERE d.status = $1
				ORDER BY d.tld ASC, d.host ASC
				LIMIT $2 OFFSET $3
			`, status, limit, offset)
		} else {
			rows, err = c.db.Query(`
				SELECT d.host, d.tld, d.status, d.last_error, f.feed_count
				FROM domains d
				INNER JOIN (
					SELECT source_host, COUNT(*) as feed_count
					FROM feeds
					GROUP BY source_host
				) f ON d.host = f.source_host
				ORDER BY d.tld ASC, d.host ASC
				LIMIT $1 OFFSET $2
			`, limit, offset)
		}
	} else if status != "" {
		rows, err = c.db.Query(`
			SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
			FROM domains d
			LEFT JOIN (
				SELECT source_host, COUNT(*) as feed_count
				FROM feeds
				GROUP BY source_host
			) f ON d.host = f.source_host
			WHERE d.status = $1
			ORDER BY d.tld ASC, d.host ASC
			LIMIT $2 OFFSET $3
		`, status, limit, offset)
	} else {
		rows, err = c.db.Query(`
			SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
			FROM domains d
			LEFT JOIN (
				SELECT source_host, COUNT(*) as feed_count
				FROM feeds
				GROUP BY source_host
			) f ON d.host = f.source_host
			ORDER BY d.tld ASC, d.host ASC
			LIMIT $1 OFFSET $2
		`, limit, offset)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type FeedInfo struct {
		URL           string `json:"url"`
		Title         string `json:"title,omitempty"`
		Type          string `json:"type,omitempty"`
		Status        string `json:"status,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
	}

	type DomainInfo struct {
		Host      string     `json:"host"`
		TLD       string     `json:"tld"`
		Status    string     `json:"status"`
		LastError string     `json:"last_error,omitempty"`
		FeedCount int        `json:"feed_count"`
		Feeds     []FeedInfo `json:"feeds,omitempty"`
	}

	var domains []DomainInfo
	var hosts []string
	for rows.Next() {
		var d DomainInfo
		var tld, lastError *string
		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tld)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
		hosts = append(hosts, d.Host)
	}

	// Now get feeds for these domains
	if len(hosts) > 0 {
		feedRows, err := c.db.Query(`
			SELECT source_host, url, title, type, status, publish_status
			FROM feeds
			WHERE source_host = ANY($1)
			ORDER BY source_host, url
		`, hosts)
		if err == nil {
			defer feedRows.Close()
			feedsByHost := make(map[string][]FeedInfo)
			for feedRows.Next() {
				var host string
				var f FeedInfo
				var title, feedType, status, publishStatus *string
				if err := feedRows.Scan(&host, &f.URL, &title, &feedType, &status, &publishStatus); err != nil {
					continue
				}
				f.Title = StringValue(title)
				f.Type = StringValue(feedType)
				f.Status = StringValue(status)
				f.PublishStatus = StringValue(publishStatus)
				feedsByHost[host] = append(feedsByHost[host], f)
			}
			// Attach feeds to domains
			for i := range domains {
				if feeds, ok := feedsByHost[domains[i].Host]; ok {
					domains[i].Feeds = feeds
				}
			}
		}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
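
// handleAPIDomainsByStatus lists domains with a given status plus their feed counts; status is required, limit capped at 500.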
func (c *Crawler) handleAPIDomainsByStatus(w http.ResponseWriter, r *http.Request) {
	status := r.URL.Query().Get("status")
	if status == "" {
		http.Error(w, "status parameter required", http.StatusBadRequest)
		return
	}

	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE d.status = $1
		ORDER BY d.tld ASC, d.host ASC
		LIMIT $2 OFFSET $3
	`, status, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var tld, lastError *string
		if err := rows.Scan(&d.Host, &tld, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tld)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
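
// handleAPIDomainFeeds lists the feeds discovered for a single host, paginated by offset/limit (limit capped at 500).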
func (c *Crawler) handleAPIDomainFeeds(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT url, title, type, status, error_count, last_error, item_count, publish_status, language
		FROM feeds
		WHERE source_host = $1
		ORDER BY url ASC
		LIMIT $2 OFFSET $3
	`, host, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type FeedInfo struct {
		URL           string `json:"url"`
		Title         string `json:"title"`
		Type          string `json:"type"`
		Status        string `json:"status,omitempty"`
		ErrorCount    int    `json:"error_count,omitempty"`
		LastError     string `json:"last_error,omitempty"`
		ItemCount     int    `json:"item_count,omitempty"`
		PublishStatus string `json:"publish_status,omitempty"`
		Language      string `json:"language,omitempty"`
	}

	var feeds []FeedInfo
	for rows.Next() {
		var f FeedInfo
		var title, status, lastError, publishStatus, language *string
		var errorCount, itemCount *int
		if err := rows.Scan(&f.URL, &title, &f.Type, &status, &errorCount, &lastError, &itemCount, &publishStatus, &language); err != nil {
			continue
		}
		f.Title = StringValue(title)
		f.Status = StringValue(status)
		f.LastError = StringValue(lastError)
		f.PublishStatus = StringValue(publishStatus)
		f.Language = StringValue(language)
		if errorCount != nil {
			f.ErrorCount = *errorCount
		}
		if itemCount != nil {
			f.ItemCount = *itemCount
		}
		feeds = append(feeds, f)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(feeds)
}

// handleAPISetDomainStatus sets the status for a domain
// status must be 'hold', 'pass', 'skip', or 'fail'
func (c *Crawler) handleAPISetDomainStatus(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	status := r.URL.Query().Get("status")

	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}
	if status != "hold" && status != "pass" && status != "skip" && status != "fail" {
		http.Error(w, "status must be 'hold', 'pass', 'skip', or 'fail'", http.StatusBadRequest)
		return
	}

	host = normalizeHost(host)

	// When setting to pass, clear any last_error
	var err error
	if status == "pass" {
		_, err = c.db.Exec(`
			UPDATE domains SET status = $1, last_error = NULL
			WHERE host = $2
		`, status, host)
	} else {
		_, err = c.db.Exec(`
			UPDATE domains SET status = $1
			WHERE host = $2
		`, status, host)
	}

	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"host":   host,
		"status": status,
	})
}
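
// handleAPIRevisitDomain resets a domain to 'pass' and clears its check/crawl timestamps so the crawler picks it up again.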
func (c *Crawler) handleAPIRevisitDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	_, err := c.db.Exec(`
		UPDATE domains SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
		WHERE host = $1
	`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{"status": "queued", "host": host})
}

// handleAPIPriorityCrawl immediately crawls a domain (adds it if not exists)
func (c *Crawler) handleAPIPriorityCrawl(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	host = normalizeHost(host)

	// Add domain if it doesn't exist, or reset to pass for crawling
	_, err := c.db.Exec(`
		INSERT INTO domains (host, status, discovered_at, tld)
		VALUES ($1, 'pass', NOW(), $2)
		ON CONFLICT(host) DO UPDATE SET status = 'pass', last_checked_at = NULL, last_crawled_at = NULL, last_error = NULL
	`, host, getTLD(host))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Crawl synchronously
	fmt.Printf("Priority crawl: %s\n", host)
	feedsFound, crawlErr := c.crawlHost(host)

	errStr := ""
	if crawlErr != nil {
		errStr = crawlErr.Error()
	}

	// Mark as crawled
	c.markDomainCrawled(host, feedsFound, errStr)

	// Get the feeds we found
	feeds, _ := c.GetFeedsByHost(host)

	type FeedSummary struct {
		URL      string `json:"url"`
		Title    string `json:"title"`
		Type     string `json:"type"`
		Category string `json:"category"`
		Status   string `json:"status"`
	}
	var feedSummaries []FeedSummary
	for _, f := range feeds {
		feedSummaries = append(feedSummaries, FeedSummary{
			URL:      f.URL,
			Title:    f.Title,
			Type:     f.Type,
			Category: f.Category,
			Status:   f.Status,
		})
	}

	result := map[string]interface{}{
		"host":        host,
		"feeds_found": feedsFound,
		"feeds":       feedSummaries,
	}
	if crawlErr != nil {
		result["error"] = crawlErr.Error()
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}

// handleAPIFilter handles flexible filtering with stackable parameters
func (c *Crawler) handleAPIFilter(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	domain := r.URL.Query().Get("domain")
	feedStatus := r.URL.Query().Get("feedStatus")
	domainStatus := r.URL.Query().Get("domainStatus")
	languages := r.URL.Query().Get("languages") // comma-separated list
	show := r.URL.Query().Get("show")           // "feeds" or "domains"
	sort := r.URL.Query().Get("sort")           // "alpha" or "feeds"

	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	// Parse languages into slice
	var langList []string
	if languages != "" {
		for _, lang := range strings.Split(languages, ",") {
			lang = strings.TrimSpace(lang)
			if lang != "" {
				langList = append(langList, lang)
			}
		}
	}

	// Determine what to show based on filters
	if show == "" {
		if feedStatus != "" || domain != "" || len(langList) > 0 {
			show = "feeds"
		} else {
			show = "domains"
		}
	}

	if show == "feeds" {
		c.filterFeeds(w, tld, domain, feedStatus, langList, limit, offset)
	} else {
		c.filterDomains(w, tld, domainStatus, sort, limit, offset)
	}
}
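
// filterDomains returns domains matching the optional tld/status filters, sorted alphabetically or by feed count.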
func (c *Crawler) filterDomains(w http.ResponseWriter, tld, status, sort string, limit, offset int) {
	var args []interface{}
	argNum := 1
	query := `
		SELECT d.host, d.tld, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE 1=1`

	if tld != "" {
		query += fmt.Sprintf(" AND d.tld = $%d", argNum)
		args = append(args, tld)
		argNum++
	}
	if status != "" {
		query += fmt.Sprintf(" AND d.status = $%d", argNum)
		args = append(args, status)
		argNum++
	}

	// Sort by feed count descending or alphabetically
	if sort == "feeds" {
		query += fmt.Sprintf(" ORDER BY feed_count DESC, d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
	} else {
		query += fmt.Sprintf(" ORDER BY d.tld ASC, d.host ASC LIMIT $%d OFFSET $%d", argNum, argNum+1)
	}
	args = append(args, limit, offset)

	rows, err := c.db.Query(query, args...)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		TLD       string `json:"tld"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var tldVal, lastError *string
		if err := rows.Scan(&d.Host, &tldVal, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.TLD = StringValue(tldVal)
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"type": "domains",
		"data": domains,
	})
}
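
// handleAPITLDDomains lists domains under a single TLD with their feed counts; tld is required.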
func (c *Crawler) handleAPITLDDomains(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}

	limit := 100
	offset := 0
	if l := r.URL.Query().Get("limit"); l != "" {
		fmt.Sscanf(l, "%d", &limit)
		if limit > 500 {
			limit = 500
		}
	}
	if o := r.URL.Query().Get("offset"); o != "" {
		fmt.Sscanf(o, "%d", &offset)
	}

	rows, err := c.db.Query(`
		SELECT d.host, d.status, d.last_error, COALESCE(f.feed_count, 0) as feed_count
		FROM domains d
		LEFT JOIN (
			SELECT source_host, COUNT(*) as feed_count
			FROM feeds
			GROUP BY source_host
		) f ON d.host = f.source_host
		WHERE d.tld = $1
		ORDER BY d.tld ASC, d.host ASC
		LIMIT $2 OFFSET $3
	`, tld, limit, offset)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type DomainInfo struct {
		Host      string `json:"host"`
		Status    string `json:"status"`
		LastError string `json:"last_error,omitempty"`
		FeedCount int    `json:"feed_count"`
	}

	var domains []DomainInfo
	for rows.Next() {
		var d DomainInfo
		var lastError *string
		if err := rows.Scan(&d.Host, &d.Status, &lastError, &d.FeedCount); err != nil {
			continue
		}
		d.LastError = StringValue(lastError)
		domains = append(domains, d)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(domains)
}
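
// handleAPITLDs lists every TLD with its domain count, optionally restricted to TLDs whose domains have feeds.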
func (c *Crawler) handleAPITLDs(w http.ResponseWriter, r *http.Request) {
	hasFeeds := r.URL.Query().Get("has_feeds") == "true"

	var rows pgx.Rows
	var err error

	if hasFeeds {
		// Only TLDs that have domains with feeds
		rows, err = c.db.Query(`
			SELECT DISTINCT d.tld, COUNT(DISTINCT d.host) as domain_count
			FROM domains d
			INNER JOIN feeds f ON d.host = f.source_host
			WHERE d.tld IS NOT NULL AND d.tld != ''
			GROUP BY d.tld
			ORDER BY d.tld ASC
		`)
	} else {
		// All TLDs
		rows, err = c.db.Query(`
			SELECT tld, COUNT(*) as domain_count
			FROM domains
			WHERE tld IS NOT NULL AND tld != ''
			GROUP BY tld
			ORDER BY tld ASC
		`)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type TLDInfo struct {
		TLD         string `json:"tld"`
		DomainCount int    `json:"domain_count"`
	}

	var tlds []TLDInfo
	for rows.Next() {
		var t TLDInfo
		if err := rows.Scan(&t.TLD, &t.DomainCount); err != nil {
			continue
		}
		tlds = append(tlds, t)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(tlds)
}
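
// handleAPITLDStats returns the domain and feed counts for a single TLD.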
func (c *Crawler) handleAPITLDStats(w http.ResponseWriter, r *http.Request) {
	tld := r.URL.Query().Get("tld")
	if tld == "" {
		http.Error(w, "tld parameter required", http.StatusBadRequest)
		return
	}

	var domainCount, feedCount int
	err := c.db.QueryRow(`SELECT COUNT(*) FROM domains WHERE tld = $1`, tld).Scan(&domainCount)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	err = c.db.QueryRow(`SELECT COUNT(*) FROM feeds WHERE tld = $1`, tld).Scan(&feedCount)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"tld":          tld,
		"domain_count": domainCount,
		"feed_count":   feedCount,
	})
}

// handleAPIDenyDomain skips a domain and all its feeds
func (c *Crawler) handleAPIDenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Update domain status to skip
	_, err := c.db.Exec(`UPDATE domains SET status = 'skip' WHERE host = $1`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Skip all feeds from this domain
	feedsAffected, err := c.db.Exec(`UPDATE feeds SET publish_status = 'skip', status = 'dead' WHERE source_host = $1`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"host":    host,
		// Report the affected row count; encoding the raw pgx CommandTag would serialize as an empty object.
		"feeds_skipped": feedsAffected.RowsAffected(),
	})
}

// handleAPIUndenyDomain removes skip status from a domain
func (c *Crawler) handleAPIUndenyDomain(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	if host == "" {
		http.Error(w, "host parameter required", http.StatusBadRequest)
		return
	}

	// Update domain status back to pass
	_, err := c.db.Exec(`UPDATE domains SET status = 'pass' WHERE host = $1 AND status = 'skip'`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Restore feeds to hold status and active
	feedsRestored, err := c.db.Exec(`UPDATE feeds SET publish_status = 'hold', status = 'active' WHERE source_host = $1 AND status = 'dead'`, host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"host":    host,
		// Report the affected row count; encoding the raw pgx CommandTag would serialize as an empty object.
		"feeds_restored": feedsRestored.RowsAffected(),
	})
}