Restore working codebase with all methods

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
primal
2026-02-01 19:08:53 -05:00
parent 211812363a
commit 8a9001c02c
18 changed files with 2357 additions and 331 deletions
+1
@@ -6,3 +6,4 @@ feeds/
 .gitignore
 .claude
 CLAUDE.md
+.launch.sh
+13 -7
@@ -2,7 +2,7 @@
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-> **Note:** Always run applications in containers via `docker compose up -d --build` when possible. This ensures proper networking between services (database, traefik, etc.) and matches the production environment.
+> **IMPORTANT:** Always use `./.launch.sh` to deploy changes. This script updates version numbers in static files (CSS/JS cache busting) before running `docker compose up -d --build`. Never use `docker compose` directly.
 ## Build & Run
@@ -84,17 +84,23 @@ PostgreSQL with pgx driver, using connection pooling:
 Column naming: snake_case (e.g., `source_host`, `pub_date`, `item_count`)
-### Crawl Logic
-1. Domains import as `pass` by default (auto-crawled)
-2. Crawl loop picks up domains where `last_crawled_at IS NULL`
-3. Full recursive crawl (HTTPS, fallback HTTP) up to MaxDepth=10, MaxPagesPerHost=10
+### Processing Terminology
+- **domain_check**: DNS lookup to verify domain is live
+- **feed_crawl**: Crawl a live domain to discover RSS/Atom feeds
+- **feed_check**: Check a known feed for new items
+### Domain Processing Flow
+1. Domains import as `pass` by default
+2. Domain loop runs **domain_check** (DNS lookup) for unchecked domains
+3. Domain loop runs **feed_crawl** for checked domains (recursive crawl up to MaxDepth=10, MaxPagesPerHost=10)
 4. Extract `<link rel="alternate">` and anchor hrefs containing rss/atom/feed
-5. Parse discovered feeds for metadata, save with next_crawl_at
+5. Parse discovered feeds for metadata, save with `next_check_at`
 ### Feed Checking
-Uses conditional HTTP (ETag, If-Modified-Since). Adaptive backoff: base 100s + 100s per consecutive no-change. Respects RSS `<ttl>` and Syndication namespace hints.
+**feed_check** uses conditional HTTP (ETag, If-Modified-Since). Adaptive backoff: base 100s + 100s per consecutive no-change. Respects RSS `<ttl>` and Syndication namespace hints.
 ### Publishing
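The backoff rule in the Feed Checking note reappears later in this commit as `calculateNextCheck`. As a rough illustration only (not part of the diff), the schedule it describes works out like this:

```go
package main

import (
	"fmt"
	"time"
)

// Illustration only: the adaptive backoff described above, driven by the
// Feed.NoUpdate counter from this commit. The helper name is hypothetical.
func nextCheckDelay(noUpdate int) time.Duration {
	// base 100s + 100s per consecutive no-change
	return time.Duration(100+100*noUpdate) * time.Second
}

func main() {
	for _, n := range []int{0, 1, 5, 10} {
		fmt.Printf("NoUpdate=%d -> next feed_check in %v\n", n, nextCheckDelay(n))
	}
}
```

So a feed that has gone unchanged ten times in a row is not polled again for roughly 18 minutes (100 + 100×10 = 1100 seconds).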
+4 -4
@@ -1,9 +1,9 @@
-FROM golang:1.24-alpine AS builder
+FROM golang:latest AS builder
 WORKDIR /app
 # Install build dependencies
-RUN apk add --no-cache gcc musl-dev
+RUN apt-get update && apt-get install -y gcc && rm -rf /var/lib/apt/lists/*
 # Copy go mod files first for layer caching
 COPY go.mod go.sum ./
@@ -17,12 +17,12 @@ COPY static/ ./static/
 RUN CGO_ENABLED=1 go build -o 1440.news .
 # Runtime stage
-FROM alpine:latest
+FROM ubuntu:latest
 WORKDIR /app
 # Install runtime dependencies
-RUN apk add --no-cache ca-certificates tzdata
+RUN apt-get update && apt-get install -y ca-certificates tzdata && rm -rf /var/lib/apt/lists/*
 # Copy binary from builder
 COPY --from=builder /app/1440.news .
+9 -9
@@ -26,8 +26,8 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
     Language      string `json:"language,omitempty"`
     SiteURL       string `json:"siteUrl,omitempty"`
     DiscoveredAt  string `json:"discoveredAt,omitempty"`
-    LastCrawledAt string `json:"lastCrawledAt,omitempty"`
-    NextCrawlAt   string `json:"nextCrawlAt,omitempty"`
+    LastCheckedAt string `json:"lastCheckedAt,omitempty"`
+    NextCheckAt   string `json:"nextCheckAt,omitempty"`
     LastBuildDate string `json:"lastBuildDate,omitempty"`
     Status        string `json:"status,omitempty"`
     LastError     string `json:"lastError,omitempty"`
@@ -40,7 +40,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
     var f FeedDetails
     var category, title, description, language, siteUrl *string
-    var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
+    var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time
     var status, lastError *string
     var oldestItemDate, newestItemDate *time.Time
     var itemCount *int
@@ -49,7 +49,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
     err := c.db.QueryRow(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             status, last_error,
             (SELECT COUNT(*) FROM items WHERE feed_url = feeds.url) as item_count,
             oldest_item_date, newest_item_date,
@@ -57,7 +57,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
         FROM feeds WHERE url = $1
     `, feedURL).Scan(
         &f.URL, &f.Type, &category, &title, &description, &language, &siteUrl,
-        &discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+        &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
         &status, &lastError,
         &itemCount, &oldestItemDate, &newestItemDate,
         &publishStatus, &publishAccount,
@@ -78,11 +78,11 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
     f.Language = StringValue(language)
     f.SiteURL = StringValue(siteUrl)
     f.DiscoveredAt = discoveredAt.Format(time.RFC3339)
-    if lastCrawledAt != nil {
-        f.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
+    if lastCheckedAt != nil {
+        f.LastCheckedAt = lastCheckedAt.Format(time.RFC3339)
     }
-    if nextCrawlAt != nil {
-        f.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
+    if nextCheckAt != nil {
+        f.NextCheckAt = nextCheckAt.Format(time.RFC3339)
     }
     if lastBuildDate != nil {
         f.LastBuildDate = lastBuildDate.Format(time.RFC3339)
+30 -27
@@ -4,6 +4,7 @@ import (
     "encoding/json"
     "fmt"
     "net/http"
+    "strings"
     "time"
     "github.com/jackc/pgx/v5"
@@ -16,16 +17,16 @@
 }
 type SearchFeed struct {
     URL           string `json:"url"`
     Type          string `json:"type"`
     Category      string `json:"category"`
     Title         string `json:"title"`
     Description   string `json:"description"`
     Language      string `json:"language"`
     SiteURL       string `json:"site_url"`
     DiscoveredAt  string `json:"discovered_at"`
-    LastCrawledAt string `json:"last_crawled_at"`
-    NextCrawlAt   string `json:"next_crawl_at"`
+    LastCheckedAt string `json:"last_checked_at"`
+    NextCheckAt   string `json:"next_check_at"`
     LastBuildDate string `json:"last_build_date"`
     Status        string `json:"status"`
     LastError     string `json:"last_error"`
@@ -76,7 +77,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
         var url string
         var feedType, category, title, description, language, siteUrl *string
         var discoveredAt time.Time
-        var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
+        var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time
         var itemCount *int
         var status, lastError *string
         var lastErrorAt *time.Time
@@ -85,7 +86,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
         var noUpdate *bool
         if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl,
-            &discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+            &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
             &status, &lastError, &lastErrorAt,
             &sourceUrl, &sourceHost, &tld,
             &itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil {
@@ -110,11 +111,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
             SourceHost: StringValue(sourceHost),
             TLD:        StringValue(tld),
         }
-        if lastCrawledAt != nil {
-            sf.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
+        if lastCheckedAt != nil {
+            sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339)
         }
-        if nextCrawlAt != nil {
-            sf.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
+        if nextCheckAt != nil {
+            sf.NextCheckAt = nextCheckAt.Format(time.RFC3339)
         }
         if lastBuildDate != nil {
             sf.LastBuildDate = lastBuildDate.Format(time.RFC3339)
@@ -138,16 +139,18 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     }
     // Search feeds by source_host (LIKE search for domain matching)
+    // Use LOWER() to leverage trigram index
+    lowerPattern := "%" + strings.ToLower(query) + "%"
     hostRows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             status, last_error, last_error_at,
             source_url, source_host, tld,
            item_count, oldest_item_date, newest_item_date, no_update
         FROM feeds
-        WHERE source_host ILIKE $1 OR url ILIKE $1
+        WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1
         LIMIT $2
-    `, "%"+query+"%", limit)
+    `, lowerPattern, limit)
     if err == nil {
         defer hostRows.Close()
         for hostRows.Next() {
@@ -163,7 +166,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     tsQuery := ToSearchQuery(query)
     feedRows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             status, last_error, last_error_at,
             source_url, source_host, tld,
            item_count, oldest_item_date, newest_item_date, no_update
@@ -228,7 +231,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
         // Fetch feed info for this item's feed
         var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string
         var fDiscoveredAt time.Time
-        var fLastCrawledAt, fNextCrawlAt, fLastBuildDate *time.Time
+        var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time
         var fItemCount *int
         var fStatus, fLastError *string
         var fLastErrorAt *time.Time
@@ -238,13 +241,13 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
         c.db.QueryRow(`
             SELECT type, category, title, description, language, site_url,
-                discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+                discovered_at, last_checked_at, next_check_at, last_build_date,
                 status, last_error, last_error_at,
                 source_url, source_host, tld,
                item_count, oldest_item_date, newest_item_date, no_update
             FROM feeds WHERE url = $1
         `, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl,
-            &fDiscoveredAt, &fLastCrawledAt, &fNextCrawlAt, &fLastBuildDate,
+            &fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate,
             &fStatus, &fLastError, &fLastErrorAt,
             &fSourceUrl, &fSourceHost, &fTLD,
             &fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate)
@@ -268,11 +271,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
             SourceHost: StringValue(fSourceHost),
             TLD:        StringValue(fTLD),
         }
-        if fLastCrawledAt != nil {
-            sf.LastCrawledAt = fLastCrawledAt.Format(time.RFC3339)
+        if fLastCheckedAt != nil {
+            sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339)
         }
-        if fNextCrawlAt != nil {
-            sf.NextCrawlAt = fNextCrawlAt.Format(time.RFC3339)
+        if fNextCheckAt != nil {
+            sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339)
         }
         if fLastBuildDate != nil {
             sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339)
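The `LOWER(...) LIKE` rewrite above only pays off if a matching expression index exists. The migration is not shown in this commit, so the following is just a hedged sketch of what such indexes could look like; the `pg_trgm` extension and the index names are assumptions, not part of the change:

```go
// Hypothetical one-time migration sketch (not in this commit), run from the
// crawler's setup code: expression trigram indexes that the
// LOWER(source_host)/LOWER(url) LIKE queries above could use.
for _, ddl := range []string{
	`CREATE EXTENSION IF NOT EXISTS pg_trgm`,
	`CREATE INDEX IF NOT EXISTS feeds_source_host_lower_trgm ON feeds USING gin (LOWER(source_host) gin_trgm_ops)`,
	`CREATE INDEX IF NOT EXISTS feeds_url_lower_trgm ON feeds USING gin (LOWER(url) gin_trgm_ops)`,
} {
	if _, err := c.db.Exec(ddl); err != nil {
		fmt.Printf("index migration failed: %v\n", err)
	}
}
```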
+91 -41
@@ -1,9 +1,11 @@
 package main
 import (
+    "context"
     "encoding/json"
     "fmt"
     "io"
+    "net"
     "net/http"
     "os"
     "strings"
@@ -15,23 +17,22 @@ import (
 )
 type Crawler struct {
     MaxDepth        int
     MaxPagesPerHost int
     Timeout         time.Duration
     UserAgent       string
     visited         sync.Map
     feedsMu         sync.Mutex
     client          *http.Client
-    hostsProcessed   int32
-    feedsChecked     int32
-    startTime        time.Time
-    db               *DB
-    displayedCrawlRate int
-    displayedCheckRate int
-    domainsImported  int32
-    cachedStats      *DashboardStats
-    cachedAllDomains []DomainStat
-    statsMu          sync.RWMutex
+    domainsCrawled   int32 // feed_crawl: domains crawled for feed discovery
+    domainsChecked   int32 // domain_check: domains checked for liveness
+    feedsChecked     int32 // feed_check: feeds checked for new items
+    startTime        time.Time
+    db               *DB
+    domainsImported  int32
+    cachedStats      *DashboardStats
+    cachedAllDomains []DomainStat
+    statsMu          sync.RWMutex
 }
 func NewCrawler(connString string) (*Crawler, error) {
@@ -467,43 +468,92 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
     return items, nil
 }
-// StartCrawlLoop runs the domain crawling loop independently
-func (c *Crawler) StartCrawlLoop() {
-    numWorkers := 100
+// dnsResolver uses local caching DNS (infra-dns) with fallback to system
+var dnsResolver = &net.Resolver{
+    PreferGo: true,
+    Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
+        d := net.Dialer{Timeout: 2 * time.Second}
+        // Try local caching DNS first (CoreDNS on proxy network)
+        conn, err := d.DialContext(ctx, "udp", "infra-dns:53")
+        if err == nil {
+            return conn, nil
+        }
+        // Fallback to system DNS
+        return d.DialContext(ctx, network, address)
+    },
+}
+// domainCheck performs a DNS lookup to check if a domain resolves
+func (c *Crawler) domainCheck(host string) error {
+    ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+    defer cancel()
+    _, err := dnsResolver.LookupHost(ctx, host)
+    return err
+}
+// StartDomainLoop runs the domain processing loop (domain_check + feed_crawl)
+func (c *Crawler) StartDomainLoop() {
+    numWorkers := 1000
     // Buffered channel for domain work
-    workChan := make(chan *Domain, 100)
+    workChan := make(chan *Domain, 1000)
     // Start workers
     for i := 0; i < numWorkers; i++ {
         go func() {
             for domain := range workChan {
-                feedsFound, crawlErr := c.crawlHost(domain.Host)
-                errStr := ""
-                if crawlErr != nil {
-                    errStr = crawlErr.Error()
-                }
-                if err := c.markDomainCrawled(domain.Host, feedsFound, errStr); err != nil {
-                    fmt.Printf("Error marking domain %s as crawled: %v\n", domain.Host, err)
+                fh := domain.FullHost()
+                if domain.CrawledAt.Equal(DomainStateUnchecked) {
+                    // domain_check: DNS lookup for liveness
+                    err := c.domainCheck(fh)
+                    errStr := ""
+                    if err != nil {
+                        errStr = err.Error()
+                    }
+                    if err := c.markDomainChecked(domain.Host, domain.TLD, errStr); err != nil {
+                        fmt.Printf("Error marking domain %s as checked: %v\n", fh, err)
+                    }
+                    atomic.AddInt32(&c.domainsChecked, 1)
+                } else {
+                    // feed_crawl: crawl domain to discover feeds
+                    feedsFound, crawlErr := c.feedCrawl(fh)
+                    errStr := ""
+                    if crawlErr != nil {
+                        errStr = crawlErr.Error()
+                    }
+                    if err := c.markDomainCrawled(domain.Host, domain.TLD, feedsFound, errStr); err != nil {
+                        fmt.Printf("Error marking domain %s as crawled: %v\n", fh, err)
+                    }
+                    atomic.AddInt32(&c.domainsCrawled, 1)
                 }
             }
         }()
     }
-    const fetchSize = 100
+    const fetchSize = 1000
     for {
-        domains, err := c.GetDomainsToCrawl(fetchSize)
+        domains, err := c.GetDomainsToProcess(fetchSize)
         if err != nil {
-            fmt.Printf("Error fetching domains to crawl: %v\n", err)
+            fmt.Printf("Error fetching domains to process: %v\n", err)
         }
         if len(domains) == 0 {
-            c.displayedCrawlRate = 0
             time.Sleep(1 * time.Second)
             continue
         }
-        fmt.Printf("%s crawl: %d domains to crawl\n", time.Now().Format("15:04:05"), len(domains))
+        // Count unchecked vs checked for logging
+        unchecked := 0
+        for _, d := range domains {
+            if d.CrawledAt.Equal(DomainStateUnchecked) {
+                unchecked++
+            }
+        }
+        checked := len(domains) - unchecked
+        if unchecked > 0 || checked > 0 {
+            fmt.Printf("%s domain: %d domain_check, %d feed_crawl\n", time.Now().Format("15:04:05"), unchecked, checked)
+        }
         for _, domain := range domains {
             workChan <- domain
@@ -513,12 +563,12 @@ func (c *Crawler) StartCrawlLoop() {
     }
 }
-// StartCheckLoop runs the feed checking loop independently
-func (c *Crawler) StartCheckLoop() {
-    numWorkers := 100
+// StartFeedCheckLoop runs the feed_check loop (checking feeds for new items)
+func (c *Crawler) StartFeedCheckLoop() {
+    numWorkers := 1000
     // Buffered channel for feed work
-    workChan := make(chan *Feed, 100)
+    workChan := make(chan *Feed, 1000)
     // Start workers
     for i := 0; i < numWorkers; i++ {
@@ -537,12 +587,11 @@ func (c *Crawler) StartCheckLoop() {
         }
         if len(feeds) == 0 {
-            c.displayedCheckRate = 0
             time.Sleep(1 * time.Second)
             continue
         }
-        fmt.Printf("%s check: %d feeds to check\n", time.Now().Format("15:04:05"), len(feeds))
+        fmt.Printf("%s feed_check: %d feeds\n", time.Now().Format("15:04:05"), len(feeds))
         for _, feed := range feeds {
             workChan <- feed
@@ -552,8 +601,9 @@ func (c *Crawler) StartCheckLoop() {
     }
 }
-func (c *Crawler) crawlHost(host string) (feedsFound int, err error) {
-    atomic.AddInt32(&c.hostsProcessed, 1)
+// feedCrawl crawls a domain to discover RSS/Atom feeds
+func (c *Crawler) feedCrawl(host string) (feedsFound int, err error) {
+    atomic.AddInt32(&c.domainsCrawled, 1)
     localVisited := make(map[string]bool)
     pagesVisited := 0
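main.go is not part of this excerpt, but given the renamed entry points above, the domain loop and the feed_check loop presumably run independently alongside the dashboard, roughly like this (a sketch only; the wrapper function name and the listen address are assumptions):

```go
// Sketch, not from the commit: wiring the two loops and the dashboard.
func run(c *Crawler) error {
	go c.StartDomainLoop()           // domain_check + feed_crawl
	go c.StartFeedCheckLoop()        // feed_check with conditional HTTP
	return c.StartDashboard(":8080") // blocks; address is an assumption
}
```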
+55 -22
@@ -12,17 +12,26 @@ type DashboardStats struct {
     HoldDomains int `json:"hold_domains"`
     PassDomains int `json:"pass_domains"`
     SkipDomains int `json:"skip_domains"`
+    DeadDomains int `json:"dead_domains"`
     // Feed stats
     TotalFeeds   int `json:"total_feeds"`
+    AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
+    PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
+    SkipFeeds    int `json:"skip_feeds"`
+    HoldFeeds    int `json:"hold_feeds"`
+    DeadFeeds    int `json:"dead_feeds"`
+    EmptyFeeds   int `json:"empty_feeds"`
     RSSFeeds     int `json:"rss_feeds"`
     AtomFeeds    int `json:"atom_feeds"`
+    JSONFeeds    int `json:"json_feeds"`
     UnknownFeeds int `json:"unknown_feeds"`
-    // Crawl progress
-    HostsProcessed int32 `json:"hosts_processed"`
-    CrawlRate      int   `json:"crawl_rate"` // crawls per minute
-    CheckRate      int   `json:"check_rate"` // feed checks per minute
+    // Processing rates (per minute)
+    DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
+    DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
+    FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
+    FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute
     // Timing
     UpdatedAt time.Time `json:"updated_at"`
@@ -122,28 +131,15 @@ func (c *Crawler) GetDashboardStats() (*DashboardStats, error) {
 func (c *Crawler) calculateStats() (*DashboardStats, error) {
     stats := &DashboardStats{
         UpdatedAt: time.Now(),
-        HostsProcessed: c.hostsProcessed,
+        DomainsCrawled: c.domainsCrawled,
     }
-    // Calculate crawl rate (crawls per minute), smoothed by +/-1 per update
+    // Calculate rates (per minute)
     elapsed := time.Since(c.startTime).Minutes()
     if elapsed > 0 {
-        actualRate := int(float64(c.hostsProcessed) / elapsed)
-        if actualRate > c.displayedCrawlRate {
-            c.displayedCrawlRate++
-        } else if actualRate < c.displayedCrawlRate {
-            c.displayedCrawlRate--
-        }
-        stats.CrawlRate = c.displayedCrawlRate
-        // Calculate check rate (feed checks per minute), smoothed by +/-1 per update
-        actualCheckRate := int(float64(c.feedsChecked) / elapsed)
-        if actualCheckRate > c.displayedCheckRate {
-            c.displayedCheckRate++
-        } else if actualCheckRate < c.displayedCheckRate {
-            c.displayedCheckRate--
-        }
-        stats.CheckRate = c.displayedCheckRate
+        stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed)
+        stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed)
+        stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed)
     }
     // Get domain stats
@@ -186,6 +182,8 @@ func (c *Crawler) collectDomainStats(stats *DashboardStats) error {
             stats.PassDomains = count
         case "skip":
             stats.SkipDomains = count
+        case "dead":
+            stats.DeadDomains = count
         }
     }
     if err := rows.Err(); err != nil {
@@ -202,6 +200,39 @@
         return err
     }
+    // Get status counts
+    statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status")
+    if err != nil {
+        return err
+    }
+    defer statusRows.Close()
+    for statusRows.Next() {
+        var status *string
+        var count int
+        if err := statusRows.Scan(&status, &count); err != nil {
+            continue
+        }
+        if status != nil {
+            switch *status {
+            case "pass":
+                stats.AliveFeeds = count
+            case "skip":
+                stats.SkipFeeds = count
+            case "hold":
+                stats.HoldFeeds = count
+            case "dead":
+                stats.DeadFeeds = count
+            }
+        }
+    }
+    // Count feeds approved for publishing (publish_status='pass')
+    c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds)
+    // Count empty feeds (item_count = 0 or NULL)
+    c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds)
     // Single query to get all type counts (one index scan instead of three)
     rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type")
     if err != nil {
@@ -223,6 +254,8 @@ func (c *Crawler) collectFeedStats(stats *DashboardStats) error {
             stats.RSSFeeds = count
         case "atom":
             stats.AtomFeeds = count
+        case "json":
+            stats.JSONFeeds = count
         default:
             stats.UnknownFeeds += count
         }
+4 -3
@@ -1,14 +1,15 @@
 services:
   app-1440-news:
     build: .
-    container_name: app-1440-news
+    image: atproto-1440news-app
+    container_name: atproto-1440news-app
     restart: unless-stopped
     stop_grace_period: 30s
     env_file:
       - pds.env
       - oauth.env
     environment:
-      DB_HOST: atproto-postgres
+      DB_HOST: infra-postgres
       DB_PORT: 5432
       DB_USER: news_1440
       DB_PASSWORD_FILE: /run/secrets/db_password
@@ -54,7 +55,7 @@ services:
 secrets:
   db_password:
-    file: ../postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/news_1440_password.txt
 networks:
   proxy:
+126 -71
@@ -14,19 +14,38 @@ import (
     "github.com/jackc/pgx/v5"
 )
-// Domain represents a host to be crawled for feeds
+// Domain represents a host to process for feeds
 // Status: hold (pending review), pass (approved), skip (not processing)
+// CrawledAt: zero time = needs domain_check, +1 sec = needs feed_crawl, real time = done
 type Domain struct {
     Host       string    `json:"host"`
     Status     string    `json:"status"`
-    DiscoveredAt  time.Time `json:"discovered_at"`
-    LastCheckedAt time.Time `json:"last_checked_at,omitempty"`
-    LastCrawledAt time.Time `json:"last_crawled_at,omitempty"`
-    FeedsFound    int       `json:"feeds_found,omitempty"`
-    LastError     string    `json:"last_error,omitempty"`
-    TLD           string    `json:"tld,omitempty"`
+    CrawledAt  time.Time `json:"crawled_at"`
+    FeedsFound int       `json:"feeds_found,omitempty"`
+    LastError  string    `json:"last_error,omitempty"`
+    TLD        string    `json:"tld,omitempty"`
+    MissCount  int       `json:"miss_count,omitempty"`
 }
+// MissCountThreshold is the number of consecutive errors before setting status to hold
+const MissCountThreshold = 100
+// ErrorRetryDelay is how long to wait before retrying a domain with errors (1 hour minimum)
+// At 100 seconds actual rate due to queue, 100 misses = ~2.8 hours
+// At 1 hour minimum delay, 100 misses = ~4+ days in practice
+var ErrorRetryDelay = 1 * time.Hour
+// FullHost returns the complete hostname (host + tld)
+func (d *Domain) FullHost() string {
+    return fullHost(d.Host, d.TLD)
+}
+// Sentinel values for domain processing state
+var (
+    DomainStateUnchecked = time.Time{}                  // 0001-01-01 00:00:00 - needs domain_check
+    DomainStateChecked   = time.Time{}.Add(time.Second) // 0001-01-01 00:00:01 - needs feed_crawl
+)
 // shouldAutoSkipDomain checks if a domain should be auto-skipped based on patterns
 func shouldAutoSkipDomain(host string) bool {
     // Never skip our own domain
@@ -51,62 +70,63 @@ func shouldAutoSkipDomain(host string) bool {
 // saveDomain stores a domain in PostgreSQL
 func (c *Crawler) saveDomain(domain *Domain) error {
     // Auto-skip domains matching spam patterns
+    fh := domain.FullHost()
     status := domain.Status
-    if shouldAutoSkipDomain(domain.Host) {
+    if shouldAutoSkipDomain(fh) {
         status = "skip"
     }
     _, err := c.db.Exec(`
-        INSERT INTO domains (host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld)
-        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
-        ON CONFLICT(host) DO UPDATE SET
+        INSERT INTO domains (host, status, crawled_at, feeds_found, last_error, tld)
+        VALUES ($1, $2, $3, $4, $5, $6)
+        ON CONFLICT(host, tld) DO UPDATE SET
             status = EXCLUDED.status,
-            last_checked_at = EXCLUDED.last_checked_at,
-            last_crawled_at = EXCLUDED.last_crawled_at,
+            crawled_at = EXCLUDED.crawled_at,
             feeds_found = EXCLUDED.feeds_found,
-            last_error = EXCLUDED.last_error,
-            tld = EXCLUDED.tld
-    `, domain.Host, status, domain.DiscoveredAt, NullableTime(domain.LastCheckedAt),
-        NullableTime(domain.LastCrawledAt), domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
+            last_error = EXCLUDED.last_error
+    `, stripTLD(fh), status, domain.CrawledAt,
+        domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
     return err
 }
 // saveDomainTx stores a domain using a transaction
 func (c *Crawler) saveDomainTx(tx pgx.Tx, domain *Domain) error {
     // Auto-skip domains matching spam patterns
+    fh := domain.FullHost()
     status := domain.Status
-    if shouldAutoSkipDomain(domain.Host) {
+    if shouldAutoSkipDomain(fh) {
         status = "skip"
     }
     _, err := tx.Exec(context.Background(), `
-        INSERT INTO domains (host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld)
-        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
-        ON CONFLICT(host) DO NOTHING
-    `, domain.Host, status, domain.DiscoveredAt, NullableTime(domain.LastCheckedAt),
-        NullableTime(domain.LastCrawledAt), domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
+        INSERT INTO domains (host, status, crawled_at, feeds_found, last_error, tld)
+        VALUES ($1, $2, $3, $4, $5, $6)
+        ON CONFLICT(host, tld) DO NOTHING
+    `, stripTLD(fh), status, domain.CrawledAt,
+        domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
     return err
 }
 // domainExists checks if a domain already exists in the database
 func (c *Crawler) domainExists(host string) bool {
+    host = normalizeHost(host)
     var exists bool
-    err := c.db.QueryRow("SELECT EXISTS(SELECT 1 FROM domains WHERE host = $1)", normalizeHost(host)).Scan(&exists)
+    err := c.db.QueryRow("SELECT EXISTS(SELECT 1 FROM domains WHERE host = $1 AND tld = $2)", stripTLD(host), getTLD(host)).Scan(&exists)
     return err == nil && exists
 }
 // getDomain retrieves a domain from PostgreSQL
 func (c *Crawler) getDomain(host string) (*Domain, error) {
+    host = normalizeHost(host)
     domain := &Domain{}
-    var lastCheckedAt, lastCrawledAt *time.Time
     var lastError *string
     err := c.db.QueryRow(`
-        SELECT host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld
-        FROM domains WHERE host = $1
-    `, normalizeHost(host)).Scan(
-        &domain.Host, &domain.Status, &domain.DiscoveredAt, &lastCheckedAt, &lastCrawledAt,
-        &domain.FeedsFound, &lastError, &domain.TLD,
+        SELECT host, tld, status, crawled_at, feeds_found, last_error
+        FROM domains WHERE host = $1 AND tld = $2
+    `, stripTLD(host), getTLD(host)).Scan(
+        &domain.Host, &domain.TLD, &domain.Status, &domain.CrawledAt,
+        &domain.FeedsFound, &lastError,
     )
     if err == pgx.ErrNoRows {
@@ -116,21 +136,26 @@ func (c *Crawler) getDomain(host string) (*Domain, error) {
         return nil, err
     }
-    domain.LastCheckedAt = TimeValue(lastCheckedAt)
-    domain.LastCrawledAt = TimeValue(lastCrawledAt)
     domain.LastError = StringValue(lastError)
     return domain, nil
 }
-// GetDomainsToCrawl returns domains ready for crawling (status='pass', not yet crawled)
-func (c *Crawler) GetDomainsToCrawl(limit int) ([]*Domain, error) {
+// GetDomainsToProcess returns domains needing processing (domain_check or feed_crawl)
+// crawled_at = zero time means needs domain_check, +1 sec means needs feed_crawl
+// Domains with errors are retried when crawled_at < now (scheduled by ErrorRetryDelay)
+func (c *Crawler) GetDomainsToProcess(limit int) ([]*Domain, error) {
+    now := time.Now()
     rows, err := c.db.Query(`
-        SELECT host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld
-        FROM domains WHERE status = 'pass' AND last_crawled_at IS NULL
-        ORDER BY discovered_at DESC
-        LIMIT $1
-    `, limit)
+        SELECT host, status, crawled_at, feeds_found, last_error, tld
+        FROM domains
+        WHERE status = 'pass' AND (
+            (crawled_at < '0001-01-02' AND last_error IS NULL) -- new domains
+            OR (crawled_at < $1 AND last_error IS NOT NULL) -- retry errors after delay
+        )
+        ORDER BY crawled_at ASC
+        LIMIT $2
+    `, now, limit)
     if err != nil {
         return nil, err
     }
@@ -139,23 +164,45 @@
     return c.scanDomains(rows)
 }
+// markDomainChecked updates a domain after domain_check (sets to +1 sec for feed_crawl)
+// host parameter should be the stripped host (without TLD)
+func (c *Crawler) markDomainChecked(host, tld, lastError string) error {
+    if lastError != "" {
+        // Increment miss_count, set to 'hold' only at threshold
+        // Schedule retry after ErrorRetryDelay
+        retryAt := time.Now().Add(ErrorRetryDelay)
+        _, err := c.db.Exec(`
+            UPDATE domains SET
+                crawled_at = $1,
+                last_error = $2,
+                miss_count = miss_count + 1,
+                status = CASE WHEN miss_count + 1 >= $3 THEN 'hold' ELSE status END
+            WHERE host = $4 AND tld = $5
+        `, retryAt, lastError, MissCountThreshold, host, tld)
+        return err
+    }
+    // Success - reset miss_count
+    _, err := c.db.Exec(`
+        UPDATE domains SET crawled_at = $1, last_error = NULL, miss_count = 0
+        WHERE host = $2 AND tld = $3
+    `, DomainStateChecked, host, tld)
+    return err
+}
 // scanDomains is a helper to scan multiple domain rows
 func (c *Crawler) scanDomains(rows pgx.Rows) ([]*Domain, error) {
     var domains []*Domain
     for rows.Next() {
         domain := &Domain{}
-        var lastCheckedAt, lastCrawledAt *time.Time
         var lastError *string
         if err := rows.Scan(
-            &domain.Host, &domain.Status, &domain.DiscoveredAt, &lastCheckedAt, &lastCrawledAt,
+            &domain.Host, &domain.Status, &domain.CrawledAt,
             &domain.FeedsFound, &lastError, &domain.TLD,
         ); err != nil {
             continue
         }
-        domain.LastCheckedAt = TimeValue(lastCheckedAt)
-        domain.LastCrawledAt = TimeValue(lastCrawledAt)
         domain.LastError = StringValue(lastError)
         domains = append(domains, domain)
@@ -164,20 +211,30 @@
     return domains, rows.Err()
 }
-// markDomainCrawled updates a domain after the crawl stage
-func (c *Crawler) markDomainCrawled(host string, feedsFound int, lastError string) error {
-    now := time.Now()
+// markDomainCrawled updates a domain after feed_crawl (sets to NOW())
+// host parameter should be the stripped host (without TLD)
+func (c *Crawler) markDomainCrawled(host, tld string, feedsFound int, lastError string) error {
     if lastError != "" {
+        // Increment miss_count, set to 'hold' only at threshold
+        // Schedule retry after ErrorRetryDelay
+        retryAt := time.Now().Add(ErrorRetryDelay)
         _, err := c.db.Exec(`
-            UPDATE domains SET last_crawled_at = $1, feeds_found = $2, last_error = $3
-            WHERE host = $4
-        `, now, feedsFound, lastError, normalizeHost(host))
+            UPDATE domains SET
+                crawled_at = $1,
+                feeds_found = $2,
+                last_error = $3,
+                miss_count = miss_count + 1,
+                status = CASE WHEN miss_count + 1 >= $4 THEN 'hold' ELSE status END
+            WHERE host = $5 AND tld = $6
+        `, retryAt, feedsFound, lastError, MissCountThreshold, host, tld)
         return err
     }
+    // Success - reset miss_count
+    now := time.Now()
     _, err := c.db.Exec(`
-        UPDATE domains SET last_crawled_at = $1, feeds_found = $2, last_error = NULL
-        WHERE host = $3
-    `, now, feedsFound, normalizeHost(host))
+        UPDATE domains SET crawled_at = $1, feeds_found = $2, last_error = NULL, miss_count = 0
+        WHERE host = $3 AND tld = $4
+    `, now, feedsFound, host, tld)
     return err
 }
@@ -193,13 +250,13 @@ func (c *Crawler) GetDomainCount() (total int, hold int, err error) {
 // ImportTestDomains adds a list of specific domains for testing
 func (c *Crawler) ImportTestDomains(domains []string) {
-    now := time.Now()
     for _, host := range domains {
+        host = normalizeHost(host)
         _, err := c.db.Exec(`
-            INSERT INTO domains (host, status, discovered_at, tld)
-            VALUES ($1, 'pass', $2, $3)
-            ON CONFLICT(host) DO NOTHING
-        `, host, now, getTLD(host))
+            INSERT INTO domains (host, status, tld)
+            VALUES ($1, 'pass', $2)
+            ON CONFLICT(host, tld) DO NOTHING
+        `, stripTLD(host), getTLD(host))
         if err != nil {
             fmt.Printf("Error adding test domain %s: %v\n", host, err)
         } else {
@@ -255,7 +312,6 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
     scanner.Buffer(buf, 1024*1024)
     const batchSize = 100
-    now := time.Now()
     totalImported := 0
     batchCount := 0
@@ -299,14 +355,14 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
             if shouldAutoSkipDomain(d.host) {
                 status = "skip"
             }
-            rows[i] = []interface{}{d.host, status, now, d.tld}
+            rows[i] = []interface{}{stripTLD(d.host), status, d.tld}
         }
         // Use CopyFrom for bulk insert
         imported, err := conn.CopyFrom(
             ctx,
             pgx.Identifier{"domains"},
-            []string{"host", "status", "discovered_at", "tld"},
+            []string{"host", "status", "tld"},
             pgx.CopyFromRows(rows),
         )
         conn.Release()
@@ -319,10 +375,10 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
                 status = "skip"
             }
             c.db.Exec(`
-                INSERT INTO domains (host, status, discovered_at, tld)
-                VALUES ($1, $2, $3, $4)
-                ON CONFLICT(host) DO NOTHING
-            `, d.host, status, now, d.tld)
+                INSERT INTO domains (host, status, tld)
+                VALUES ($1, $2, $3)
+                ON CONFLICT(host, tld) DO NOTHING
+            `, stripTLD(d.host), status, d.tld)
         }
         imported = int64(len(domains))
     }
@@ -369,7 +425,6 @@ func (c *Crawler) parseAndStoreDomains(reader io.Reader, limit int) (imported in
     buf := make([]byte, 0, 64*1024)
     scanner.Buffer(buf, 1024*1024)
-    now := time.Now()
     count := 0
     const batchSize = 100
@@ -408,10 +463,10 @@ func (c *Crawler) parseAndStoreDomains(reader io.Reader, limit int) (imported in
             status = "skip"
         }
         result, err := c.db.Exec(`
-            INSERT INTO domains (host, status, discovered_at, tld)
-            VALUES ($1, $2, $3, $4)
-            ON CONFLICT(host) DO NOTHING
-        `, d.host, status, now, d.tld)
+            INSERT INTO domains (host, status, tld)
+            VALUES ($1, $2, $3)
+            ON CONFLICT(host, tld) DO NOTHING
+        `, stripTLD(d.host), status, d.tld)
         if err != nil {
             skipped++
         } else if result > 0 {
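Taken together with the sentinel values defined at the top of this file, the `crawled_at` column doubles as a tiny state machine: zero time means the domain still needs domain_check, zero time plus one second means it is ready for feed_crawl, and any real timestamp means it has been processed (or is scheduled for an error retry). A minimal sketch of how a caller in the same package could read that state (the helper name `domainState` is hypothetical, not part of the commit):

```go
// Illustration only: classify a domain by the crawled_at sentinels above.
func domainState(d *Domain) string {
	switch {
	case d.CrawledAt.Equal(DomainStateUnchecked):
		return "needs domain_check" // zero time, never touched
	case d.CrawledAt.Equal(DomainStateChecked):
		return "needs feed_crawl" // DNS resolved, not yet crawled
	default:
		return "processed" // real timestamp, or a scheduled error retry
	}
}
```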
+25 -25
@@ -101,8 +101,8 @@ type Feed struct {
     // Timing
     DiscoveredAt  time.Time `json:"discovered_at"`
-    LastCrawledAt time.Time `json:"last_crawled_at,omitempty"`
-    NextCrawlAt   time.Time `json:"next_crawl_at,omitempty"`
+    LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: when last checked
+    NextCheckAt   time.Time `json:"next_check_at,omitempty"`   // feed_check: when to next check
     LastBuildDate time.Time `json:"last_build_date,omitempty"` // From feed's lastBuildDate/updated
     // Cache headers for conditional requests
@@ -120,7 +120,7 @@ type Feed struct {
     TLD string `json:"tld,omitempty"`
     // Content stats
-    ItemCount      int       `json:"item_count,omitempty"` // Number of items in last crawl
+    ItemCount      int       `json:"item_count,omitempty"` // Number of items in last feed_check
     OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
     NewestItemDate time.Time `json:"newest_item_date,omitempty"`
@@ -153,7 +153,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
     _, err := c.db.Exec(`
         INSERT INTO feeds (
             url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -168,8 +168,8 @@
             description = EXCLUDED.description,
             language = EXCLUDED.language,
             site_url = EXCLUDED.site_url,
-            last_crawled_at = EXCLUDED.last_crawled_at,
-            next_crawl_at = EXCLUDED.next_crawl_at,
+            last_checked_at = EXCLUDED.last_checked_at,
+            next_check_at = EXCLUDED.next_check_at,
             last_build_date = EXCLUDED.last_build_date,
             etag = EXCLUDED.etag,
             last_modified = EXCLUDED.last_modified,
@@ -185,7 +185,7 @@
     `,
         feed.URL, feed.Type, feed.Category, NullableString(feed.Title), NullableString(feed.Description),
         NullableString(feed.Language), NullableString(feed.SiteURL),
-        feed.DiscoveredAt, NullableTime(feed.LastCrawledAt), NullableTime(feed.NextCrawlAt), NullableTime(feed.LastBuildDate),
+        feed.DiscoveredAt, NullableTime(feed.LastCheckedAt), NullableTime(feed.NextCheckAt), NullableTime(feed.LastBuildDate),
         NullableString(feed.ETag), NullableString(feed.LastModified),
         feed.Status, NullableString(feed.LastError), NullableTime(feed.LastErrorAt),
         NullableString(feed.SourceURL), NullableString(feed.SourceHost), NullableString(feed.TLD),
@@ -200,14 +200,14 @@
 func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
     feed := &Feed{}
     var category, title, description, language, siteURL *string
-    var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
+    var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
     var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
     var publishStatus, publishAccount *string
     var itemCount, noUpdate *int
     err := c.db.QueryRow(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -217,7 +217,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
         FROM feeds WHERE url = $1
     `, normalizeURL(feedURL)).Scan(
         &feed.URL, &feed.Type, &category, &title, &description, &language, &siteURL,
-        &feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+        &feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
         &etag, &lastModified,
         &feed.Status, &lastError, &lastErrorAt,
         &sourceURL, &sourceHost, &tld,
@@ -243,8 +243,8 @@
     feed.Description = StringValue(description)
     feed.Language = StringValue(language)
     feed.SiteURL = StringValue(siteURL)
-    feed.LastCrawledAt = TimeValue(lastCrawledAt)
-    feed.NextCrawlAt = TimeValue(nextCrawlAt)
+    feed.LastCheckedAt = TimeValue(lastCheckedAt)
+    feed.NextCheckAt = TimeValue(nextCheckAt)
     feed.LastBuildDate = TimeValue(lastBuildDate)
     feed.ETag = StringValue(etag)
     feed.LastModified = StringValue(lastModified)
@@ -282,7 +282,7 @@
 func (c *Crawler) GetAllFeeds() ([]*Feed, error) {
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -313,11 +313,11 @@ func (c *Crawler) GetFeedCountByHost(host string) (int, error) {
     return count, err
 }
-// GetFeedsDueForCheck returns feeds where next_crawl_at <= now, ordered by no_update desc (prioritize infrequent feeds)
+// GetFeedsDueForCheck returns feeds for feed_check, ordered by last_checked_at ASC (oldest first)
 func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -325,8 +325,8 @@
             no_update,
             publish_status, publish_account
         FROM feeds
-        WHERE next_crawl_at <= NOW() AND status = 'pass'
-        ORDER BY no_update DESC
+        WHERE last_checked_at > '0001-01-01 00:00:00' AND status = 'pass'
+        ORDER BY last_checked_at ASC
         LIMIT $1
     `, limit)
     if err != nil {
@@ -341,7 +341,7 @@
 func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) {
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -363,7 +363,7 @@ func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) {
     tsquery := ToSearchQuery(query)
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -389,7 +389,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
     for rows.Next() {
         feed := &Feed{}
         var feedType, category, title, description, language, siteURL *string
-        var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
+        var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
         var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
         var itemCount, noUpdate *int
         var status *string
@@ -397,7 +397,7 @@
         if err := rows.Scan(
             &feed.URL, &feedType, &category, &title, &description, &language, &siteURL,
-            &feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+            &feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
             &etag, &lastModified,
             &status, &lastError, &lastErrorAt,
             &sourceURL, &sourceHost, &tld,
@@ -419,8 +419,8 @@
         feed.Description = StringValue(description)
         feed.Language = StringValue(language)
         feed.SiteURL = StringValue(siteURL)
-        feed.LastCrawledAt = TimeValue(lastCrawledAt)
-        feed.NextCrawlAt = TimeValue(nextCrawlAt)
+        feed.LastCheckedAt = TimeValue(lastCheckedAt)
+        feed.NextCheckAt = TimeValue(nextCheckAt)
         feed.LastBuildDate = TimeValue(lastBuildDate)
         feed.ETag = StringValue(etag)
         feed.LastModified = StringValue(lastModified)
@@ -471,7 +471,7 @@
 func (c *Crawler) GetFeedsByPublishStatus(status string) ([]*Feed, error) {
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
@@ -493,7 +493,7 @@
 func (c *Crawler) GetPublishCandidates(limit int) ([]*Feed, error) {
     rows, err := c.db.Query(`
         SELECT url, type, category, title, description, language, site_url,
-            discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+            discovered_at, last_checked_at, next_check_at, last_build_date,
             etag, last_modified,
             status, last_error, last_error_at,
             source_url, source_host, tld,
+11 -11
View File
@@ -31,7 +31,7 @@ func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Hea
Type: feedType, Type: feedType,
Category: classifyFeed(feedURL), Category: classifyFeed(feedURL),
DiscoveredAt: now, DiscoveredAt: now,
LastCrawledAt: now, LastCheckedAt: now,
Status: "pass", Status: "pass",
SourceHost: sourceHost, SourceHost: sourceHost,
TLD: getTLD(sourceHost), TLD: getTLD(sourceHost),
@@ -53,8 +53,8 @@ func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Hea
// Refine category based on parsed title (e.g., "Comments on:") // Refine category based on parsed title (e.g., "Comments on:")
feed.Category = classifyFeedByTitle(feed.Title, feed.Category) feed.Category = classifyFeedByTitle(feed.Title, feed.Category)
// Calculate next crawl time // Calculate next feed_check time
feed.NextCrawlAt = c.calculateNextCrawl(feed) feed.NextCheckAt = c.calculateNextCheck(feed)
if err := c.saveFeed(feed); err != nil { if err := c.saveFeed(feed); err != nil {
return return
@@ -92,7 +92,7 @@ func (c *Crawler) addFeed(feedURL, feedType, sourceHost, sourceURL string) {
SourceURL: normalizeURL(sourceURL), SourceURL: normalizeURL(sourceURL),
SourceHost: sourceHost, SourceHost: sourceHost,
TLD: getTLD(sourceHost), TLD: getTLD(sourceHost),
NextCrawlAt: now, // Should be crawled immediately NextCheckAt: now, // Should be crawled immediately
} }
if err := c.saveFeed(feed); err != nil { if err := c.saveFeed(feed); err != nil {
@@ -148,9 +148,9 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
err = fmt.Errorf("all URL variants failed") err = fmt.Errorf("all URL variants failed")
} }
now := time.Now() now := time.Now()
feed.LastCrawledAt = now feed.LastCheckedAt = now
feed.NoUpdate++ feed.NoUpdate++
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second) feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
feed.LastError = err.Error() feed.LastError = err.Error()
feed.LastErrorAt = now feed.LastErrorAt = now
feed.Status = "hold" feed.Status = "hold"
@@ -165,13 +165,13 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
defer resp.Body.Close() defer resp.Body.Close()
now := time.Now() now := time.Now()
feed.LastCrawledAt = now feed.LastCheckedAt = now
// 304 Not Modified - feed hasn't changed // 304 Not Modified - feed hasn't changed
if resp.StatusCode == http.StatusNotModified { if resp.StatusCode == http.StatusNotModified {
feed.NoUpdate++ feed.NoUpdate++
// Adaptive backoff: 100s base + 100s per consecutive no-change // Adaptive backoff: 100s base + 100s per consecutive no-change
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second) feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
feed.LastError = "" feed.LastError = ""
feed.Status = "pass" feed.Status = "pass"
// Auto-hold feeds after 1000 consecutive no-changes // Auto-hold feeds after 1000 consecutive no-changes
@@ -186,7 +186,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
// Non-200 response // Non-200 response
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
feed.NoUpdate++ feed.NoUpdate++
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second) feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
feed.LastError = resp.Status feed.LastError = resp.Status
feed.LastErrorAt = now feed.LastErrorAt = now
feed.Status = "hold" feed.Status = "hold"
@@ -203,7 +203,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
bodyBytes, err := io.ReadAll(resp.Body) bodyBytes, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
feed.NoUpdate++ feed.NoUpdate++
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second) feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
feed.LastError = err.Error() feed.LastError = err.Error()
feed.LastErrorAt = now feed.LastErrorAt = now
feed.Status = "hold" feed.Status = "hold"
@@ -238,7 +238,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
// Content changed - reset backoff // Content changed - reset backoff
feed.NoUpdate = 0 feed.NoUpdate = 0
feed.NextCrawlAt = now.Add(100 * time.Second) feed.NextCheckAt = now.Add(100 * time.Second)
feed.LastError = "" feed.LastError = ""
feed.Status = "pass" feed.Status = "pass"
c.saveFeed(feed) c.saveFeed(feed)
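
The CheckFeed hunks above rely on conditional HTTP: the feed's stored validators are sent with the request, a 304 Not Modified bumps NoUpdate and pushes NextCheckAt out, and a 200 with changed content resets the backoff. A minimal sketch of that request shape, assuming the stored ETag and Last-Modified values are plain strings (the real client setup and field names are not shown in these hunks):

package main

import (
	"fmt"
	"net/http"
)

// conditionalGet sends the validators a feed_check would reuse; a 304
// response means the body was not re-downloaded. Parameter names are
// illustrative, not the crawler's real API.
func conditionalGet(client *http.Client, url, etag, lastModified string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}
	if lastModified != "" {
		req.Header.Set("If-Modified-Since", lastModified)
	}
	return client.Do(req)
}

func main() {
	resp, err := conditionalGet(http.DefaultClient, "https://example.com/feed.xml", `"abc123"`, "Mon, 02 Jan 2006 15:04:05 GMT")
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode) // 304 -> keep cached items, extend backoff
}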
+2 -2
View File
@@ -386,8 +386,8 @@ func parseRSSDate(s string) (time.Time, error) {
return time.Time{}, fmt.Errorf("unable to parse date: %s", s) return time.Time{}, fmt.Errorf("unable to parse date: %s", s)
} }
// calculateNextCrawl determines when to next crawl this feed // calculateNextCheck determines when to next check this feed (feed_check)
func (c *Crawler) calculateNextCrawl(feed *Feed) time.Time { func (c *Crawler) calculateNextCheck(feed *Feed) time.Time {
// Adaptive backoff: 100s base + 100s per consecutive no-change // Adaptive backoff: 100s base + 100s per consecutive no-change
return time.Now().Add(time.Duration(100+100*feed.NoUpdate) * time.Second) return time.Now().Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
} }
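
calculateNextCheck above gives a linear schedule: 100 s after a change, then 200 s, 300 s, and so on for each consecutive unchanged check, with feeds auto-held after 1000 consecutive no-changes, so the delay this formula produces tops out a little under 28 hours. A quick sketch of the resulting intervals:

package main

import (
	"fmt"
	"time"
)

// nextCheckDelay reproduces the backoff used above: 100s base plus 100s per
// consecutive unchanged check (feed.NoUpdate).
func nextCheckDelay(noUpdate int) time.Duration {
	return time.Duration(100+100*noUpdate) * time.Second
}

func main() {
	for _, n := range []int{0, 1, 2, 10, 999} {
		fmt.Printf("NoUpdate=%d -> next check in %v\n", n, nextCheckDelay(n))
	}
	// Output: 1m40s, 3m20s, 5m0s, 18m20s, 27h46m40s
}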
+3
View File
@@ -109,6 +109,9 @@ func (c *Crawler) StartDashboard(addr string) error {
http.HandleFunc("/api/tlds", withAuth(func(w http.ResponseWriter, r *http.Request) { http.HandleFunc("/api/tlds", withAuth(func(w http.ResponseWriter, r *http.Request) {
c.handleAPITLDs(w, r) c.handleAPITLDs(w, r)
})) }))
http.HandleFunc("/api/searchStats", withAuth(func(w http.ResponseWriter, r *http.Request) {
c.handleAPISearchStats(w, r)
}))
http.HandleFunc("/api/tldDomains", withAuth(func(w http.ResponseWriter, r *http.Request) { http.HandleFunc("/api/tldDomains", withAuth(func(w http.ResponseWriter, r *http.Request) {
c.handleAPITLDDomains(w, r) c.handleAPITLDDomains(w, r)
})) }))
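
The new /api/searchStats route above goes through the same withAuth wrapper as the neighbouring endpoints; its handler body is not visible in this commit view. Purely as a sketch of the JSON contract, here is a handler shape that returns a few of the snake_case counters the dashboard's updateStatsForSearch reads later in this diff (total_domains, pass_domains, total_feeds, and friends). The struct, how the counts would be computed, and the standalone wiring are all assumptions:

package main

import (
	"encoding/json"
	"net/http"
)

// searchStats lists a few of the counters the dashboard expects; the field
// names come from the dashboard code, everything else is illustrative.
type searchStats struct {
	TotalDomains int `json:"total_domains"`
	PassDomains  int `json:"pass_domains"`
	TotalFeeds   int `json:"total_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
}

func handleSearchStats(w http.ResponseWriter, r *http.Request) {
	search := r.URL.Query().Get("search")
	_ = search // the real handler scopes its counts to this term
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(searchStats{})
}

func main() {
	http.HandleFunc("/api/searchStats", handleSearchStats)
	http.ListenAndServe(":8080", nil)
}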
-42
View File
@@ -1,42 +0,0 @@
#!/bin/bash
# Deploy script - increments version, commits, pushes, and relaunches container
# Usage: ./scripts/deploy.sh [optional commit message]
set -e
cd "$(dirname "$0")/.."
# Extract current version number from templates.go
CURRENT=$(grep -o '>v[0-9]*<' templates.go | grep -o '[0-9]*' | head -1)
if [ -z "$CURRENT" ]; then
echo "Could not find version number in templates.go"
exit 1
fi
# Increment version
NEW=$((CURRENT + 1))
# Update templates.go
sed -i '' "s/>v${CURRENT}</>v${NEW}</" templates.go
echo "Version: v${CURRENT} -> v${NEW}"
# Build commit message
if [ -n "$1" ]; then
COMMIT_MSG="v${NEW}: $1"
else
COMMIT_MSG="v${NEW}"
fi
# Commit and push
git add -A
git commit -m "$COMMIT_MSG"
git push
echo "Committed: $COMMIT_MSG"
# Rebuild and relaunch
docker compose up -d --build
echo "Deployed v${NEW}"
+451 -65
View File
@@ -14,6 +14,142 @@ function initDashboard() {
let infiniteScrollState = null; let infiniteScrollState = null;
let isLoadingMore = false; let isLoadingMore = false;
let searchQuery = ''; let searchQuery = '';
let domainFilter = 'all'; // all, pass, skip, hold, dead
// Feed filter: multi-select with ALL as exclusion toggle
// When allSelected=true, selected items are EXCLUDED; when false, selected items are INCLUDED
let feedFilter = { allSelected: false, statuses: [], types: [] };
let currentOpenTLD = null; // Track which TLD is currently open
// Smart sticky header - scroll normally, show fixed on scroll up
let lastScrollY = 0;
const topSection = document.getElementById('topSection');
const spacer = document.getElementById('topSectionSpacer');
let headerHeight = topSection.offsetHeight;
let isFixed = false;
window.addEventListener('scroll', () => {
const currentScrollY = window.scrollY;
// If at top, return to normal flow
if (currentScrollY <= 0) {
topSection.classList.remove('fixed', 'hidden');
spacer.classList.remove('active');
isFixed = false;
lastScrollY = currentScrollY;
return;
}
// Only activate fixed mode after scrolling past the header
if (currentScrollY > headerHeight) {
if (currentScrollY < lastScrollY) {
// Scrolling up - show fixed header
if (!isFixed) {
spacer.style.height = headerHeight + 'px';
spacer.classList.add('active');
topSection.classList.add('fixed');
// Start hidden, then show
topSection.classList.add('hidden');
requestAnimationFrame(() => {
topSection.classList.remove('hidden');
});
isFixed = true;
} else {
topSection.classList.remove('hidden');
}
} else if (currentScrollY > lastScrollY && isFixed) {
// Scrolling down while fixed - hide it
topSection.classList.add('hidden');
}
}
lastScrollY = currentScrollY;
}, { passive: true });
// Stat card click handler
document.addEventListener('click', (e) => {
const card = e.target.closest('.card.clickable');
if (!card) return;
const filterType = card.dataset.filter;
const status = card.dataset.status;
const type = card.dataset.type;
if (filterType === 'domain') {
// Remove active from domain cards only
document.querySelectorAll('.card.clickable[data-filter="domain"]').forEach(c => c.classList.remove('active'));
card.classList.add('active');
domainFilter = status || 'all';
// Update placeholder
const searchInput = document.getElementById('searchInput');
searchInput.placeholder = domainFilter === 'all' ? 'Search domains...' : `Showing ${domainFilter} domains...`;
// Reload TLD list with new filter
loadFeeds(searchQuery);
} else if (filterType === 'feed') {
const wasActive = card.classList.contains('active');
if (status === 'all') {
// ALL card toggles exclusion mode
if (wasActive) {
card.classList.remove('active');
feedFilter.allSelected = false;
} else {
card.classList.add('active');
feedFilter.allSelected = true;
}
} else if (status) {
// Status card (pass, skip, hold, dead) - multi-select
if (wasActive) {
card.classList.remove('active');
feedFilter.statuses = feedFilter.statuses.filter(s => s !== status);
} else {
card.classList.add('active');
feedFilter.statuses.push(status);
}
} else if (type) {
// Type card (rss, atom, json, unknown, empty) - multi-select
if (wasActive) {
card.classList.remove('active');
feedFilter.types = feedFilter.types.filter(t => t !== type);
} else {
card.classList.add('active');
feedFilter.types.push(type);
}
}
// Reload TLD list with feed filter
loadFeeds(searchQuery);
}
});
// Refresh only expanded TLD sections with new domain filter
function refreshExpandedTLDs() {
const expandedContainer = document.getElementById('expandedTLDContent');
if (expandedContainer && expandedContainer.style.display !== 'none' && expandedContainer.dataset.tld) {
// Mark as needing reload and reload
expandedContainer.dataset.loaded = 'false';
loadTLDDomains(expandedContainer, searchQuery);
}
}
// Apply feed filter to currently visible feeds
function applyFeedFilter() {
document.querySelectorAll('.inline-feed-block').forEach(block => {
const feedStatus = block.dataset.status || 'hold';
const feedType = block.dataset.type || 'unknown';
const matchesSelection = feedFilter.statuses.includes(feedStatus) || feedFilter.types.includes(feedType);
let show = true;
if (feedFilter.allSelected) {
// Exclusion mode: hide feeds matching any selected status/type
if (matchesSelection) show = false;
} else if (feedFilter.statuses.length > 0 || feedFilter.types.length > 0) {
// Inclusion mode: only show feeds matching a selected status/type
if (!matchesSelection) show = false;
}
block.style.display = show ? 'block' : 'none';
});
}
// Event delegation for domain-spacer clicks (toggle feeds) // Event delegation for domain-spacer clicks (toggle feeds)
document.addEventListener('click', (e) => { document.addEventListener('click', (e) => {
@@ -96,8 +232,8 @@ function initDashboard() {
['Oldest Item', f.oldestItemDate], ['Oldest Item', f.oldestItemDate],
['Newest Item', f.newestItemDate], ['Newest Item', f.newestItemDate],
['Discovered', f.discoveredAt], ['Discovered', f.discoveredAt],
['Last Crawled', f.lastCrawledAt], ['Last Checked', f.lastCheckedAt],
['Next Crawl', f.nextCrawlAt], ['Next Check', f.nextCheckAt],
['Publish Status', f.publishStatus], ['Publish Status', f.publishStatus],
['Publish Account', f.publishAccount], ['Publish Account', f.publishAccount],
]; ];
@@ -122,7 +258,8 @@ function initDashboard() {
const items = await resp.json(); const items = await resp.json();
if (!items || items.length === 0) { if (!items || items.length === 0) {
itemsDiv.innerHTML = '<span style="color: #666;">No items</span>'; // Just clear the items area, keep the feed visible
itemsDiv.innerHTML = '';
return; return;
} }
@@ -173,7 +310,6 @@ function initDashboard() {
function renderTLDHeader(tld) { function renderTLDHeader(tld) {
return `<div class="tld-section" data-tld="${escapeHtml(tld)}"> return `<div class="tld-section" data-tld="${escapeHtml(tld)}">
<div class="tld-header" style="display: flex; align-items: center; padding: 10px; background: #1a1a1a; border-bottom: 1px solid #333; cursor: pointer; user-select: none;"> <div class="tld-header" style="display: flex; align-items: center; padding: 10px; background: #1a1a1a; border-bottom: 1px solid #333; cursor: pointer; user-select: none;">
<span class="tld-toggle" style="color: #666; margin-right: 10px;">▼</span>
<span style="color: #0af; font-weight: bold; font-size: 1.1em;">.${escapeHtml(tld)}</span> <span style="color: #0af; font-weight: bold; font-size: 1.1em;">.${escapeHtml(tld)}</span>
</div> </div>
<div class="tld-content" style="display: block;">`; <div class="tld-content" style="display: block;">`;
@@ -192,45 +328,163 @@ function initDashboard() {
} }
} }
// Event delegation for TLD header/footer clicks (toggle section) // Event delegation for TLD clicks (toggle section)
document.addEventListener('click', (e) => { document.addEventListener('click', (e) => {
const tldHeader = e.target.closest('.tld-header'); const tldHeader = e.target.closest('.tld-header');
const tldFooter = e.target.closest('.tld-footer'); const tldFooter = e.target.closest('.tld-footer');
const expandedContainer = document.getElementById('expandedTLDContent');
// Handle clicks in expanded container header
if (tldHeader && tldHeader.closest('#expandedTLDContent')) {
// Close the expanded content
const currentSection = document.querySelector('.tld-section.expanded');
if (currentSection) {
currentSection.classList.remove('expanded');
}
expandedContainer.style.display = 'none';
expandedContainer.innerHTML = '';
currentOpenTLD = null;
// Show TLD list again
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = '';
updateStats(); // Revert to search or all stats
return;
}
// Handle clicks on TLD cards
if (tldHeader || tldFooter) { if (tldHeader || tldFooter) {
const section = (tldHeader || tldFooter).closest('.tld-section'); const section = (tldHeader || tldFooter).closest('.tld-section');
if (section) { if (section) {
const content = section.querySelector('.tld-content'); const tld = section.dataset.tld;
const toggle = section.querySelector('.tld-toggle'); const isExpanded = section.classList.contains('expanded');
if (content) {
const isVisible = content.style.display !== 'none';
content.style.display = isVisible ? 'none' : 'block';
if (toggle) toggle.textContent = isVisible ? '▶' : '▼';
if (isVisible) { if (isExpanded) {
// Closing - scroll to next TLD section // Closing this TLD
const nextSection = section.nextElementSibling; section.classList.remove('expanded');
if (nextSection && nextSection.classList.contains('tld-section')) { expandedContainer.style.display = 'none';
nextSection.scrollIntoView({ behavior: 'smooth', block: 'start' }); expandedContainer.innerHTML = '';
} currentOpenTLD = null;
} else { // Show TLD list again
// Opening - load domains if not already loaded const domainList = document.querySelector('.domain-list');
if (section.dataset.loaded === 'false') { if (domainList) domainList.style.display = '';
loadTLDDomains(section, searchQuery); updateStats(); // Revert to search or all stats
} } else {
} // Close any other open TLD first
document.querySelectorAll('.tld-section.expanded').forEach(s => {
s.classList.remove('expanded');
});
// Opening this TLD
section.classList.add('expanded');
currentOpenTLD = tld;
// Hide TLD list
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = 'none';
// Show TLD stats (filtered by search if active)
const currentSearch = document.getElementById('searchInput').value.trim();
updateStatsForTLD(tld, currentSearch);
// Set up expanded container with header
expandedContainer.innerHTML = `
<div class="tld-header">
<span class="tld-name">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
</div>
`;
expandedContainer.style.display = 'block';
expandedContainer.dataset.tld = tld;
expandedContainer.dataset.loaded = 'false';
// Load domains
loadTLDDomains(expandedContainer, searchQuery);
// Scroll to expanded container
expandedContainer.scrollIntoView({ behavior: 'smooth', block: 'start' });
} }
} }
} }
}); });
// Update stats for a specific TLD (optionally filtered by search)
async function updateStatsForTLD(tld, search = '') {
try {
let url = `/api/tldStats?tld=${encodeURIComponent(tld)}`;
if (search) {
url += `&search=${encodeURIComponent(search)}`;
}
const resp = await fetch(url);
if (!resp.ok) return;
const stats = await resp.json();
document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0);
document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0);
document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0);
document.getElementById('updatedAt').textContent = search ? `Search "${search}" in .${tld}` : `Stats for .${tld}`;
} catch (err) {
console.error('TLD stats update failed:', err);
}
}
// Update stats for search results
async function updateStatsForSearch(query) {
try {
const resp = await fetch(`/api/searchStats?search=${encodeURIComponent(query)}`);
if (!resp.ok) {
console.error('Search stats failed:', resp.status);
return;
}
const stats = await resp.json();
document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0);
document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0);
document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0);
document.getElementById('updatedAt').textContent = `Search: "${query}"`;
} catch (err) {
console.error('Search stats update failed:', err);
}
}
// Render domain row with feeds // Render domain row with feeds
function renderDomainRow(d) { function renderDomainRow(d) {
const status = d.status || 'hold'; const status = d.status || 'hold';
let html = `<div class="domain-block" data-host="${escapeHtml(d.host)}" data-status="${status}">`; const fullDomain = d.tld ? d.host + '.' + d.tld : d.host;
let html = `<div class="domain-block" data-host="${escapeHtml(fullDomain)}" data-status="${status}">`;
html += `<div class="domain-row" style="display: flex; align-items: center; padding: 8px 10px; border-bottom: 1px solid #202020;">`; html += `<div class="domain-row" style="display: flex; align-items: center; padding: 8px 10px; border-bottom: 1px solid #202020;">`;
html += renderStatusBtns(status, 'domain', d.host); html += renderStatusBtns(status, 'domain', fullDomain);
html += `<a class="domain-name" href="https://${escapeHtml(d.host)}" target="_blank" style="color: #0af; text-decoration: none;">${escapeHtml(d.host)}</a>`; html += `<a class="domain-name" href="https://${escapeHtml(fullDomain)}" target="_blank" style="color: #0af; text-decoration: none;">${escapeHtml(fullDomain)}</a>`;
if (d.last_error) { if (d.last_error) {
html += `<span class="domain-spacer" style="color: #f66; margin-left: 10px; flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; cursor: pointer;" title="${escapeHtml(d.last_error)}">${escapeHtml(d.last_error)}</span>`; html += `<span class="domain-spacer" style="color: #f66; margin-left: 10px; flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; cursor: pointer;" title="${escapeHtml(d.last_error)}">${escapeHtml(d.last_error)}</span>`;
@@ -244,7 +498,8 @@ function initDashboard() {
html += '<div class="domain-feeds" style="display: block; margin-left: 10px; border-left: 2px solid #333; padding-left: 6px;">'; html += '<div class="domain-feeds" style="display: block; margin-left: 10px; border-left: 2px solid #333; padding-left: 6px;">';
d.feeds.forEach(f => { d.feeds.forEach(f => {
const feedStatus = f.publish_status || 'hold'; const feedStatus = f.publish_status || 'hold';
html += `<div class="inline-feed-block" data-url="${escapeHtml(f.url)}" data-status="${feedStatus}">`; const feedType = f.type || 'unknown';
html += `<div class="inline-feed-block" data-url="${escapeHtml(f.url)}" data-status="${feedStatus}" data-type="${feedType}">`;
html += `<div class="feed-row" style="display: flex; align-items: center; padding: 4px 0;">`; html += `<div class="feed-row" style="display: flex; align-items: center; padding: 4px 0;">`;
html += `<span style="width: 48px; flex-shrink: 0; white-space: nowrap; margin-right: 6px; color: #666; text-align: center;">${escapeHtml(f.language || '')}&nbsp;</span>`; html += `<span style="width: 48px; flex-shrink: 0; white-space: nowrap; margin-right: 6px; color: #666; text-align: center;">${escapeHtml(f.language || '')}&nbsp;</span>`;
@@ -341,7 +596,7 @@ function initDashboard() {
async function loadFeeds(query = '') { async function loadFeeds(query = '') {
const output = document.getElementById('output'); const output = document.getElementById('output');
output.innerHTML = '<div class="domain-list"></div><div id="infiniteLoader" style="text-align: center; padding: 10px; color: #666;">Loading TLDs...</div>'; output.innerHTML = '<div class="domain-list"></div><div id="expandedTLDContent" style="display: none;"></div><div id="infiniteLoader" style="text-align: center; padding: 10px; color: #666;">Loading TLDs...</div>';
// Disconnect previous observer if any // Disconnect previous observer if any
if (tldObserver) { if (tldObserver) {
@@ -349,26 +604,59 @@ function initDashboard() {
} }
try { try {
// Fetch all TLDs first // Fetch TLDs with optional domain status filter, feed filter, and search
const tldsResp = await fetch('/api/tlds?has_feeds=true'); let tldsUrl = '/api/tlds';
const params = [];
if (domainFilter !== 'all') {
params.push(`status=${domainFilter}`);
}
// Add feed filter params if any are selected
if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) {
if (feedFilter.allSelected) {
params.push('feedMode=exclude');
} else {
params.push('feedMode=include');
}
if (feedFilter.statuses.length > 0) {
params.push(`feedStatuses=${feedFilter.statuses.join(',')}`);
}
if (feedFilter.types.length > 0) {
params.push(`feedTypes=${feedFilter.types.join(',')}`);
}
}
if (query) {
params.push(`search=${encodeURIComponent(query)}`);
}
if (params.length > 0) {
tldsUrl += '?' + params.join('&');
}
const tldsResp = await fetch(tldsUrl);
if (!tldsResp.ok) {
const errText = await tldsResp.text();
throw new Error(`HTTP ${tldsResp.status}: ${errText}`);
}
const tlds = await tldsResp.json(); const tlds = await tldsResp.json();
if (!tlds || tlds.length === 0) { if (!tlds || tlds.length === 0) {
document.getElementById('infiniteLoader').textContent = 'No feeds found'; // Update stats for empty results
if (query) {
await updateStatsForSearch(query);
} else {
await updateStats();
}
document.getElementById('infiniteLoader').textContent = query ? 'No matches found' : 'No feeds found';
return; return;
} }
const container = output.querySelector('.domain-list'); const container = output.querySelector('.domain-list');
// Render all TLD sections as collapsed placeholders // Render all TLD sections as card placeholders
tlds.forEach(t => { tlds.forEach(t => {
const tld = t.tld || 'unknown'; const tld = t.tld || 'unknown';
container.insertAdjacentHTML('beforeend', ` container.insertAdjacentHTML('beforeend', `
<div class="tld-section" data-tld="${escapeHtml(tld)}" data-loaded="false"> <div class="tld-section" data-tld="${escapeHtml(tld)}" data-loaded="false">
<div class="tld-header" style="display: flex; align-items: center; padding: 10px; background: #1a1a1a; border-bottom: 1px solid #333; cursor: pointer; user-select: none;"> <div class="tld-header">
<span class="tld-toggle" style="color: #666; margin-right: 10px;">▶</span> <span class="tld-name">.${escapeHtml(tld)}</span>
<span style="color: #0af; font-weight: bold; font-size: 1.1em;">.${escapeHtml(tld)}</span>
<span style="color: #666; margin-left: 10px; font-size: 0.9em;">(${t.domain_count} domains)</span>
</div> </div>
<div class="tld-content" style="display: none;"> <div class="tld-content" style="display: none;">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div> <div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
@@ -377,25 +665,49 @@ function initDashboard() {
`); `);
}); });
document.getElementById('infiniteLoader').textContent = `${tlds.length} TLDs loaded`; document.getElementById('infiniteLoader').textContent = '';
// Set up IntersectionObserver for lazy loading (loads even when collapsed) // Auto-expand if single TLD match, otherwise update stats for search/all
tldObserver = new IntersectionObserver((entries) => { if (tlds.length === 1) {
entries.forEach(entry => { const tld = tlds[0].tld;
if (entry.isIntersecting) { const expandedContainer = document.getElementById('expandedTLDContent');
const section = entry.target; const section = output.querySelector('.tld-section');
if (section.dataset.loaded === 'false') {
loadTLDDomains(section, query);
tldObserver.unobserve(section);
}
}
});
}, { rootMargin: '500px' });
// Observe all TLD sections if (section && expandedContainer) {
container.querySelectorAll('.tld-section').forEach(section => { // Mark as expanded
tldObserver.observe(section); section.classList.add('expanded');
}); currentOpenTLD = tld;
// Hide TLD list
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = 'none';
// Set up expanded container
expandedContainer.innerHTML = `
<div class="tld-header">
<span class="tld-name">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
</div>
`;
expandedContainer.style.display = 'block';
expandedContainer.dataset.tld = tld;
expandedContainer.dataset.loaded = 'false';
// Load domains
loadTLDDomains(expandedContainer, query);
// Show TLD stats (filtered by search if active)
await updateStatsForTLD(tld, query);
}
} else {
// Multiple TLDs - show search or global stats
if (query) {
await updateStatsForSearch(query);
} else {
await updateStats();
}
}
} catch (err) { } catch (err) {
document.getElementById('infiniteLoader').textContent = 'Error: ' + err.message; document.getElementById('infiniteLoader').textContent = 'Error: ' + err.message;
@@ -408,12 +720,30 @@ function initDashboard() {
section.dataset.loaded = 'loading'; section.dataset.loaded = 'loading';
try { try {
let url = `/api/domains?has_feeds=true&tld=${encodeURIComponent(tld)}&limit=500`; let url = `/api/domains?tld=${encodeURIComponent(tld)}&limit=500`;
if (domainFilter !== 'all') {
url += `&status=${domainFilter}`;
}
if (query) { if (query) {
url += `&search=${encodeURIComponent(query)}`; url += `&search=${encodeURIComponent(query)}`;
} }
// Apply feed filter if any feed cards are selected
if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) {
if (feedFilter.allSelected) {
url += '&feedMode=exclude';
} else {
url += '&feedMode=include';
}
if (feedFilter.statuses.length > 0) {
url += `&feedStatuses=${feedFilter.statuses.join(',')}`;
}
if (feedFilter.types.length > 0) {
url += `&feedTypes=${feedFilter.types.join(',')}`;
}
}
const resp = await fetch(url); const resp = await fetch(url);
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
const domains = await resp.json(); const domains = await resp.json();
const content = section.querySelector('.tld-content'); const content = section.querySelector('.tld-content');
@@ -449,34 +779,90 @@ function initDashboard() {
// Search handler // Search handler
const searchInput = document.getElementById('searchInput'); const searchInput = document.getElementById('searchInput');
let searchTimeout; function doSearch() {
searchInput.addEventListener('input', () => { searchQuery = searchInput.value.trim();
clearTimeout(searchTimeout); loadFeeds(searchQuery);
searchTimeout = setTimeout(() => { }
searchQuery = searchInput.value.trim();
loadFeeds(searchQuery); // Search on button click
}, 300); document.getElementById('searchBtn').addEventListener('click', doSearch);
// Clear button - clears search and resets all filters
document.getElementById('clearBtn').addEventListener('click', () => {
searchInput.value = '';
searchQuery = '';
// Reset filters to default
domainFilter = 'all';
feedFilter = { allSelected: false, statuses: [], types: [] };
// Reset active card styling
document.querySelectorAll('.card.clickable.active').forEach(c => c.classList.remove('active'));
document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active');
searchInput.placeholder = 'Search domains...';
// Close any expanded TLD
currentOpenTLD = null;
const expandedContainer = document.getElementById('expandedTLDContent');
if (expandedContainer) {
expandedContainer.style.display = 'none';
expandedContainer.innerHTML = '';
}
// Show TLD list if hidden
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = '';
// Reload and update stats
loadFeeds();
}); });
// Initial load // Search on Enter key
searchInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter') {
e.preventDefault();
doSearch();
}
});
// Initial load - set default active cards and load
document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active');
loadFeeds(); loadFeeds();
// Update stats periodically // Update stats periodically
async function updateStats() { async function updateStats() {
// Check actual input value for current search state
const currentSearch = document.getElementById('searchInput')?.value.trim() || '';
// Priority: open TLD > search query > all
if (currentOpenTLD) {
updateStatsForTLD(currentOpenTLD, currentSearch);
return;
}
if (currentSearch) {
updateStatsForSearch(currentSearch);
return;
}
try { try {
const resp = await fetch('/api/stats'); const resp = await fetch('/api/stats');
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
const stats = await resp.json(); const stats = await resp.json();
document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains); document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains); document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains); document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains); document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains);
document.getElementById('crawlRate').textContent = commaFormat(stats.crawl_rate); document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains);
document.getElementById('checkRate').textContent = commaFormat(stats.check_rate); document.getElementById('domainCheckRate').textContent = commaFormat(stats.domain_check_rate);
document.getElementById('feedCrawlRate').textContent = commaFormat(stats.feed_crawl_rate);
document.getElementById('feedCheckRate').textContent = commaFormat(stats.feed_check_rate);
document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds); document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds); document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds); document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds); document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds);
document.getElementById('updatedAt').textContent = 'Last updated: ' + new Date().toLocaleString(); document.getElementById('updatedAt').textContent = 'All TLDs - ' + new Date().toLocaleTimeString();
} catch (err) { } catch (err) {
console.error('Stats update failed:', err); console.error('Stats update failed:', err);
} }
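
loadFeeds and loadTLDDomains above encode the feed filter as feedMode=include|exclude plus comma-separated feedStatuses and feedTypes query parameters. How the server turns these into SQL is not shown in this commit view; a minimal sketch of just the parsing side, with hypothetical names:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

// feedFilterParams mirrors the query parameters the dashboard sends; the
// struct and function names are hypothetical.
type feedFilterParams struct {
	Exclude  bool     // feedMode=exclude -> selected values are excluded
	Statuses []string // feedStatuses=pass,skip,hold,dead
	Types    []string // feedTypes=rss,atom,json,unknown,empty
}

func parseFeedFilter(q url.Values) feedFilterParams {
	p := feedFilterParams{Exclude: q.Get("feedMode") == "exclude"}
	if s := q.Get("feedStatuses"); s != "" {
		p.Statuses = strings.Split(s, ",")
	}
	if t := q.Get("feedTypes"); t != "" {
		p.Types = strings.Split(t, ",")
	}
	return p
}

func main() {
	q, _ := url.ParseQuery("feedMode=include&feedStatuses=pass,hold&feedTypes=rss")
	fmt.Printf("%+v\n", parseFeedFilter(q))
}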
+2 -2
View File
@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
<title>1440.news Feed Crawler</title> <title>1440.news Feed Crawler</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/static/dashboard.css?v=222"> <link rel="stylesheet" href="/static/dashboard.css?v=1769990750">
<script src="/static/dashboard.js?v=222"></script> <script src="/static/dashboard.js?v=1769990750"></script>
</head> </head>
<body> <body>
<div id="topSection"> <div id="topSection">
Executable
BIN
View File
Binary file not shown.
+1530
View File
File diff suppressed because it is too large