Restore working codebase with all methods

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

@@ -6,3 +6,4 @@ feeds/
 .gitignore
 .claude
 CLAUDE.md
+.launch.sh
@@ -2,7 +2,7 @@
 
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 
-> **Note:** Always run applications in containers via `docker compose up -d --build` when possible. This ensures proper networking between services (database, traefik, etc.) and matches the production environment.
+> **IMPORTANT:** Always use `./.launch.sh` to deploy changes. This script updates version numbers in static files (CSS/JS cache busting) before running `docker compose up -d --build`. Never use `docker compose` directly.
 
 ## Build & Run
 
@@ -84,17 +84,23 @@ PostgreSQL with pgx driver, using connection pooling:
 
 Column naming: snake_case (e.g., `source_host`, `pub_date`, `item_count`)
 
-### Crawl Logic
+### Processing Terminology
 
-1. Domains import as `pass` by default (auto-crawled)
-2. Crawl loop picks up domains where `last_crawled_at IS NULL`
-3. Full recursive crawl (HTTPS, fallback HTTP) up to MaxDepth=10, MaxPagesPerHost=10
+- **domain_check**: DNS lookup to verify domain is live
+- **feed_crawl**: Crawl a live domain to discover RSS/Atom feeds
+- **feed_check**: Check a known feed for new items
+
+### Domain Processing Flow
+
+1. Domains import as `pass` by default
+2. Domain loop runs **domain_check** (DNS lookup) for unchecked domains
+3. Domain loop runs **feed_crawl** for checked domains (recursive crawl up to MaxDepth=10, MaxPagesPerHost=10)
 4. Extract `<link rel="alternate">` and anchor hrefs containing rss/atom/feed
-5. Parse discovered feeds for metadata, save with next_crawl_at
+5. Parse discovered feeds for metadata, save with `next_check_at`
 
 ### Feed Checking
 
-Uses conditional HTTP (ETag, If-Modified-Since). Adaptive backoff: base 100s + 100s per consecutive no-change. Respects RSS `<ttl>` and Syndication namespace hints.
+**feed_check** uses conditional HTTP (ETag, If-Modified-Since). Adaptive backoff: base 100s + 100s per consecutive no-change. Respects RSS `<ttl>` and Syndication namespace hints.
 
 ### Publishing
 
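For reference, the adaptive backoff quoted in the Feed Checking note works out to the schedule below. This is a self-contained sketch mirroring `calculateNextCheck` further down in this diff, not part of the commit itself:

```go
package main

import (
	"fmt"
	"time"
)

// nextCheckDelay mirrors the feed_check backoff described above:
// base 100s plus 100s per consecutive no-change.
func nextCheckDelay(noUpdate int) time.Duration {
	return time.Duration(100+100*noUpdate) * time.Second
}

func main() {
	for _, n := range []int{0, 1, 10, 100} {
		fmt.Printf("no_update=%d -> next check in %v\n", n, nextCheckDelay(n))
	}
	// no_update=0 -> 1m40s, 1 -> 3m20s, 10 -> 18m20s, 100 -> 2h48m20s
}
```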
+4 -4
@@ -1,9 +1,9 @@
-FROM golang:1.24-alpine AS builder
+FROM golang:latest AS builder
 
 WORKDIR /app
 
 # Install build dependencies
-RUN apk add --no-cache gcc musl-dev
+RUN apt-get update && apt-get install -y gcc && rm -rf /var/lib/apt/lists/*
 
 # Copy go mod files first for layer caching
 COPY go.mod go.sum ./
@@ -17,12 +17,12 @@ COPY static/ ./static/
 RUN CGO_ENABLED=1 go build -o 1440.news .
 
 # Runtime stage
-FROM alpine:latest
+FROM ubuntu:latest
 
 WORKDIR /app
 
 # Install runtime dependencies
-RUN apk add --no-cache ca-certificates tzdata
+RUN apt-get update && apt-get install -y ca-certificates tzdata && rm -rf /var/lib/apt/lists/*
 
 # Copy binary from builder
 COPY --from=builder /app/1440.news .
+9 -9
@@ -26,8 +26,8 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
 	Language      string `json:"language,omitempty"`
 	SiteURL       string `json:"siteUrl,omitempty"`
 	DiscoveredAt  string `json:"discoveredAt,omitempty"`
-	LastCrawledAt string `json:"lastCrawledAt,omitempty"`
-	NextCrawlAt   string `json:"nextCrawlAt,omitempty"`
+	LastCheckedAt string `json:"lastCheckedAt,omitempty"`
+	NextCheckAt   string `json:"nextCheckAt,omitempty"`
 	LastBuildDate string `json:"lastBuildDate,omitempty"`
 	Status        string `json:"status,omitempty"`
 	LastError     string `json:"lastError,omitempty"`
@@ -40,7 +40,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
 
 	var f FeedDetails
 	var category, title, description, language, siteUrl *string
-	var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
+	var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time
 	var status, lastError *string
 	var oldestItemDate, newestItemDate *time.Time
 	var itemCount *int
@@ -49,7 +49,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
 
 	err := c.db.QueryRow(`
 		SELECT url, type, category, title, description, language, site_url,
-			discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error,
 			(SELECT COUNT(*) FROM items WHERE feed_url = feeds.url) as item_count,
 			oldest_item_date, newest_item_date,
@@ -57,7 +57,7 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
 		FROM feeds WHERE url = $1
 	`, feedURL).Scan(
 		&f.URL, &f.Type, &category, &title, &description, &language, &siteUrl,
-		&discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+		&discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
 		&status, &lastError,
 		&itemCount, &oldestItemDate, &newestItemDate,
 		&publishStatus, &publishAccount,
@@ -78,11 +78,11 @@ func (c *Crawler) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) {
 	f.Language = StringValue(language)
 	f.SiteURL = StringValue(siteUrl)
 	f.DiscoveredAt = discoveredAt.Format(time.RFC3339)
-	if lastCrawledAt != nil {
-		f.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
+	if lastCheckedAt != nil {
+		f.LastCheckedAt = lastCheckedAt.Format(time.RFC3339)
 	}
-	if nextCrawlAt != nil {
-		f.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
+	if nextCheckAt != nil {
+		f.NextCheckAt = nextCheckAt.Format(time.RFC3339)
 	}
 	if lastBuildDate != nil {
 		f.LastBuildDate = lastBuildDate.Format(time.RFC3339)
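To show how the renamed fields surface to a client, here is a minimal sketch of calling this endpoint and decoding the camelCase JSON. The route path and the `url` query parameter are assumptions, since the handler registration is not part of this hunk:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// feedInfo decodes a subset of the FeedDetails JSON built by handleAPIFeedInfo;
// the field tags below are the ones visible in the hunk above.
type feedInfo struct {
	DiscoveredAt  string `json:"discoveredAt,omitempty"`
	LastCheckedAt string `json:"lastCheckedAt,omitempty"`
	NextCheckAt   string `json:"nextCheckAt,omitempty"`
	Status        string `json:"status,omitempty"`
	LastError     string `json:"lastError,omitempty"`
}

func main() {
	// Assumed route and parameter name; adjust to the real registration.
	endpoint := "http://localhost:8080/api/feedInfo?url=" +
		url.QueryEscape("https://example.com/feed.xml")

	resp, err := http.Get(endpoint)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var fi feedInfo
	if err := json.NewDecoder(resp.Body).Decode(&fi); err != nil {
		panic(err)
	}
	fmt.Printf("status=%s lastChecked=%s nextCheck=%s\n", fi.Status, fi.LastCheckedAt, fi.NextCheckAt)
}
```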
+30
-27
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
@@ -16,16 +17,16 @@ type SearchResult struct {
|
||||
}
|
||||
|
||||
type SearchFeed struct {
|
||||
URL string `json:"url"`
|
||||
Type string `json:"type"`
|
||||
Category string `json:"category"`
|
||||
Title string `json:"title"`
|
||||
Description string `json:"description"`
|
||||
Language string `json:"language"`
|
||||
SiteURL string `json:"site_url"`
|
||||
DiscoveredAt string `json:"discovered_at"`
|
||||
LastCrawledAt string `json:"last_crawled_at"`
|
||||
NextCrawlAt string `json:"next_crawl_at"`
|
||||
URL string `json:"url"`
|
||||
Type string `json:"type"`
|
||||
Category string `json:"category"`
|
||||
Title string `json:"title"`
|
||||
Description string `json:"description"`
|
||||
Language string `json:"language"`
|
||||
SiteURL string `json:"site_url"`
|
||||
DiscoveredAt string `json:"discovered_at"`
|
||||
LastCheckedAt string `json:"last_checked_at"`
|
||||
NextCheckAt string `json:"next_check_at"`
|
||||
LastBuildDate string `json:"last_build_date"`
|
||||
Status string `json:"status"`
|
||||
LastError string `json:"last_error"`
|
||||
@@ -76,7 +77,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
var url string
|
||||
var feedType, category, title, description, language, siteUrl *string
|
||||
var discoveredAt time.Time
|
||||
var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
|
||||
var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time
|
||||
var itemCount *int
|
||||
var status, lastError *string
|
||||
var lastErrorAt *time.Time
|
||||
@@ -85,7 +86,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
var noUpdate *bool
|
||||
|
||||
if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl,
|
||||
&discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
|
||||
&discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
|
||||
&status, &lastError, &lastErrorAt,
|
||||
&sourceUrl, &sourceHost, &tld,
|
||||
&itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil {
|
||||
@@ -110,11 +111,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
SourceHost: StringValue(sourceHost),
|
||||
TLD: StringValue(tld),
|
||||
}
|
||||
if lastCrawledAt != nil {
|
||||
sf.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
|
||||
if lastCheckedAt != nil {
|
||||
sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339)
|
||||
}
|
||||
if nextCrawlAt != nil {
|
||||
sf.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
|
||||
if nextCheckAt != nil {
|
||||
sf.NextCheckAt = nextCheckAt.Format(time.RFC3339)
|
||||
}
|
||||
if lastBuildDate != nil {
|
||||
sf.LastBuildDate = lastBuildDate.Format(time.RFC3339)
|
||||
@@ -138,16 +139,18 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// Search feeds by source_host (LIKE search for domain matching)
|
||||
// Use LOWER() to leverage trigram index
|
||||
lowerPattern := "%" + strings.ToLower(query) + "%"
|
||||
hostRows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
item_count, oldest_item_date, newest_item_date, no_update
|
||||
FROM feeds
|
||||
WHERE source_host ILIKE $1 OR url ILIKE $1
|
||||
WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1
|
||||
LIMIT $2
|
||||
`, "%"+query+"%", limit)
|
||||
`, lowerPattern, limit)
|
||||
if err == nil {
|
||||
defer hostRows.Close()
|
||||
for hostRows.Next() {
|
||||
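The comment above says the `LOWER(...) LIKE` rewrite is meant to leverage a trigram index. The schema is not part of this diff, so the following is only an assumed sketch of the kind of expression indexes that would make `LOWER(source_host) LIKE $1` and `LOWER(url) LIKE $1` index-assisted; the connection string and index names are placeholders:

```go
package main

import (
	"context"

	"github.com/jackc/pgx/v5"
)

func main() {
	ctx := context.Background()
	// Placeholder connection string; not taken from the repository.
	conn, err := pgx.Connect(ctx, "postgres://news_1440@infra-postgres:5432/news_1440")
	if err != nil {
		panic(err)
	}
	defer conn.Close(ctx)

	stmts := []string{
		// pg_trgm provides the gin_trgm_ops operator class used below.
		`CREATE EXTENSION IF NOT EXISTS pg_trgm`,
		`CREATE INDEX IF NOT EXISTS feeds_source_host_lower_trgm
		     ON feeds USING gin (LOWER(source_host) gin_trgm_ops)`,
		`CREATE INDEX IF NOT EXISTS feeds_url_lower_trgm
		     ON feeds USING gin (LOWER(url) gin_trgm_ops)`,
	}
	for _, s := range stmts {
		if _, err := conn.Exec(ctx, s); err != nil {
			panic(err)
		}
	}
}
```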
@@ -163,7 +166,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
tsQuery := ToSearchQuery(query)
|
||||
feedRows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
item_count, oldest_item_date, newest_item_date, no_update
|
||||
@@ -228,7 +231,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
// Fetch feed info for this item's feed
|
||||
var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string
|
||||
var fDiscoveredAt time.Time
|
||||
var fLastCrawledAt, fNextCrawlAt, fLastBuildDate *time.Time
|
||||
var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time
|
||||
var fItemCount *int
|
||||
var fStatus, fLastError *string
|
||||
var fLastErrorAt *time.Time
|
||||
@@ -238,13 +241,13 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
c.db.QueryRow(`
|
||||
SELECT type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
item_count, oldest_item_date, newest_item_date, no_update
|
||||
FROM feeds WHERE url = $1
|
||||
`, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl,
|
||||
&fDiscoveredAt, &fLastCrawledAt, &fNextCrawlAt, &fLastBuildDate,
|
||||
&fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate,
|
||||
&fStatus, &fLastError, &fLastErrorAt,
|
||||
&fSourceUrl, &fSourceHost, &fTLD,
|
||||
&fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate)
|
||||
@@ -268,11 +271,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
|
||||
SourceHost: StringValue(fSourceHost),
|
||||
TLD: StringValue(fTLD),
|
||||
}
|
||||
if fLastCrawledAt != nil {
|
||||
sf.LastCrawledAt = fLastCrawledAt.Format(time.RFC3339)
|
||||
if fLastCheckedAt != nil {
|
||||
sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339)
|
||||
}
|
||||
if fNextCrawlAt != nil {
|
||||
sf.NextCrawlAt = fNextCrawlAt.Format(time.RFC3339)
|
||||
if fNextCheckAt != nil {
|
||||
sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339)
|
||||
}
|
||||
if fLastBuildDate != nil {
|
||||
sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339)
|
||||
|
||||
+91
-41
@@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
@@ -15,23 +17,22 @@ import (
|
||||
)
|
||||
|
||||
type Crawler struct {
|
||||
MaxDepth int
|
||||
MaxPagesPerHost int
|
||||
Timeout time.Duration
|
||||
UserAgent string
|
||||
visited sync.Map
|
||||
feedsMu sync.Mutex
|
||||
client *http.Client
|
||||
hostsProcessed int32
|
||||
feedsChecked int32
|
||||
startTime time.Time
|
||||
db *DB
|
||||
displayedCrawlRate int
|
||||
displayedCheckRate int
|
||||
domainsImported int32
|
||||
cachedStats *DashboardStats
|
||||
cachedAllDomains []DomainStat
|
||||
statsMu sync.RWMutex
|
||||
MaxDepth int
|
||||
MaxPagesPerHost int
|
||||
Timeout time.Duration
|
||||
UserAgent string
|
||||
visited sync.Map
|
||||
feedsMu sync.Mutex
|
||||
client *http.Client
|
||||
domainsCrawled int32 // feed_crawl: domains crawled for feed discovery
|
||||
domainsChecked int32 // domain_check: domains checked for liveness
|
||||
feedsChecked int32 // feed_check: feeds checked for new items
|
||||
startTime time.Time
|
||||
db *DB
|
||||
domainsImported int32
|
||||
cachedStats *DashboardStats
|
||||
cachedAllDomains []DomainStat
|
||||
statsMu sync.RWMutex
|
||||
}
|
||||
|
||||
func NewCrawler(connString string) (*Crawler, error) {
|
||||
@@ -467,43 +468,92 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// StartCrawlLoop runs the domain crawling loop independently
|
||||
func (c *Crawler) StartCrawlLoop() {
|
||||
numWorkers := 100
|
||||
// dnsResolver uses local caching DNS (infra-dns) with fallback to system
|
||||
var dnsResolver = &net.Resolver{
|
||||
PreferGo: true,
|
||||
Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
|
||||
d := net.Dialer{Timeout: 2 * time.Second}
|
||||
// Try local caching DNS first (CoreDNS on proxy network)
|
||||
conn, err := d.DialContext(ctx, "udp", "infra-dns:53")
|
||||
if err == nil {
|
||||
return conn, nil
|
||||
}
|
||||
// Fallback to system DNS
|
||||
return d.DialContext(ctx, network, address)
|
||||
},
|
||||
}
|
||||
|
||||
// domainCheck performs a DNS lookup to check if a domain resolves
|
||||
func (c *Crawler) domainCheck(host string) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_, err := dnsResolver.LookupHost(ctx, host)
|
||||
return err
|
||||
}
|
||||
|
||||
// StartDomainLoop runs the domain processing loop (domain_check + feed_crawl)
|
||||
func (c *Crawler) StartDomainLoop() {
|
||||
numWorkers := 1000
|
||||
|
||||
// Buffered channel for domain work
|
||||
workChan := make(chan *Domain, 100)
|
||||
workChan := make(chan *Domain, 1000)
|
||||
|
||||
// Start workers
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
go func() {
|
||||
for domain := range workChan {
|
||||
feedsFound, crawlErr := c.crawlHost(domain.Host)
|
||||
errStr := ""
|
||||
if crawlErr != nil {
|
||||
errStr = crawlErr.Error()
|
||||
}
|
||||
if err := c.markDomainCrawled(domain.Host, feedsFound, errStr); err != nil {
|
||||
fmt.Printf("Error marking domain %s as crawled: %v\n", domain.Host, err)
|
||||
fh := domain.FullHost()
|
||||
if domain.CrawledAt.Equal(DomainStateUnchecked) {
|
||||
// domain_check: DNS lookup for liveness
|
||||
err := c.domainCheck(fh)
|
||||
errStr := ""
|
||||
if err != nil {
|
||||
errStr = err.Error()
|
||||
}
|
||||
if err := c.markDomainChecked(domain.Host, domain.TLD, errStr); err != nil {
|
||||
fmt.Printf("Error marking domain %s as checked: %v\n", fh, err)
|
||||
}
|
||||
atomic.AddInt32(&c.domainsChecked, 1)
|
||||
} else {
|
||||
// feed_crawl: crawl domain to discover feeds
|
||||
feedsFound, crawlErr := c.feedCrawl(fh)
|
||||
errStr := ""
|
||||
if crawlErr != nil {
|
||||
errStr = crawlErr.Error()
|
||||
}
|
||||
if err := c.markDomainCrawled(domain.Host, domain.TLD, feedsFound, errStr); err != nil {
|
||||
fmt.Printf("Error marking domain %s as crawled: %v\n", fh, err)
|
||||
}
|
||||
atomic.AddInt32(&c.domainsCrawled, 1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
const fetchSize = 100
|
||||
const fetchSize = 1000
|
||||
for {
|
||||
domains, err := c.GetDomainsToCrawl(fetchSize)
|
||||
domains, err := c.GetDomainsToProcess(fetchSize)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching domains to crawl: %v\n", err)
|
||||
fmt.Printf("Error fetching domains to process: %v\n", err)
|
||||
}
|
||||
|
||||
if len(domains) == 0 {
|
||||
c.displayedCrawlRate = 0
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("%s crawl: %d domains to crawl\n", time.Now().Format("15:04:05"), len(domains))
|
||||
// Count unchecked vs checked for logging
|
||||
unchecked := 0
|
||||
for _, d := range domains {
|
||||
if d.CrawledAt.Equal(DomainStateUnchecked) {
|
||||
unchecked++
|
||||
}
|
||||
}
|
||||
checked := len(domains) - unchecked
|
||||
|
||||
if unchecked > 0 || checked > 0 {
|
||||
fmt.Printf("%s domain: %d domain_check, %d feed_crawl\n", time.Now().Format("15:04:05"), unchecked, checked)
|
||||
}
|
||||
|
||||
for _, domain := range domains {
|
||||
workChan <- domain
|
||||
@@ -513,12 +563,12 @@ func (c *Crawler) StartCrawlLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
// StartCheckLoop runs the feed checking loop independently
|
||||
func (c *Crawler) StartCheckLoop() {
|
||||
numWorkers := 100
|
||||
// StartFeedCheckLoop runs the feed_check loop (checking feeds for new items)
|
||||
func (c *Crawler) StartFeedCheckLoop() {
|
||||
numWorkers := 1000
|
||||
|
||||
// Buffered channel for feed work
|
||||
workChan := make(chan *Feed, 100)
|
||||
workChan := make(chan *Feed, 1000)
|
||||
|
||||
// Start workers
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
@@ -537,12 +587,11 @@ func (c *Crawler) StartCheckLoop() {
|
||||
}
|
||||
|
||||
if len(feeds) == 0 {
|
||||
c.displayedCheckRate = 0
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("%s check: %d feeds to check\n", time.Now().Format("15:04:05"), len(feeds))
|
||||
fmt.Printf("%s feed_check: %d feeds\n", time.Now().Format("15:04:05"), len(feeds))
|
||||
|
||||
for _, feed := range feeds {
|
||||
workChan <- feed
|
||||
@@ -552,8 +601,9 @@ func (c *Crawler) StartCheckLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Crawler) crawlHost(host string) (feedsFound int, err error) {
|
||||
atomic.AddInt32(&c.hostsProcessed, 1)
|
||||
// feedCrawl crawls a domain to discover RSS/Atom feeds
|
||||
func (c *Crawler) feedCrawl(host string) (feedsFound int, err error) {
|
||||
atomic.AddInt32(&c.domainsCrawled, 1)
|
||||
|
||||
localVisited := make(map[string]bool)
|
||||
pagesVisited := 0
|
||||
|
||||
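For context on what `feedCrawl` looks for on each page (CLAUDE.md step 4: `<link rel="alternate">` plus anchors containing rss/atom/feed), here is a rough, self-contained illustration of that extraction using golang.org/x/net/html. It is an assumption about the approach, not the project's own parser:

```go
package main

import (
	"fmt"
	"strings"

	"golang.org/x/net/html"
)

// findFeedLinks collects candidate feed URLs from one parsed page:
// <link rel="alternate" type="...xml"> hrefs and <a href> values
// containing rss/atom/feed. Illustrative only.
func findFeedLinks(doc *html.Node) []string {
	var feeds []string
	var walk func(n *html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			attrs := map[string]string{}
			for _, a := range n.Attr {
				attrs[strings.ToLower(a.Key)] = a.Val
			}
			switch n.Data {
			case "link":
				if attrs["rel"] == "alternate" && strings.Contains(attrs["type"], "xml") {
					feeds = append(feeds, attrs["href"])
				}
			case "a":
				h := strings.ToLower(attrs["href"])
				if strings.Contains(h, "rss") || strings.Contains(h, "atom") || strings.Contains(h, "feed") {
					feeds = append(feeds, attrs["href"])
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)
	return feeds
}

func main() {
	page := `<html><head><link rel="alternate" type="application/rss+xml" href="/feed.xml"></head>` +
		`<body><a href="/blog/atom">Atom</a></body></html>`
	doc, err := html.Parse(strings.NewReader(page))
	if err != nil {
		panic(err)
	}
	fmt.Println(findFeedLinks(doc)) // [/feed.xml /blog/atom]
}
```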
+55
-22
@@ -12,17 +12,26 @@ type DashboardStats struct {
|
||||
HoldDomains int `json:"hold_domains"`
|
||||
PassDomains int `json:"pass_domains"`
|
||||
SkipDomains int `json:"skip_domains"`
|
||||
DeadDomains int `json:"dead_domains"`
|
||||
|
||||
// Feed stats
|
||||
TotalFeeds int `json:"total_feeds"`
|
||||
AliveFeeds int `json:"alive_feeds"` // status='pass' (healthy feeds)
|
||||
PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
|
||||
SkipFeeds int `json:"skip_feeds"`
|
||||
HoldFeeds int `json:"hold_feeds"`
|
||||
DeadFeeds int `json:"dead_feeds"`
|
||||
EmptyFeeds int `json:"empty_feeds"`
|
||||
RSSFeeds int `json:"rss_feeds"`
|
||||
AtomFeeds int `json:"atom_feeds"`
|
||||
JSONFeeds int `json:"json_feeds"`
|
||||
UnknownFeeds int `json:"unknown_feeds"`
|
||||
|
||||
// Crawl progress
|
||||
HostsProcessed int32 `json:"hosts_processed"`
|
||||
CrawlRate int `json:"crawl_rate"` // crawls per minute
|
||||
CheckRate int `json:"check_rate"` // feed checks per minute
|
||||
// Processing rates (per minute)
|
||||
DomainsCrawled int32 `json:"domains_crawled"` // feed_crawl count
|
||||
DomainCheckRate int `json:"domain_check_rate"` // domain_check per minute
|
||||
FeedCrawlRate int `json:"feed_crawl_rate"` // feed_crawl per minute
|
||||
FeedCheckRate int `json:"feed_check_rate"` // feed_check per minute
|
||||
|
||||
// Timing
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
@@ -122,28 +131,15 @@ func (c *Crawler) GetDashboardStats() (*DashboardStats, error) {
|
||||
func (c *Crawler) calculateStats() (*DashboardStats, error) {
|
||||
stats := &DashboardStats{
|
||||
UpdatedAt: time.Now(),
|
||||
HostsProcessed: c.hostsProcessed,
|
||||
DomainsCrawled: c.domainsCrawled,
|
||||
}
|
||||
|
||||
// Calculate crawl rate (crawls per minute), smoothed by +/-1 per update
|
||||
// Calculate rates (per minute)
|
||||
elapsed := time.Since(c.startTime).Minutes()
|
||||
if elapsed > 0 {
|
||||
actualRate := int(float64(c.hostsProcessed) / elapsed)
|
||||
if actualRate > c.displayedCrawlRate {
|
||||
c.displayedCrawlRate++
|
||||
} else if actualRate < c.displayedCrawlRate {
|
||||
c.displayedCrawlRate--
|
||||
}
|
||||
stats.CrawlRate = c.displayedCrawlRate
|
||||
|
||||
// Calculate check rate (feed checks per minute), smoothed by +/-1 per update
|
||||
actualCheckRate := int(float64(c.feedsChecked) / elapsed)
|
||||
if actualCheckRate > c.displayedCheckRate {
|
||||
c.displayedCheckRate++
|
||||
} else if actualCheckRate < c.displayedCheckRate {
|
||||
c.displayedCheckRate--
|
||||
}
|
||||
stats.CheckRate = c.displayedCheckRate
|
||||
stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed)
|
||||
stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed)
|
||||
stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed)
|
||||
}
|
||||
|
||||
// Get domain stats
|
||||
@@ -186,6 +182,8 @@ func (c *Crawler) collectDomainStats(stats *DashboardStats) error {
|
||||
stats.PassDomains = count
|
||||
case "skip":
|
||||
stats.SkipDomains = count
|
||||
case "dead":
|
||||
stats.DeadDomains = count
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
@@ -202,6 +200,39 @@ func (c *Crawler) collectFeedStats(stats *DashboardStats) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get status counts
|
||||
statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statusRows.Close()
|
||||
|
||||
for statusRows.Next() {
|
||||
var status *string
|
||||
var count int
|
||||
if err := statusRows.Scan(&status, &count); err != nil {
|
||||
continue
|
||||
}
|
||||
if status != nil {
|
||||
switch *status {
|
||||
case "pass":
|
||||
stats.AliveFeeds = count
|
||||
case "skip":
|
||||
stats.SkipFeeds = count
|
||||
case "hold":
|
||||
stats.HoldFeeds = count
|
||||
case "dead":
|
||||
stats.DeadFeeds = count
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Count feeds approved for publishing (publish_status='pass')
|
||||
c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds)
|
||||
|
||||
// Count empty feeds (item_count = 0 or NULL)
|
||||
c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds)
|
||||
|
||||
// Single query to get all type counts (one index scan instead of three)
|
||||
rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type")
|
||||
if err != nil {
|
||||
@@ -223,6 +254,8 @@ func (c *Crawler) collectFeedStats(stats *DashboardStats) error {
|
||||
stats.RSSFeeds = count
|
||||
case "atom":
|
||||
stats.AtomFeeds = count
|
||||
case "json":
|
||||
stats.JSONFeeds = count
|
||||
default:
|
||||
stats.UnknownFeeds += count
|
||||
}
|
||||
|
||||
+4 -3
@@ -1,14 +1,15 @@
 services:
   app-1440-news:
     build: .
-    container_name: app-1440-news
+    image: atproto-1440news-app
+    container_name: atproto-1440news-app
     restart: unless-stopped
     stop_grace_period: 30s
     env_file:
       - pds.env
       - oauth.env
     environment:
-      DB_HOST: atproto-postgres
+      DB_HOST: infra-postgres
       DB_PORT: 5432
       DB_USER: news_1440
       DB_PASSWORD_FILE: /run/secrets/db_password
@@ -54,7 +55,7 @@ services:
 
 secrets:
   db_password:
-    file: ../postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/news_1440_password.txt
 
 networks:
   proxy:
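The compose file injects the database password as a Docker secret file (`DB_PASSWORD_FILE`) rather than as a plain environment value. A minimal sketch of how startup code might resolve that into a connection string; the project's actual bootstrap code is not in this diff, and the database name below is a placeholder:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// dbPassword reads the secret file referenced by DB_PASSWORD_FILE,
// which the compose file sets to /run/secrets/db_password.
func dbPassword() (string, error) {
	path := os.Getenv("DB_PASSWORD_FILE")
	if path == "" {
		return "", fmt.Errorf("DB_PASSWORD_FILE not set")
	}
	b, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(b)), nil
}

func main() {
	pw, err := dbPassword()
	if err != nil {
		panic(err)
	}
	// "news_1440" as database name is an assumption; only host/port/user appear above.
	conn := fmt.Sprintf("postgres://%s:%s@%s:%s/news_1440",
		os.Getenv("DB_USER"), pw, os.Getenv("DB_HOST"), os.Getenv("DB_PORT"))
	fmt.Println("connection string assembled:", strings.Replace(conn, pw, "******", 1))
}
```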
@@ -14,19 +14,38 @@ import (
|
||||
"github.com/jackc/pgx/v5"
|
||||
)
|
||||
|
||||
// Domain represents a host to be crawled for feeds
|
||||
// Domain represents a host to process for feeds
|
||||
// Status: hold (pending review), pass (approved), skip (not processing)
|
||||
// CrawledAt: zero time = needs domain_check, +1 sec = needs feed_crawl, real time = done
|
||||
type Domain struct {
|
||||
Host string `json:"host"`
|
||||
Status string `json:"status"`
|
||||
DiscoveredAt time.Time `json:"discovered_at"`
|
||||
LastCheckedAt time.Time `json:"last_checked_at,omitempty"`
|
||||
LastCrawledAt time.Time `json:"last_crawled_at,omitempty"`
|
||||
FeedsFound int `json:"feeds_found,omitempty"`
|
||||
LastError string `json:"last_error,omitempty"`
|
||||
TLD string `json:"tld,omitempty"`
|
||||
Host string `json:"host"`
|
||||
Status string `json:"status"`
|
||||
CrawledAt time.Time `json:"crawled_at"`
|
||||
FeedsFound int `json:"feeds_found,omitempty"`
|
||||
LastError string `json:"last_error,omitempty"`
|
||||
TLD string `json:"tld,omitempty"`
|
||||
MissCount int `json:"miss_count,omitempty"`
|
||||
}
|
||||
|
||||
// MissCountThreshold is the number of consecutive errors before setting status to hold
|
||||
const MissCountThreshold = 100
|
||||
|
||||
// ErrorRetryDelay is how long to wait before retrying a domain with errors (1 hour minimum)
|
||||
// At 100 seconds actual rate due to queue, 100 misses = ~2.8 hours
|
||||
// At 1 hour minimum delay, 100 misses = ~4+ days in practice
|
||||
var ErrorRetryDelay = 1 * time.Hour
|
||||
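A quick check of the arithmetic in the two comments above, using the constants as defined:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const missCountThreshold = 100

	// "At 100 seconds actual rate due to queue, 100 misses = ~2.8 hours"
	fast := time.Duration(missCountThreshold) * 100 * time.Second
	fmt.Printf("%.1f hours\n", fast.Hours()) // 2.8 hours

	// "At 1 hour minimum delay, 100 misses = ~4+ days in practice"
	slow := time.Duration(missCountThreshold) * time.Hour
	fmt.Printf("%.1f days\n", slow.Hours()/24) // 4.2 days
}
```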
|
||||
// FullHost returns the complete hostname (host + tld)
|
||||
func (d *Domain) FullHost() string {
|
||||
return fullHost(d.Host, d.TLD)
|
||||
}
|
||||
|
||||
// Sentinel values for domain processing state
|
||||
var (
|
||||
DomainStateUnchecked = time.Time{} // 0001-01-01 00:00:00 - needs domain_check
|
||||
DomainStateChecked = time.Time{}.Add(time.Second) // 0001-01-01 00:00:01 - needs feed_crawl
|
||||
)
|
||||
|
||||
// shouldAutoSkipDomain checks if a domain should be auto-skipped based on patterns
|
||||
func shouldAutoSkipDomain(host string) bool {
|
||||
// Never skip our own domain
|
||||
@@ -51,62 +70,63 @@ func shouldAutoSkipDomain(host string) bool {
|
||||
// saveDomain stores a domain in PostgreSQL
|
||||
func (c *Crawler) saveDomain(domain *Domain) error {
|
||||
// Auto-skip domains matching spam patterns
|
||||
fh := domain.FullHost()
|
||||
status := domain.Status
|
||||
if shouldAutoSkipDomain(domain.Host) {
|
||||
if shouldAutoSkipDomain(fh) {
|
||||
status = "skip"
|
||||
}
|
||||
|
||||
_, err := c.db.Exec(`
|
||||
INSERT INTO domains (host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
ON CONFLICT(host) DO UPDATE SET
|
||||
INSERT INTO domains (host, status, crawled_at, feeds_found, last_error, tld)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
ON CONFLICT(host, tld) DO UPDATE SET
|
||||
status = EXCLUDED.status,
|
||||
last_checked_at = EXCLUDED.last_checked_at,
|
||||
last_crawled_at = EXCLUDED.last_crawled_at,
|
||||
crawled_at = EXCLUDED.crawled_at,
|
||||
feeds_found = EXCLUDED.feeds_found,
|
||||
last_error = EXCLUDED.last_error,
|
||||
tld = EXCLUDED.tld
|
||||
`, domain.Host, status, domain.DiscoveredAt, NullableTime(domain.LastCheckedAt),
|
||||
NullableTime(domain.LastCrawledAt), domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
|
||||
last_error = EXCLUDED.last_error
|
||||
`, stripTLD(fh), status, domain.CrawledAt,
|
||||
domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
|
||||
return err
|
||||
}
|
||||
|
||||
// saveDomainTx stores a domain using a transaction
|
||||
func (c *Crawler) saveDomainTx(tx pgx.Tx, domain *Domain) error {
|
||||
// Auto-skip domains matching spam patterns
|
||||
fh := domain.FullHost()
|
||||
status := domain.Status
|
||||
if shouldAutoSkipDomain(domain.Host) {
|
||||
if shouldAutoSkipDomain(fh) {
|
||||
status = "skip"
|
||||
}
|
||||
|
||||
_, err := tx.Exec(context.Background(), `
|
||||
INSERT INTO domains (host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
ON CONFLICT(host) DO NOTHING
|
||||
`, domain.Host, status, domain.DiscoveredAt, NullableTime(domain.LastCheckedAt),
|
||||
NullableTime(domain.LastCrawledAt), domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
|
||||
INSERT INTO domains (host, status, crawled_at, feeds_found, last_error, tld)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
ON CONFLICT(host, tld) DO NOTHING
|
||||
`, stripTLD(fh), status, domain.CrawledAt,
|
||||
domain.FeedsFound, NullableString(domain.LastError), domain.TLD)
|
||||
return err
|
||||
}
|
||||
|
||||
// domainExists checks if a domain already exists in the database
|
||||
func (c *Crawler) domainExists(host string) bool {
|
||||
host = normalizeHost(host)
|
||||
var exists bool
|
||||
err := c.db.QueryRow("SELECT EXISTS(SELECT 1 FROM domains WHERE host = $1)", normalizeHost(host)).Scan(&exists)
|
||||
err := c.db.QueryRow("SELECT EXISTS(SELECT 1 FROM domains WHERE host = $1 AND tld = $2)", stripTLD(host), getTLD(host)).Scan(&exists)
|
||||
return err == nil && exists
|
||||
}
|
||||
|
||||
// getDomain retrieves a domain from PostgreSQL
|
||||
func (c *Crawler) getDomain(host string) (*Domain, error) {
|
||||
host = normalizeHost(host)
|
||||
domain := &Domain{}
|
||||
var lastCheckedAt, lastCrawledAt *time.Time
|
||||
var lastError *string
|
||||
|
||||
err := c.db.QueryRow(`
|
||||
SELECT host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld
|
||||
FROM domains WHERE host = $1
|
||||
`, normalizeHost(host)).Scan(
|
||||
&domain.Host, &domain.Status, &domain.DiscoveredAt, &lastCheckedAt, &lastCrawledAt,
|
||||
&domain.FeedsFound, &lastError, &domain.TLD,
|
||||
SELECT host, tld, status, crawled_at, feeds_found, last_error
|
||||
FROM domains WHERE host = $1 AND tld = $2
|
||||
`, stripTLD(host), getTLD(host)).Scan(
|
||||
&domain.Host, &domain.TLD, &domain.Status, &domain.CrawledAt,
|
||||
&domain.FeedsFound, &lastError,
|
||||
)
|
||||
|
||||
if err == pgx.ErrNoRows {
|
||||
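The host/tld split above leans on helpers (`stripTLD`, `getTLD`, `fullHost`) that are not part of this diff. A plausible, purely illustrative reading of what they do; the real versions may handle multi-label suffixes (e.g. co.uk) differently:

```go
package main

import (
	"fmt"
	"strings"
)

// Illustrative only: not the project's implementations.
func getTLD(host string) string {
	if i := strings.LastIndex(host, "."); i >= 0 {
		return host[i+1:]
	}
	return ""
}

func stripTLD(host string) string {
	if i := strings.LastIndex(host, "."); i >= 0 {
		return host[:i]
	}
	return host
}

func fullHost(host, tld string) string {
	if tld == "" {
		return host
	}
	return host + "." + tld
}

func main() {
	h := "news.example.com"
	fmt.Println(stripTLD(h), getTLD(h), fullHost(stripTLD(h), getTLD(h)))
	// news.example com news.example.com
}
```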
@@ -116,21 +136,26 @@ func (c *Crawler) getDomain(host string) (*Domain, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
domain.LastCheckedAt = TimeValue(lastCheckedAt)
|
||||
domain.LastCrawledAt = TimeValue(lastCrawledAt)
|
||||
domain.LastError = StringValue(lastError)
|
||||
|
||||
return domain, nil
|
||||
}
|
||||
|
||||
// GetDomainsToCrawl returns domains ready for crawling (status='pass', not yet crawled)
|
||||
func (c *Crawler) GetDomainsToCrawl(limit int) ([]*Domain, error) {
|
||||
// GetDomainsToProcess returns domains needing processing (domain_check or feed_crawl)
|
||||
// crawled_at = zero time means needs domain_check, +1 sec means needs feed_crawl
|
||||
// Domains with errors are retried when crawled_at < now (scheduled by ErrorRetryDelay)
|
||||
func (c *Crawler) GetDomainsToProcess(limit int) ([]*Domain, error) {
|
||||
now := time.Now()
|
||||
rows, err := c.db.Query(`
|
||||
SELECT host, status, discovered_at, last_checked_at, last_crawled_at, feeds_found, last_error, tld
|
||||
FROM domains WHERE status = 'pass' AND last_crawled_at IS NULL
|
||||
ORDER BY discovered_at DESC
|
||||
LIMIT $1
|
||||
`, limit)
|
||||
SELECT host, status, crawled_at, feeds_found, last_error, tld
|
||||
FROM domains
|
||||
WHERE status = 'pass' AND (
|
||||
(crawled_at < '0001-01-02' AND last_error IS NULL) -- new domains
|
||||
OR (crawled_at < $1 AND last_error IS NOT NULL) -- retry errors after delay
|
||||
)
|
||||
ORDER BY crawled_at ASC
|
||||
LIMIT $2
|
||||
`, now, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
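The `crawled_at < '0001-01-02'` branch works because both sentinel values defined earlier in this file sort before that cutoff, while a real completion timestamp does not. A small sanity check:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	unchecked := time.Time{}                // DomainStateUnchecked: needs domain_check
	checked := time.Time{}.Add(time.Second) // DomainStateChecked: needs feed_crawl
	cutoff := time.Date(1, 1, 2, 0, 0, 0, 0, time.UTC)

	fmt.Println(unchecked.Before(cutoff)) // true
	fmt.Println(checked.Before(cutoff))   // true
	fmt.Println(time.Now().Before(cutoff)) // false: fully crawled domains drop out of the first branch
}
```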
@@ -139,23 +164,45 @@ func (c *Crawler) GetDomainsToCrawl(limit int) ([]*Domain, error) {
|
||||
return c.scanDomains(rows)
|
||||
}
|
||||
|
||||
// markDomainChecked updates a domain after domain_check (sets to +1 sec for feed_crawl)
|
||||
// host parameter should be the stripped host (without TLD)
|
||||
func (c *Crawler) markDomainChecked(host, tld, lastError string) error {
|
||||
if lastError != "" {
|
||||
// Increment miss_count, set to 'hold' only at threshold
|
||||
// Schedule retry after ErrorRetryDelay
|
||||
retryAt := time.Now().Add(ErrorRetryDelay)
|
||||
_, err := c.db.Exec(`
|
||||
UPDATE domains SET
|
||||
crawled_at = $1,
|
||||
last_error = $2,
|
||||
miss_count = miss_count + 1,
|
||||
status = CASE WHEN miss_count + 1 >= $3 THEN 'hold' ELSE status END
|
||||
WHERE host = $4 AND tld = $5
|
||||
`, retryAt, lastError, MissCountThreshold, host, tld)
|
||||
return err
|
||||
}
|
||||
// Success - reset miss_count
|
||||
_, err := c.db.Exec(`
|
||||
UPDATE domains SET crawled_at = $1, last_error = NULL, miss_count = 0
|
||||
WHERE host = $2 AND tld = $3
|
||||
`, DomainStateChecked, host, tld)
|
||||
return err
|
||||
}
|
||||
|
||||
// scanDomains is a helper to scan multiple domain rows
|
||||
func (c *Crawler) scanDomains(rows pgx.Rows) ([]*Domain, error) {
|
||||
var domains []*Domain
|
||||
for rows.Next() {
|
||||
domain := &Domain{}
|
||||
var lastCheckedAt, lastCrawledAt *time.Time
|
||||
var lastError *string
|
||||
|
||||
if err := rows.Scan(
|
||||
&domain.Host, &domain.Status, &domain.DiscoveredAt, &lastCheckedAt, &lastCrawledAt,
|
||||
&domain.Host, &domain.Status, &domain.CrawledAt,
|
||||
&domain.FeedsFound, &lastError, &domain.TLD,
|
||||
); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
domain.LastCheckedAt = TimeValue(lastCheckedAt)
|
||||
domain.LastCrawledAt = TimeValue(lastCrawledAt)
|
||||
domain.LastError = StringValue(lastError)
|
||||
|
||||
domains = append(domains, domain)
|
||||
@@ -164,20 +211,30 @@ func (c *Crawler) scanDomains(rows pgx.Rows) ([]*Domain, error) {
|
||||
return domains, rows.Err()
|
||||
}
|
||||
|
||||
// markDomainCrawled updates a domain after the crawl stage
|
||||
func (c *Crawler) markDomainCrawled(host string, feedsFound int, lastError string) error {
|
||||
now := time.Now()
|
||||
// markDomainCrawled updates a domain after feed_crawl (sets to NOW())
|
||||
// host parameter should be the stripped host (without TLD)
|
||||
func (c *Crawler) markDomainCrawled(host, tld string, feedsFound int, lastError string) error {
|
||||
if lastError != "" {
|
||||
// Increment miss_count, set to 'hold' only at threshold
|
||||
// Schedule retry after ErrorRetryDelay
|
||||
retryAt := time.Now().Add(ErrorRetryDelay)
|
||||
_, err := c.db.Exec(`
|
||||
UPDATE domains SET last_crawled_at = $1, feeds_found = $2, last_error = $3
|
||||
WHERE host = $4
|
||||
`, now, feedsFound, lastError, normalizeHost(host))
|
||||
UPDATE domains SET
|
||||
crawled_at = $1,
|
||||
feeds_found = $2,
|
||||
last_error = $3,
|
||||
miss_count = miss_count + 1,
|
||||
status = CASE WHEN miss_count + 1 >= $4 THEN 'hold' ELSE status END
|
||||
WHERE host = $5 AND tld = $6
|
||||
`, retryAt, feedsFound, lastError, MissCountThreshold, host, tld)
|
||||
return err
|
||||
}
|
||||
// Success - reset miss_count
|
||||
now := time.Now()
|
||||
_, err := c.db.Exec(`
|
||||
UPDATE domains SET last_crawled_at = $1, feeds_found = $2, last_error = NULL
|
||||
WHERE host = $3
|
||||
`, now, feedsFound, normalizeHost(host))
|
||||
UPDATE domains SET crawled_at = $1, feeds_found = $2, last_error = NULL, miss_count = 0
|
||||
WHERE host = $3 AND tld = $4
|
||||
`, now, feedsFound, host, tld)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -193,13 +250,13 @@ func (c *Crawler) GetDomainCount() (total int, hold int, err error) {
|
||||
|
||||
// ImportTestDomains adds a list of specific domains for testing
|
||||
func (c *Crawler) ImportTestDomains(domains []string) {
|
||||
now := time.Now()
|
||||
for _, host := range domains {
|
||||
host = normalizeHost(host)
|
||||
_, err := c.db.Exec(`
|
||||
INSERT INTO domains (host, status, discovered_at, tld)
|
||||
VALUES ($1, 'pass', $2, $3)
|
||||
ON CONFLICT(host) DO NOTHING
|
||||
`, host, now, getTLD(host))
|
||||
INSERT INTO domains (host, status, tld)
|
||||
VALUES ($1, 'pass', $2)
|
||||
ON CONFLICT(host, tld) DO NOTHING
|
||||
`, stripTLD(host), getTLD(host))
|
||||
if err != nil {
|
||||
fmt.Printf("Error adding test domain %s: %v\n", host, err)
|
||||
} else {
|
||||
@@ -255,7 +312,6 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
|
||||
scanner.Buffer(buf, 1024*1024)
|
||||
|
||||
const batchSize = 100
|
||||
now := time.Now()
|
||||
totalImported := 0
|
||||
batchCount := 0
|
||||
|
||||
@@ -299,14 +355,14 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
|
||||
if shouldAutoSkipDomain(d.host) {
|
||||
status = "skip"
|
||||
}
|
||||
rows[i] = []interface{}{d.host, status, now, d.tld}
|
||||
rows[i] = []interface{}{stripTLD(d.host), status, d.tld}
|
||||
}
|
||||
|
||||
// Use CopyFrom for bulk insert
|
||||
imported, err := conn.CopyFrom(
|
||||
ctx,
|
||||
pgx.Identifier{"domains"},
|
||||
[]string{"host", "status", "discovered_at", "tld"},
|
||||
[]string{"host", "status", "tld"},
|
||||
pgx.CopyFromRows(rows),
|
||||
)
|
||||
conn.Release()
|
||||
@@ -319,10 +375,10 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
|
||||
status = "skip"
|
||||
}
|
||||
c.db.Exec(`
|
||||
INSERT INTO domains (host, status, discovered_at, tld)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
ON CONFLICT(host) DO NOTHING
|
||||
`, d.host, status, now, d.tld)
|
||||
INSERT INTO domains (host, status, tld)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT(host, tld) DO NOTHING
|
||||
`, stripTLD(d.host), status, d.tld)
|
||||
}
|
||||
imported = int64(len(domains))
|
||||
}
|
||||
@@ -369,7 +425,6 @@ func (c *Crawler) parseAndStoreDomains(reader io.Reader, limit int) (imported in
|
||||
buf := make([]byte, 0, 64*1024)
|
||||
scanner.Buffer(buf, 1024*1024)
|
||||
|
||||
now := time.Now()
|
||||
count := 0
|
||||
const batchSize = 100
|
||||
|
||||
@@ -408,10 +463,10 @@ func (c *Crawler) parseAndStoreDomains(reader io.Reader, limit int) (imported in
|
||||
status = "skip"
|
||||
}
|
||||
result, err := c.db.Exec(`
|
||||
INSERT INTO domains (host, status, discovered_at, tld)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
ON CONFLICT(host) DO NOTHING
|
||||
`, d.host, status, now, d.tld)
|
||||
INSERT INTO domains (host, status, tld)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT(host, tld) DO NOTHING
|
||||
`, stripTLD(d.host), status, d.tld)
|
||||
if err != nil {
|
||||
skipped++
|
||||
} else if result > 0 {
|
||||
|
||||
@@ -101,8 +101,8 @@ type Feed struct {
|
||||
|
||||
// Timing
|
||||
DiscoveredAt time.Time `json:"discovered_at"`
|
||||
LastCrawledAt time.Time `json:"last_crawled_at,omitempty"`
|
||||
NextCrawlAt time.Time `json:"next_crawl_at,omitempty"`
|
||||
LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: when last checked
|
||||
NextCheckAt time.Time `json:"next_check_at,omitempty"` // feed_check: when to next check
|
||||
LastBuildDate time.Time `json:"last_build_date,omitempty"` // From feed's lastBuildDate/updated
|
||||
|
||||
// Cache headers for conditional requests
|
||||
@@ -120,7 +120,7 @@ type Feed struct {
|
||||
TLD string `json:"tld,omitempty"`
|
||||
|
||||
// Content stats
|
||||
ItemCount int `json:"item_count,omitempty"` // Number of items in last crawl
|
||||
ItemCount int `json:"item_count,omitempty"` // Number of items in last feed_check
|
||||
OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
|
||||
NewestItemDate time.Time `json:"newest_item_date,omitempty"`
|
||||
|
||||
@@ -153,7 +153,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
_, err := c.db.Exec(`
|
||||
INSERT INTO feeds (
|
||||
url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -168,8 +168,8 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
description = EXCLUDED.description,
|
||||
language = EXCLUDED.language,
|
||||
site_url = EXCLUDED.site_url,
|
||||
last_crawled_at = EXCLUDED.last_crawled_at,
|
||||
next_crawl_at = EXCLUDED.next_crawl_at,
|
||||
last_checked_at = EXCLUDED.last_checked_at,
|
||||
next_check_at = EXCLUDED.next_check_at,
|
||||
last_build_date = EXCLUDED.last_build_date,
|
||||
etag = EXCLUDED.etag,
|
||||
last_modified = EXCLUDED.last_modified,
|
||||
@@ -185,7 +185,7 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
`,
|
||||
feed.URL, feed.Type, feed.Category, NullableString(feed.Title), NullableString(feed.Description),
|
||||
NullableString(feed.Language), NullableString(feed.SiteURL),
|
||||
feed.DiscoveredAt, NullableTime(feed.LastCrawledAt), NullableTime(feed.NextCrawlAt), NullableTime(feed.LastBuildDate),
|
||||
feed.DiscoveredAt, NullableTime(feed.LastCheckedAt), NullableTime(feed.NextCheckAt), NullableTime(feed.LastBuildDate),
|
||||
NullableString(feed.ETag), NullableString(feed.LastModified),
|
||||
feed.Status, NullableString(feed.LastError), NullableTime(feed.LastErrorAt),
|
||||
NullableString(feed.SourceURL), NullableString(feed.SourceHost), NullableString(feed.TLD),
|
||||
@@ -200,14 +200,14 @@ func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
feed := &Feed{}
|
||||
var category, title, description, language, siteURL *string
|
||||
var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
|
||||
var publishStatus, publishAccount *string
|
||||
var itemCount, noUpdate *int
|
||||
|
||||
err := c.db.QueryRow(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -217,7 +217,7 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
FROM feeds WHERE url = $1
|
||||
`, normalizeURL(feedURL)).Scan(
|
||||
&feed.URL, &feed.Type, &category, &title, &description, &language, &siteURL,
|
||||
&feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
|
||||
&feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
|
||||
&etag, &lastModified,
|
||||
&feed.Status, &lastError, &lastErrorAt,
|
||||
&sourceURL, &sourceHost, &tld,
|
||||
@@ -243,8 +243,8 @@ func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
|
||||
feed.Description = StringValue(description)
|
||||
feed.Language = StringValue(language)
|
||||
feed.SiteURL = StringValue(siteURL)
|
||||
feed.LastCrawledAt = TimeValue(lastCrawledAt)
|
||||
feed.NextCrawlAt = TimeValue(nextCrawlAt)
|
||||
feed.LastCheckedAt = TimeValue(lastCheckedAt)
|
||||
feed.NextCheckAt = TimeValue(nextCheckAt)
|
||||
feed.LastBuildDate = TimeValue(lastBuildDate)
|
||||
feed.ETag = StringValue(etag)
|
||||
feed.LastModified = StringValue(lastModified)
|
||||
@@ -282,7 +282,7 @@ func (c *Crawler) feedExists(feedURL string) bool {
|
||||
func (c *Crawler) GetAllFeeds() ([]*Feed, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -313,11 +313,11 @@ func (c *Crawler) GetFeedCountByHost(host string) (int, error) {
|
||||
return count, err
|
||||
}
|
||||
|
||||
// GetFeedsDueForCheck returns feeds where next_crawl_at <= now, ordered by no_update desc (prioritize infrequent feeds)
|
||||
// GetFeedsDueForCheck returns feeds for feed_check, ordered by last_checked_at ASC (oldest first)
|
||||
func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -325,8 +325,8 @@ func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
|
||||
no_update,
|
||||
publish_status, publish_account
|
||||
FROM feeds
|
||||
WHERE next_crawl_at <= NOW() AND status = 'pass'
|
||||
ORDER BY no_update DESC
|
||||
WHERE last_checked_at > '0001-01-01 00:00:00' AND status = 'pass'
|
||||
ORDER BY last_checked_at ASC
|
||||
LIMIT $1
|
||||
`, limit)
|
||||
if err != nil {
|
||||
@@ -341,7 +341,7 @@ func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
|
||||
func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -363,7 +363,7 @@ func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) {
|
||||
tsquery := ToSearchQuery(query)
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -389,7 +389,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
for rows.Next() {
|
||||
feed := &Feed{}
|
||||
var feedType, category, title, description, language, siteURL *string
|
||||
var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate *time.Time
|
||||
var etag, lastModified, lastError, sourceURL, sourceHost, tld *string
|
||||
var itemCount, noUpdate *int
|
||||
var status *string
|
||||
@@ -397,7 +397,7 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
|
||||
if err := rows.Scan(
|
||||
&feed.URL, &feedType, &category, &title, &description, &language, &siteURL,
|
||||
&feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
|
||||
&feed.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
|
||||
&etag, &lastModified,
|
||||
&status, &lastError, &lastErrorAt,
|
||||
&sourceURL, &sourceHost, &tld,
|
||||
@@ -419,8 +419,8 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
|
||||
feed.Description = StringValue(description)
|
||||
feed.Language = StringValue(language)
|
||||
feed.SiteURL = StringValue(siteURL)
|
||||
feed.LastCrawledAt = TimeValue(lastCrawledAt)
|
||||
feed.NextCrawlAt = TimeValue(nextCrawlAt)
|
||||
feed.LastCheckedAt = TimeValue(lastCheckedAt)
|
||||
feed.NextCheckAt = TimeValue(nextCheckAt)
|
||||
feed.LastBuildDate = TimeValue(lastBuildDate)
|
||||
feed.ETag = StringValue(etag)
|
||||
feed.LastModified = StringValue(lastModified)
|
||||
@@ -471,7 +471,7 @@ func (c *Crawler) SetPublishStatus(feedURL, status, account string) error {
|
||||
func (c *Crawler) GetFeedsByPublishStatus(status string) ([]*Feed, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
@@ -493,7 +493,7 @@ func (c *Crawler) GetFeedsByPublishStatus(status string) ([]*Feed, error) {
|
||||
func (c *Crawler) GetPublishCandidates(limit int) ([]*Feed, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, type, category, title, description, language, site_url,
|
||||
discovered_at, last_crawled_at, next_crawl_at, last_build_date,
|
||||
discovered_at, last_checked_at, next_check_at, last_build_date,
|
||||
etag, last_modified,
|
||||
status, last_error, last_error_at,
|
||||
source_url, source_host, tld,
|
||||
|
||||
+11
-11
@@ -31,7 +31,7 @@ func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Hea
|
||||
Type: feedType,
|
||||
Category: classifyFeed(feedURL),
|
||||
DiscoveredAt: now,
|
||||
LastCrawledAt: now,
|
||||
LastCheckedAt: now,
|
||||
Status: "pass",
|
||||
SourceHost: sourceHost,
|
||||
TLD: getTLD(sourceHost),
|
||||
@@ -53,8 +53,8 @@ func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Hea
|
||||
// Refine category based on parsed title (e.g., "Comments on:")
|
||||
feed.Category = classifyFeedByTitle(feed.Title, feed.Category)
|
||||
|
||||
// Calculate next crawl time
|
||||
feed.NextCrawlAt = c.calculateNextCrawl(feed)
|
||||
// Calculate next feed_check time
|
||||
feed.NextCheckAt = c.calculateNextCheck(feed)
|
||||
|
||||
if err := c.saveFeed(feed); err != nil {
|
||||
return
|
||||
@@ -92,7 +92,7 @@ func (c *Crawler) addFeed(feedURL, feedType, sourceHost, sourceURL string) {
|
||||
SourceURL: normalizeURL(sourceURL),
|
||||
SourceHost: sourceHost,
|
||||
TLD: getTLD(sourceHost),
|
||||
NextCrawlAt: now, // Should be crawled immediately
|
||||
NextCheckAt: now, // Should be crawled immediately
|
||||
}
|
||||
|
||||
if err := c.saveFeed(feed); err != nil {
|
||||
@@ -148,9 +148,9 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
|
||||
err = fmt.Errorf("all URL variants failed")
|
||||
}
|
||||
now := time.Now()
|
||||
feed.LastCrawledAt = now
|
||||
feed.LastCheckedAt = now
|
||||
feed.NoUpdate++
|
||||
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.LastError = err.Error()
|
||||
feed.LastErrorAt = now
|
||||
feed.Status = "hold"
|
||||
@@ -165,13 +165,13 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
|
||||
defer resp.Body.Close()
|
||||
|
||||
now := time.Now()
|
||||
feed.LastCrawledAt = now
|
||||
feed.LastCheckedAt = now
|
||||
|
||||
// 304 Not Modified - feed hasn't changed
|
||||
if resp.StatusCode == http.StatusNotModified {
|
||||
feed.NoUpdate++
|
||||
// Adaptive backoff: 100s base + 100s per consecutive no-change
|
||||
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.LastError = ""
|
||||
feed.Status = "pass"
|
||||
// Auto-hold feeds after 1000 consecutive no-changes
|
||||
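The 304 branch above only fires if the request carried the validators saved from the previous fetch. The request side is not in this hunk, so the following is a generic sketch of a conditional GET, not the project's code:

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// conditionalGet sends the stored validators with the request:
// etag -> If-None-Match, lastModified -> If-Modified-Since.
func conditionalGet(url, etag, lastModified string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}
	if lastModified != "" {
		req.Header.Set("If-Modified-Since", lastModified)
	}
	client := &http.Client{Timeout: 30 * time.Second}
	return client.Do(req)
}

func main() {
	resp, err := conditionalGet("https://example.com/feed.xml", `"abc123"`, "Mon, 02 Jan 2006 15:04:05 GMT")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	if resp.StatusCode == http.StatusNotModified {
		fmt.Println("304: unchanged, back off and keep the old items")
		return
	}
	fmt.Println("status:", resp.Status)
}
```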
@@ -186,7 +186,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
|
||||
// Non-200 response
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
feed.NoUpdate++
|
||||
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.LastError = resp.Status
|
||||
feed.LastErrorAt = now
|
||||
feed.Status = "hold"
|
||||
@@ -203,7 +203,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
feed.NoUpdate++
|
||||
feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.NextCheckAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
feed.LastError = err.Error()
|
||||
feed.LastErrorAt = now
|
||||
feed.Status = "hold"
|
||||
@@ -238,7 +238,7 @@ func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
|
||||
|
||||
// Content changed - reset backoff
|
||||
feed.NoUpdate = 0
|
||||
feed.NextCrawlAt = now.Add(100 * time.Second)
|
||||
feed.NextCheckAt = now.Add(100 * time.Second)
|
||||
feed.LastError = ""
|
||||
feed.Status = "pass"
|
||||
c.saveFeed(feed)
|
||||
|
||||
@@ -386,8 +386,8 @@ func parseRSSDate(s string) (time.Time, error) {
|
||||
return time.Time{}, fmt.Errorf("unable to parse date: %s", s)
|
||||
}
|
||||
|
||||
// calculateNextCrawl determines when to next crawl this feed
|
||||
func (c *Crawler) calculateNextCrawl(feed *Feed) time.Time {
|
||||
// calculateNextCheck determines when to next check this feed (feed_check)
|
||||
func (c *Crawler) calculateNextCheck(feed *Feed) time.Time {
|
||||
// Adaptive backoff: 100s base + 100s per consecutive no-change
|
||||
return time.Now().Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
|
||||
}
@@ -109,6 +109,9 @@ func (c *Crawler) StartDashboard(addr string) error {
 	http.HandleFunc("/api/tlds", withAuth(func(w http.ResponseWriter, r *http.Request) {
 		c.handleAPITLDs(w, r)
 	}))
+	http.HandleFunc("/api/searchStats", withAuth(func(w http.ResponseWriter, r *http.Request) {
+		c.handleAPISearchStats(w, r)
+	}))
 	http.HandleFunc("/api/tldDomains", withAuth(func(w http.ResponseWriter, r *http.Request) {
 		c.handleAPITLDDomains(w, r)
 	}))
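The new /api/searchStats route reuses the existing `withAuth` wrapper, whose implementation is not in this commit. For orientation only, a generic shape such a middleware often takes; the basic-auth check and the DASH_USER/DASH_PASS variables are assumptions, not the project's code:

```go
package main

import (
	"net/http"
	"os"
)

// withAuth-style wrapper: reject the request unless credentials match.
// The project's real check (session, token, basic auth, ...) is not shown here.
func withAuth(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		user, pass, ok := r.BasicAuth()
		if !ok || user != os.Getenv("DASH_USER") || pass != os.Getenv("DASH_PASS") {
			w.Header().Set("WWW-Authenticate", `Basic realm="dashboard"`)
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		next(w, r)
	}
}

func main() {
	http.HandleFunc("/api/searchStats", withAuth(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"ok":true}`))
	}))
	http.ListenAndServe(":8080", nil)
}
```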
@@ -1,42 +0,0 @@
-#!/bin/bash
-# Deploy script - increments version, commits, pushes, and relaunches container
-# Usage: ./scripts/deploy.sh [optional commit message]
-
-set -e
-
-cd "$(dirname "$0")/.."
-
-# Extract current version number from templates.go
-CURRENT=$(grep -o '>v[0-9]*<' templates.go | grep -o '[0-9]*' | head -1)
-
-if [ -z "$CURRENT" ]; then
-  echo "Could not find version number in templates.go"
-  exit 1
-fi
-
-# Increment version
-NEW=$((CURRENT + 1))
-
-# Update templates.go
-sed -i '' "s/>v${CURRENT}</>v${NEW}</" templates.go
-
-echo "Version: v${CURRENT} -> v${NEW}"
-
-# Build commit message
-if [ -n "$1" ]; then
-  COMMIT_MSG="v${NEW}: $1"
-else
-  COMMIT_MSG="v${NEW}"
-fi
-
-# Commit and push
-git add -A
-git commit -m "$COMMIT_MSG"
-git push
-
-echo "Committed: $COMMIT_MSG"
-
-# Rebuild and relaunch
-docker compose up -d --build
-
-echo "Deployed v${NEW}"
+451
-65
@@ -14,6 +14,142 @@ function initDashboard() {
|
||||
let infiniteScrollState = null;
|
||||
let isLoadingMore = false;
|
||||
let searchQuery = '';
|
||||
let domainFilter = 'all'; // all, pass, skip, hold, dead
|
||||
// Feed filter: multi-select with ALL as exclusion toggle
|
||||
// When allSelected=true, selected items are EXCLUDED; when false, selected items are INCLUDED
|
||||
let feedFilter = { allSelected: false, statuses: [], types: [] };
|
||||
let currentOpenTLD = null; // Track which TLD is currently open

// Smart sticky header - scroll normally, show fixed on scroll up
let lastScrollY = 0;
const topSection = document.getElementById('topSection');
const spacer = document.getElementById('topSectionSpacer');
let headerHeight = topSection.offsetHeight;
let isFixed = false;

window.addEventListener('scroll', () => {
const currentScrollY = window.scrollY;

// If at top, return to normal flow
if (currentScrollY <= 0) {
topSection.classList.remove('fixed', 'hidden');
spacer.classList.remove('active');
isFixed = false;
lastScrollY = currentScrollY;
return;
}

// Only activate fixed mode after scrolling past the header
if (currentScrollY > headerHeight) {
if (currentScrollY < lastScrollY) {
// Scrolling up - show fixed header
if (!isFixed) {
spacer.style.height = headerHeight + 'px';
spacer.classList.add('active');
topSection.classList.add('fixed');
// Start hidden, then show
topSection.classList.add('hidden');
requestAnimationFrame(() => {
topSection.classList.remove('hidden');
});
isFixed = true;
} else {
topSection.classList.remove('hidden');
}
} else if (currentScrollY > lastScrollY && isFixed) {
// Scrolling down while fixed - hide it
topSection.classList.add('hidden');
}
}

lastScrollY = currentScrollY;
}, { passive: true });

// Stat card click handler
document.addEventListener('click', (e) => {
const card = e.target.closest('.card.clickable');
if (!card) return;

const filterType = card.dataset.filter;
const status = card.dataset.status;
const type = card.dataset.type;

if (filterType === 'domain') {
// Remove active from domain cards only
document.querySelectorAll('.card.clickable[data-filter="domain"]').forEach(c => c.classList.remove('active'));
card.classList.add('active');
domainFilter = status || 'all';

// Update placeholder
const searchInput = document.getElementById('searchInput');
searchInput.placeholder = domainFilter === 'all' ? 'Search domains...' : `Showing ${domainFilter} domains...`;

// Reload TLD list with new filter
loadFeeds(searchQuery);
} else if (filterType === 'feed') {
const wasActive = card.classList.contains('active');

if (status === 'all') {
// ALL card toggles exclusion mode
if (wasActive) {
card.classList.remove('active');
feedFilter.allSelected = false;
} else {
card.classList.add('active');
feedFilter.allSelected = true;
}
} else if (status) {
// Status card (pass, skip, hold, dead) - multi-select
if (wasActive) {
card.classList.remove('active');
feedFilter.statuses = feedFilter.statuses.filter(s => s !== status);
} else {
card.classList.add('active');
feedFilter.statuses.push(status);
}
} else if (type) {
// Type card (rss, atom, json, unknown, empty) - multi-select
if (wasActive) {
card.classList.remove('active');
feedFilter.types = feedFilter.types.filter(t => t !== type);
} else {
card.classList.add('active');
feedFilter.types.push(type);
}
}

// Reload TLD list with feed filter
loadFeeds(searchQuery);
}
});

// Refresh only expanded TLD sections with new domain filter
function refreshExpandedTLDs() {
const expandedContainer = document.getElementById('expandedTLDContent');
if (expandedContainer && expandedContainer.style.display !== 'none' && expandedContainer.dataset.tld) {
// Mark as needing reload and reload
expandedContainer.dataset.loaded = 'false';
loadTLDDomains(expandedContainer, searchQuery);
}
}

// Apply feed filter to currently visible feeds
function applyFeedFilter() {
document.querySelectorAll('.inline-feed-block').forEach(block => {
const feedStatus = block.dataset.status || 'hold';
const feedType = block.dataset.type || 'unknown';

let show = true;
if (feedFilter.status !== 'all' && feedStatus !== feedFilter.status) {
show = false;
}
if (feedFilter.type && feedType !== feedFilter.type) {
show = false;
}

block.style.display = show ? 'block' : 'none';
});
}

// Event delegation for domain-spacer clicks (toggle feeds)
document.addEventListener('click', (e) => {
@@ -96,8 +232,8 @@ function initDashboard() {
['Oldest Item', f.oldestItemDate],
['Newest Item', f.newestItemDate],
['Discovered', f.discoveredAt],
['Last Crawled', f.lastCrawledAt],
['Next Crawl', f.nextCrawlAt],
['Last Checked', f.lastCheckedAt],
['Next Check', f.nextCheckAt],
['Publish Status', f.publishStatus],
['Publish Account', f.publishAccount],
];
@@ -122,7 +258,8 @@ function initDashboard() {
const items = await resp.json();

if (!items || items.length === 0) {
itemsDiv.innerHTML = '<span style="color: #666;">No items</span>';
// Just clear the items area, keep the feed visible
itemsDiv.innerHTML = '';
return;
}

@@ -173,7 +310,6 @@ function initDashboard() {
function renderTLDHeader(tld) {
return `<div class="tld-section" data-tld="${escapeHtml(tld)}">
<div class="tld-header" style="display: flex; align-items: center; padding: 10px; background: #1a1a1a; border-bottom: 1px solid #333; cursor: pointer; user-select: none;">
<span class="tld-toggle" style="color: #666; margin-right: 10px;">▼</span>
<span style="color: #0af; font-weight: bold; font-size: 1.1em;">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content" style="display: block;">`;
@@ -192,45 +328,163 @@ function initDashboard() {
}
}

// Event delegation for TLD header/footer clicks (toggle section)
// Event delegation for TLD clicks (toggle section)
document.addEventListener('click', (e) => {
const tldHeader = e.target.closest('.tld-header');
const tldFooter = e.target.closest('.tld-footer');
const expandedContainer = document.getElementById('expandedTLDContent');

// Handle clicks in expanded container header
if (tldHeader && tldHeader.closest('#expandedTLDContent')) {
// Close the expanded content
const currentSection = document.querySelector('.tld-section.expanded');
if (currentSection) {
currentSection.classList.remove('expanded');
}
expandedContainer.style.display = 'none';
expandedContainer.innerHTML = '';
currentOpenTLD = null;
// Show TLD list again
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = '';
updateStats(); // Revert to search or all stats
return;
}

// Handle clicks on TLD cards
if (tldHeader || tldFooter) {
const section = (tldHeader || tldFooter).closest('.tld-section');
if (section) {
const content = section.querySelector('.tld-content');
const toggle = section.querySelector('.tld-toggle');
if (content) {
const isVisible = content.style.display !== 'none';
content.style.display = isVisible ? 'none' : 'block';
if (toggle) toggle.textContent = isVisible ? '▶' : '▼';
const tld = section.dataset.tld;
const isExpanded = section.classList.contains('expanded');

if (isVisible) {
// Closing - scroll to next TLD section
const nextSection = section.nextElementSibling;
if (nextSection && nextSection.classList.contains('tld-section')) {
nextSection.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
} else {
// Opening - load domains if not already loaded
if (section.dataset.loaded === 'false') {
loadTLDDomains(section, searchQuery);
}
}
if (isExpanded) {
// Closing this TLD
section.classList.remove('expanded');
expandedContainer.style.display = 'none';
expandedContainer.innerHTML = '';
currentOpenTLD = null;
// Show TLD list again
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = '';
updateStats(); // Revert to search or all stats
} else {
// Close any other open TLD first
document.querySelectorAll('.tld-section.expanded').forEach(s => {
s.classList.remove('expanded');
});

// Opening this TLD
section.classList.add('expanded');
currentOpenTLD = tld;
// Hide TLD list
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = 'none';
// Show TLD stats (filtered by search if active)
const currentSearch = document.getElementById('searchInput').value.trim();
updateStatsForTLD(tld, currentSearch);

// Set up expanded container with header
expandedContainer.innerHTML = `
<div class="tld-header">
<span class="tld-name">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
</div>
`;
expandedContainer.style.display = 'block';
expandedContainer.dataset.tld = tld;
expandedContainer.dataset.loaded = 'false';

// Load domains
loadTLDDomains(expandedContainer, searchQuery);

// Scroll to expanded container
expandedContainer.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
}
}
});

// Update stats for a specific TLD (optionally filtered by search)
async function updateStatsForTLD(tld, search = '') {
try {
let url = `/api/tldStats?tld=${encodeURIComponent(tld)}`;
if (search) {
url += `&search=${encodeURIComponent(search)}`;
}
const resp = await fetch(url);
if (!resp.ok) return;
const stats = await resp.json();

document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0);
document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0);

document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0);

document.getElementById('updatedAt').textContent = search ? `Search "${search}" in .${tld}` : `Stats for .${tld}`;
} catch (err) {
console.error('TLD stats update failed:', err);
}
}

// Update stats for search results
async function updateStatsForSearch(query) {
try {
const resp = await fetch(`/api/searchStats?search=${encodeURIComponent(query)}`);
if (!resp.ok) {
console.error('Search stats failed:', resp.status);
return;
}
const stats = await resp.json();

document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains || 0);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains || 0);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains || 0);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains || 0);
document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains || 0);

document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds || 0);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds || 0);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds || 0);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds || 0);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds || 0);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds || 0);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds || 0);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds || 0);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds || 0);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds || 0);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds || 0);

document.getElementById('updatedAt').textContent = `Search: "${query}"`;
} catch (err) {
console.error('Search stats update failed:', err);
}
}

// Render domain row with feeds
function renderDomainRow(d) {
const status = d.status || 'hold';

let html = `<div class="domain-block" data-host="${escapeHtml(d.host)}" data-status="${status}">`;
const fullDomain = d.tld ? d.host + '.' + d.tld : d.host;
let html = `<div class="domain-block" data-host="${escapeHtml(fullDomain)}" data-status="${status}">`;
html += `<div class="domain-row" style="display: flex; align-items: center; padding: 8px 10px; border-bottom: 1px solid #202020;">`;
html += renderStatusBtns(status, 'domain', d.host);
html += `<a class="domain-name" href="https://${escapeHtml(d.host)}" target="_blank" style="color: #0af; text-decoration: none;">${escapeHtml(d.host)}</a>`;
html += renderStatusBtns(status, 'domain', fullDomain);
html += `<a class="domain-name" href="https://${escapeHtml(fullDomain)}" target="_blank" style="color: #0af; text-decoration: none;">${escapeHtml(fullDomain)}</a>`;

if (d.last_error) {
html += `<span class="domain-spacer" style="color: #f66; margin-left: 10px; flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; cursor: pointer;" title="${escapeHtml(d.last_error)}">${escapeHtml(d.last_error)}</span>`;
@@ -244,7 +498,8 @@ function initDashboard() {
html += '<div class="domain-feeds" style="display: block; margin-left: 10px; border-left: 2px solid #333; padding-left: 6px;">';
d.feeds.forEach(f => {
const feedStatus = f.publish_status || 'hold';
html += `<div class="inline-feed-block" data-url="${escapeHtml(f.url)}" data-status="${feedStatus}">`;
const feedType = f.type || 'unknown';
html += `<div class="inline-feed-block" data-url="${escapeHtml(f.url)}" data-status="${feedStatus}" data-type="${feedType}">`;
html += `<div class="feed-row" style="display: flex; align-items: center; padding: 4px 0;">`;

html += `<span style="width: 48px; flex-shrink: 0; white-space: nowrap; margin-right: 6px; color: #666; text-align: center;">${escapeHtml(f.language || '')} </span>`;
@@ -341,7 +596,7 @@ function initDashboard() {

async function loadFeeds(query = '') {
const output = document.getElementById('output');
output.innerHTML = '<div class="domain-list"></div><div id="infiniteLoader" style="text-align: center; padding: 10px; color: #666;">Loading TLDs...</div>';
output.innerHTML = '<div class="domain-list"></div><div id="expandedTLDContent" style="display: none;"></div><div id="infiniteLoader" style="text-align: center; padding: 10px; color: #666;">Loading TLDs...</div>';

// Disconnect previous observer if any
if (tldObserver) {
@@ -349,26 +604,59 @@ function initDashboard() {
}

try {
// Fetch all TLDs first
const tldsResp = await fetch('/api/tlds?has_feeds=true');
// Fetch TLDs with optional domain status filter, feed filter, and search
let tldsUrl = '/api/tlds';
const params = [];
if (domainFilter !== 'all') {
params.push(`status=${domainFilter}`);
}
// Add feed filter params if any are selected
if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) {
if (feedFilter.allSelected) {
params.push('feedMode=exclude');
} else {
params.push('feedMode=include');
}
if (feedFilter.statuses.length > 0) {
params.push(`feedStatuses=${feedFilter.statuses.join(',')}`);
}
if (feedFilter.types.length > 0) {
params.push(`feedTypes=${feedFilter.types.join(',')}`);
}
}
if (query) {
params.push(`search=${encodeURIComponent(query)}`);
}
if (params.length > 0) {
tldsUrl += '?' + params.join('&');
}
const tldsResp = await fetch(tldsUrl);
if (!tldsResp.ok) {
const errText = await tldsResp.text();
throw new Error(`HTTP ${tldsResp.status}: ${errText}`);
}
const tlds = await tldsResp.json();

if (!tlds || tlds.length === 0) {
document.getElementById('infiniteLoader').textContent = 'No feeds found';
// Update stats for empty results
if (query) {
await updateStatsForSearch(query);
} else {
await updateStats();
}
document.getElementById('infiniteLoader').textContent = query ? 'No matches found' : 'No feeds found';
return;
}

const container = output.querySelector('.domain-list');

// Render all TLD sections as collapsed placeholders
// Render all TLD sections as card placeholders
tlds.forEach(t => {
const tld = t.tld || 'unknown';
container.insertAdjacentHTML('beforeend', `
<div class="tld-section" data-tld="${escapeHtml(tld)}" data-loaded="false">
<div class="tld-header" style="display: flex; align-items: center; padding: 10px; background: #1a1a1a; border-bottom: 1px solid #333; cursor: pointer; user-select: none;">
<span class="tld-toggle" style="color: #666; margin-right: 10px;">▶</span>
<span style="color: #0af; font-weight: bold; font-size: 1.1em;">.${escapeHtml(tld)}</span>
<span style="color: #666; margin-left: 10px; font-size: 0.9em;">(${t.domain_count} domains)</span>
<div class="tld-header">
<span class="tld-name">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content" style="display: none;">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
@@ -377,25 +665,49 @@ function initDashboard() {
`);
});

document.getElementById('infiniteLoader').textContent = `${tlds.length} TLDs loaded`;
document.getElementById('infiniteLoader').textContent = '';

// Set up IntersectionObserver for lazy loading (loads even when collapsed)
tldObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
const section = entry.target;
if (section.dataset.loaded === 'false') {
loadTLDDomains(section, query);
tldObserver.unobserve(section);
}
}
});
}, { rootMargin: '500px' });
// Auto-expand if single TLD match, otherwise update stats for search/all
if (tlds.length === 1) {
const tld = tlds[0].tld;
const expandedContainer = document.getElementById('expandedTLDContent');
const section = output.querySelector('.tld-section');

// Observe all TLD sections
container.querySelectorAll('.tld-section').forEach(section => {
tldObserver.observe(section);
});
if (section && expandedContainer) {
// Mark as expanded
section.classList.add('expanded');
currentOpenTLD = tld;
// Hide TLD list
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = 'none';

// Set up expanded container
expandedContainer.innerHTML = `
<div class="tld-header">
<span class="tld-name">.${escapeHtml(tld)}</span>
</div>
<div class="tld-content">
<div class="tld-loading" style="padding: 10px; color: #666;">Loading...</div>
</div>
`;
expandedContainer.style.display = 'block';
expandedContainer.dataset.tld = tld;
expandedContainer.dataset.loaded = 'false';

// Load domains
loadTLDDomains(expandedContainer, query);

// Show TLD stats (filtered by search if active)
await updateStatsForTLD(tld, query);
}
} else {
// Multiple TLDs - show search or global stats
if (query) {
await updateStatsForSearch(query);
} else {
await updateStats();
}
}

} catch (err) {
document.getElementById('infiniteLoader').textContent = 'Error: ' + err.message;
@@ -408,12 +720,30 @@ function initDashboard() {
section.dataset.loaded = 'loading';

try {
let url = `/api/domains?has_feeds=true&tld=${encodeURIComponent(tld)}&limit=500`;
let url = `/api/domains?tld=${encodeURIComponent(tld)}&limit=500`;
if (domainFilter !== 'all') {
url += `&status=${domainFilter}`;
}
if (query) {
url += `&search=${encodeURIComponent(query)}`;
}
// Apply feed filter if any feed cards are selected
if (feedFilter.allSelected || feedFilter.statuses.length > 0 || feedFilter.types.length > 0) {
if (feedFilter.allSelected) {
url += '&feedMode=exclude';
} else {
url += '&feedMode=include';
}
if (feedFilter.statuses.length > 0) {
url += `&feedStatuses=${feedFilter.statuses.join(',')}`;
}
if (feedFilter.types.length > 0) {
url += `&feedTypes=${feedFilter.types.join(',')}`;
}
}
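// Example of a fully combined request this can produce (illustrative values):
//   /api/domains?tld=com&limit=500&status=pass&search=news&feedMode=include&feedStatuses=pass&feedTypes=rss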

const resp = await fetch(url);
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
const domains = await resp.json();

const content = section.querySelector('.tld-content');
@@ -449,34 +779,90 @@ function initDashboard() {

// Search handler
const searchInput = document.getElementById('searchInput');
let searchTimeout;
searchInput.addEventListener('input', () => {
clearTimeout(searchTimeout);
searchTimeout = setTimeout(() => {
searchQuery = searchInput.value.trim();
loadFeeds(searchQuery);
}, 300);
function doSearch() {
searchQuery = searchInput.value.trim();
loadFeeds(searchQuery);
}

// Search on button click
document.getElementById('searchBtn').addEventListener('click', doSearch);

// Clear button - clears search and resets all filters
document.getElementById('clearBtn').addEventListener('click', () => {
searchInput.value = '';
searchQuery = '';
// Reset filters to default
domainFilter = 'all';
feedFilter = { allSelected: false, statuses: [], types: [] };
// Reset active card styling
document.querySelectorAll('.card.clickable.active').forEach(c => c.classList.remove('active'));
document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active');
searchInput.placeholder = 'Search domains...';
// Close any expanded TLD
currentOpenTLD = null;
const expandedContainer = document.getElementById('expandedTLDContent');
if (expandedContainer) {
expandedContainer.style.display = 'none';
expandedContainer.innerHTML = '';
}
// Show TLD list if hidden
const domainList = document.querySelector('.domain-list');
if (domainList) domainList.style.display = '';
// Reload and update stats
loadFeeds();
});

// Initial load
// Search on Enter key
searchInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter') {
e.preventDefault();
doSearch();
}
});

// Initial load - set default active cards and load
document.querySelector('.card.clickable[data-filter="domain"][data-status="all"]')?.classList.add('active');
loadFeeds();

// Update stats periodically
async function updateStats() {
// Check actual input value for current search state
const currentSearch = document.getElementById('searchInput')?.value.trim() || '';

// Priority: open TLD > search query > all
if (currentOpenTLD) {
updateStatsForTLD(currentOpenTLD, currentSearch);
return;
}
if (currentSearch) {
updateStatsForSearch(currentSearch);
return;
}

try {
const resp = await fetch('/api/stats');
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
const stats = await resp.json();
document.getElementById('totalDomains').textContent = commaFormat(stats.total_domains);
document.getElementById('holdDomains').textContent = commaFormat(stats.hold_domains);
document.getElementById('passDomains').textContent = commaFormat(stats.pass_domains);
document.getElementById('skipDomains').textContent = commaFormat(stats.skip_domains);
document.getElementById('crawlRate').textContent = commaFormat(stats.crawl_rate);
document.getElementById('checkRate').textContent = commaFormat(stats.check_rate);
document.getElementById('deadDomains').textContent = commaFormat(stats.dead_domains);
document.getElementById('domainCheckRate').textContent = commaFormat(stats.domain_check_rate);
document.getElementById('feedCrawlRate').textContent = commaFormat(stats.feed_crawl_rate);
document.getElementById('feedCheckRate').textContent = commaFormat(stats.feed_check_rate);
document.getElementById('totalFeeds').textContent = commaFormat(stats.total_feeds);
document.getElementById('aliveFeeds').textContent = commaFormat(stats.alive_feeds);
document.getElementById('publishFeeds').textContent = commaFormat(stats.publish_feeds);
document.getElementById('skipFeeds').textContent = commaFormat(stats.skip_feeds);
document.getElementById('holdFeeds').textContent = commaFormat(stats.hold_feeds);
document.getElementById('deadFeeds').textContent = commaFormat(stats.dead_feeds);
document.getElementById('emptyFeeds').textContent = commaFormat(stats.empty_feeds);
document.getElementById('rssFeeds').textContent = commaFormat(stats.rss_feeds);
document.getElementById('atomFeeds').textContent = commaFormat(stats.atom_feeds);
document.getElementById('jsonFeeds').textContent = commaFormat(stats.json_feeds);
document.getElementById('unknownFeeds').textContent = commaFormat(stats.unknown_feeds);
document.getElementById('updatedAt').textContent = 'Last updated: ' + new Date().toLocaleString();
document.getElementById('updatedAt').textContent = 'All TLDs - ' + new Date().toLocaleTimeString();
} catch (err) {
console.error('Stats update failed:', err);
}

+2
-2
@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
<title>1440.news Feed Crawler</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/static/dashboard.css?v=222">
<script src="/static/dashboard.js?v=222"></script>
<link rel="stylesheet" href="/static/dashboard.css?v=1769990750">
<script src="/static/dashboard.js?v=1769990750"></script>
</head>
<body>
<div id="topSection">
