- StartDomainCheckLoop: DNS verification for unchecked domains (1000 workers) - StartFeedCrawlLoop: Feed discovery on DNS-verified domains (100 workers) This fixes starvation where 104M unchecked domains blocked 1.2M DNS-verified domains from ever being crawled for feeds. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
64 lines
1.7 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"os/signal"
|
|
"syscall"
|
|
)
|
|
|
|
func main() {
|
|
// Connection string from environment (DATABASE_URL or DB_* vars)
|
|
crawler, err := NewCrawler("")
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error initializing crawler: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Setup graceful shutdown
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
|
|
// Start all loops independently
|
|
fmt.Println("Starting import and processing loops...")
|
|
|
|
// Import loop (background) - imports .com domains from vertices.txt.gz
|
|
go crawler.ImportDomainsInBackground("vertices.txt.gz")
|
|
|
|
// Add test domains (in addition to imported domains)
|
|
go crawler.ImportTestDomains([]string{
|
|
"news.ycombinator.com",
|
|
"ycombinator.com",
|
|
})
|
|
|
|
// feed_check loop (background) - checks feeds for new items
|
|
go crawler.StartFeedCheckLoop()
|
|
|
|
// Cleanup loop (background) - removes old items once per week
|
|
go crawler.StartCleanupLoop()
|
|
|
|
// Maintenance loop (background) - WAL checkpoints and integrity checks
|
|
go crawler.StartMaintenanceLoop()
|
|
|
|
// TLD sync loop (background) - syncs with IANA, marks dead TLDs, adds new ones
|
|
go crawler.startTLDSyncLoop()
|
|
|
|
// Domain check loop (background) - DNS verification
|
|
go crawler.StartDomainCheckLoop()
|
|
|
|
// Feed crawl loop (background) - feed discovery on DNS-verified domains
|
|
go crawler.StartFeedCrawlLoop()
|
|
|
|
// Wait for shutdown signal
|
|
sig := <-sigChan
|
|
fmt.Printf("\nReceived %v, shutting down gracefully...\n", sig)
|
|
|
|
// Close crawler (checkpoints WAL and closes database)
|
|
if err := crawler.Close(); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error closing crawler: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Println("Shutdown complete")
|
|
}
|