Files
crawler/main.go
primal 3999e96f26 Dashboard UI overhaul: inline feed details, TLD filtering, status improvements
- Feed details now expand inline instead of navigating to new page
- Add TLD section headers with domains sorted by TLD then name
- Add TLD filter button to show/hide domain sections by TLD
- Feed status behavior: pass creates account, hold crawls only, skip stops, drop cleans up
- Auto-follow new accounts from directory account (1440.news)
- Fix handle derivation (removed duplicate .1440.news suffix)
- Increase domain import batch size to 100k
- Various bug fixes for account creation and profile updates

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 20:51:05 -05:00

77 lines
2.0 KiB
Go

package main
import (
"fmt"
"os"
"os/signal"
"syscall"
)
// main boots the crawler, launches every background loop plus the HTTP
// dashboard, then blocks until SIGINT/SIGTERM, at which point it closes
// the crawler (checkpointing the WAL and closing the database).
func main() {
	// An empty connection string tells NewCrawler to read its
	// configuration from the environment (DATABASE_URL or DB_* vars).
	crawler, err := NewCrawler("")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error initializing crawler: %v\n", err)
		os.Exit(1)
	}

	// Capture interrupt/terminate so we can shut down gracefully.
	shutdown := make(chan os.Signal, 1)
	signal.Notify(shutdown, syscall.SIGINT, syscall.SIGTERM)

	// Serve the dashboard concurrently; a serve failure is reported on
	// stderr but does not take the whole process down.
	go func() {
		if err := crawler.StartDashboard("0.0.0.0:4321"); err != nil {
			fmt.Fprintf(os.Stderr, "Dashboard error: %v\n", err)
		}
	}()

	// Warm the stats asynchronously — this can be slow on large databases.
	go crawler.UpdateStats()

	fmt.Println("Starting import, crawl, check, and stats loops...")

	// Seed the domain table: bulk .com import from vertices.txt.gz plus a
	// couple of hand-picked test hosts on top of it.
	go crawler.ImportDomainsInBackground("vertices.txt.gz")
	go crawler.ImportTestDomains([]string{
		"news.ycombinator.com",
		"ycombinator.com",
	})

	// Independent background loops, each running for the life of the
	// process: domain checking, per-minute stats refresh, weekly item
	// cleanup, DB maintenance (WAL checkpoints / integrity checks),
	// autopublishing for approved feeds, reachability checks for
	// approved domains, and feed crawling of checked domains.
	go crawler.StartCheckLoop()
	go crawler.StartStatsLoop()
	go crawler.StartCleanupLoop()
	go crawler.StartMaintenanceLoop()
	go crawler.StartPublishLoop()
	go crawler.StartDomainCheckLoop()
	go crawler.StartCrawlLoop()

	// Park here until a signal arrives, then tear everything down.
	sig := <-shutdown
	fmt.Printf("\nReceived %v, shutting down gracefully...\n", sig)
	if err := crawler.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "Error closing crawler: %v\n", err)
		os.Exit(1)
	}
	fmt.Println("Shutdown complete")
}