From edf54ca212176dfb168c90b4335b7aefa57d4248 Mon Sep 17 00:00:00 2001
From: primal
Date: Sun, 1 Feb 2026 19:23:57 -0500
Subject: [PATCH] Add graceful shutdown for goroutines

- Add shutdownCh channel to signal goroutines to stop
- Check IsShuttingDown() in all main loops
- Wait 2 seconds for goroutines to finish before closing DB

Co-Authored-By: Claude Opus 4.5
---
 crawler.go | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/crawler.go b/crawler.go
index f452a21..4d0fd4d 100644
--- a/crawler.go
+++ b/crawler.go
@@ -33,6 +33,7 @@ type Crawler struct {
 	cachedStats      *DashboardStats
 	cachedAllDomains []DomainStat
 	statsMu          sync.RWMutex
+	shutdownCh       chan struct{} // closed on shutdown to signal goroutines
 }
 
 func NewCrawler(connString string) (*Crawler, error) {
@@ -48,6 +49,7 @@ func NewCrawler(connString string) (*Crawler, error) {
 		UserAgent: "FeedCrawler/1.0",
 		startTime: time.Now(),
 		db:        db,
+		shutdownCh: make(chan struct{}),
 		client: &http.Client{
 			Timeout: 10 * time.Second,
 			CheckRedirect: func(req *http.Request, via []*http.Request) error {
@@ -60,7 +62,24 @@
 	}, nil
 }
 
+// IsShuttingDown returns true if shutdown has been initiated
+func (c *Crawler) IsShuttingDown() bool {
+	select {
+	case <-c.shutdownCh:
+		return true
+	default:
+		return false
+	}
+}
+
 func (c *Crawler) Close() error {
+	// Signal all goroutines to stop
+	close(c.shutdownCh)
+
+	// Give goroutines time to finish current operations
+	fmt.Println("Waiting for goroutines to finish...")
+	time.Sleep(2 * time.Second)
+
 	if c.db != nil {
 		fmt.Println("Closing database...")
 		return c.db.Close()
@@ -71,6 +90,9 @@ func (c *Crawler) Close() error {
 // StartStatsLoop updates cached stats every 10 seconds
 func (c *Crawler) StartStatsLoop() {
 	for {
+		if c.IsShuttingDown() {
+			return
+		}
 		c.UpdateStats()
 		time.Sleep(10 * time.Second)
 	}
@@ -79,6 +101,9 @@
 // StartCleanupLoop runs item cleanup once per week
 func (c *Crawler) StartCleanupLoop() {
 	for {
+		if c.IsShuttingDown() {
+			return
+		}
 		deleted, err := c.CleanupOldItems()
 		if err != nil {
 			fmt.Printf("Cleanup error: %v\n", err)
@@ -161,6 +186,10 @@ func (c *Crawler) StartPublishLoop() {
 	c.RefreshAllProfiles(publisher, feedPassword)
 
 	for {
+		if c.IsShuttingDown() {
+			return
+		}
+
 		// Get up to 50 unpublished items from approved feeds, sorted by discovered_at ASC
 		items, err := c.GetAllUnpublishedItems(50)
 		if err != nil {
@@ -176,6 +205,9 @@
 
 		// Publish one item per second
 		for _, item := range items {
+			if c.IsShuttingDown() {
+				return
+			}
 			// Get or create session for this feed's account
 			account := c.getAccountForFeed(item.FeedURL)
 			if account == "" {
@@ -532,6 +564,11 @@ func (c *Crawler) StartDomainLoop() {
 	const fetchSize = 1000
 
 	for {
+		if c.IsShuttingDown() {
+			close(workChan)
+			return
+		}
+
 		domains, err := c.GetDomainsToProcess(fetchSize)
 		if err != nil {
 			fmt.Printf("Error fetching domains to process: %v\n", err)
@@ -581,6 +618,11 @@ func (c *Crawler) StartFeedCheckLoop() {
 	const fetchSize = 100
 
 	for {
+		if c.IsShuttingDown() {
+			close(workChan)
+			return
+		}
+
 		feeds, err := c.GetFeedsDueForCheck(fetchSize)
 		if err != nil {
 			fmt.Printf("Error fetching feeds: %v\n", err)
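
For context, a minimal sketch of how the shutdown path above might be wired into an entry point. None of this is part of the patch: the main function, the DATABASE_URL environment variable, and the signal handling are assumptions; only the Crawler methods shown in the diff are taken from it.

// Hypothetical caller-side wiring (not part of the patch); assumes
// crawler.go is in package main so NewCrawler is visible here.
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// Assumed connection-string source; NewCrawler is from the diff above.
	crawler, err := NewCrawler(os.Getenv("DATABASE_URL"))
	if err != nil {
		fmt.Printf("Failed to create crawler: %v\n", err)
		os.Exit(1)
	}

	// Each loop polls IsShuttingDown() and returns once shutdownCh closes.
	go crawler.StartStatsLoop()
	go crawler.StartCleanupLoop()
	go crawler.StartPublishLoop()
	go crawler.StartDomainLoop()
	go crawler.StartFeedCheckLoop()

	// Block until SIGINT/SIGTERM, then run the graceful shutdown path:
	// Close() closes shutdownCh, sleeps 2 seconds, and closes the DB.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	<-sigCh

	if err := crawler.Close(); err != nil {
		fmt.Printf("Shutdown error: %v\n", err)
	}
}

Note on the design choice in the patch: the 2-second sleep in Close() is a grace period, not a guarantee. A goroutine that is mid-iteration when shutdownCh closes only observes the signal at the top of its next loop pass, so a sync.WaitGroup (or context cancellation) would be needed to guarantee the DB is untouched after Close() returns.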