package main

import (
	"fmt"
	"time"
)

// DashboardStats holds all statistics for the dashboard
type DashboardStats struct {
	// Domain stats
	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed stats
	TotalFeeds   int `json:"total_feeds"`
	AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
	PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"`
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"`

	// Processing rates (per minute)
	DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
	DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
	FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
	FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute

	// Timing
	UpdatedAt time.Time `json:"updated_at"`
}

type TLDStat struct {
	TLD   string `json:"tld"`
	Count int    `json:"count"`
}

type RecentFeed struct {
	URL          string    `json:"url"`
	Title        string    `json:"title"`
	Type         string    `json:"type"`
	DiscoveredAt time.Time `json:"discovered_at"`
}

type DomainStat struct {
	Host       string `json:"host"`
	FeedsFound int    `json:"feeds_found"`
}

// commaFormat formats an integer with comma separators (e.g. 1234567 -> "1,234,567")
func commaFormat(n int) string {
	s := fmt.Sprintf("%d", n)
	sign := ""
	if n < 0 {
		sign, s = "-", s[1:]
	}
	if len(s) <= 3 {
		return sign + s
	}
	var result []byte
	for i, c := range s {
		if i > 0 && (len(s)-i)%3 == 0 {
			result = append(result, ',')
		}
		result = append(result, byte(c))
	}
	return sign + string(result)
}

// UpdateStats recalculates and caches dashboard statistics
func (c *Crawler) UpdateStats() {
	fmt.Println("UpdateStats: calculating stats...")
	stats, err := c.calculateStats()
	if err != nil {
		fmt.Printf("UpdateStats: error calculating stats: %v\n", err)
		return
	}

	// Cache all domains with feeds (runs in background, so slow query is OK)
	fmt.Println("UpdateStats: fetching all domains...")
	allDomains := c.fetchAllDomainsFromDB()
	fmt.Printf("UpdateStats: got %d domains\n", len(allDomains))

	c.statsMu.Lock()
	c.cachedStats = stats
	c.cachedAllDomains = allDomains
	c.statsMu.Unlock()
	fmt.Println("UpdateStats: complete")
}

func (c *Crawler) fetchAllDomainsFromDB() []DomainStat {
	rows, err := c.db.Query(`
		SELECT tld, source_host, COUNT(*) as cnt
		FROM feeds
		GROUP BY tld, source_host
		ORDER BY tld, source_host
	`)
	if err != nil {
		fmt.Printf("fetchAllDomainsFromDB error: %v\n", err)
		return nil
	}
	defer rows.Close()

	var domains []DomainStat
	for rows.Next() {
		var ds DomainStat
		var tld string
		if err := rows.Scan(&tld, &ds.Host, &ds.FeedsFound); err != nil {
			continue
		}
		domains = append(domains, ds)
	}
	return domains
}

// GetDashboardStats returns cached statistics (returns empty stats if not yet cached)
func (c *Crawler) GetDashboardStats() (*DashboardStats, error) {
	c.statsMu.RLock()
	stats := c.cachedStats
	c.statsMu.RUnlock()

	if stats != nil {
		return stats, nil
	}

	// Return empty stats while background calculation runs (don't block HTTP requests)
	return &DashboardStats{UpdatedAt: time.Now()}, nil
}

// calculateStats collects all statistics for the dashboard
func (c *Crawler) calculateStats() (*DashboardStats, error) {
	stats := &DashboardStats{
		UpdatedAt:      time.Now(),
		DomainsCrawled: c.domainsCrawled,
	}

	// Calculate rates (per minute)
	elapsed := time.Since(c.startTime).Minutes()
	if elapsed > 0 {
		stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed)
		stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed)
		stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed)
	}

	// Get domain stats
	if err := c.collectDomainStats(stats); err != nil {
		return nil, err
	}

	// Get feed stats
	if err := c.collectFeedStats(stats); err != nil {
		return nil, err
	}

	return stats, nil
}

func (c *Crawler) collectDomainStats(stats *DashboardStats) error {
	// Use COUNT(*) for total count
	err := c.db.QueryRow("SELECT COUNT(*) FROM domains").Scan(&stats.TotalDomains)
	if err != nil {
		return err
	}

	// Single query to get all status counts (one index scan instead of three)
	rows, err := c.db.Query("SELECT status, COUNT(*) FROM domains GROUP BY status")
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var status string
		var count int
		if err := rows.Scan(&status, &count); err != nil {
			continue
		}
		switch status {
		case "hold":
			stats.HoldDomains = count
		case "pass":
			stats.PassDomains = count
		case "skip":
			stats.SkipDomains = count
		case "dead":
			stats.DeadDomains = count
		}
	}
	return rows.Err()
}

func (c *Crawler) collectFeedStats(stats *DashboardStats) error {
	// Use COUNT(*) for total count
	err := c.db.QueryRow("SELECT COUNT(*) FROM feeds").Scan(&stats.TotalFeeds)
	if err != nil {
		return err
	}

	// Get status counts
	statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status")
	if err != nil {
		return err
	}
	defer statusRows.Close()

	for statusRows.Next() {
		var status *string
		var count int
		if err := statusRows.Scan(&status, &count); err != nil {
			continue
		}
		if status != nil {
			switch *status {
			case "pass":
				stats.AliveFeeds = count
			case "skip":
				stats.SkipFeeds = count
			case "hold":
				stats.HoldFeeds = count
			case "dead":
				stats.DeadFeeds = count
			}
		}
	}
	if err := statusRows.Err(); err != nil {
		return err
	}

	// Count feeds approved for publishing (publish_status='pass').
	// Best-effort: a Scan error here simply leaves the field at zero.
	c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds)

	// Count empty feeds (item_count = 0 or NULL)
	c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds)

	// Single query to get all type counts (one index scan instead of three)
	rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type")
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var feedType *string
		var count int
		if err := rows.Scan(&feedType, &count); err != nil {
			continue
		}
		if feedType == nil {
			stats.UnknownFeeds += count
		} else {
			switch *feedType {
			case "rss":
				stats.RSSFeeds = count
			case "atom":
				stats.AtomFeeds = count
			case "json":
				stats.JSONFeeds = count
			default:
				stats.UnknownFeeds += count
			}
		}
	}
	return rows.Err()
}
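
// The sketch below is illustrative and not part of the original file: it shows one
// way UpdateStats could be driven in the background so GetDashboardStats always
// serves from cache. StartStatsLoop, its interval parameter, and the stop channel
// are assumed names; the real crawler may schedule the refresh differently.
func (c *Crawler) StartStatsLoop(interval time.Duration, stop <-chan struct{}) {
	go func() {
		// Prime the cache so early dashboard requests see real numbers sooner.
		c.UpdateStats()

		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-stop:
				return
			case <-ticker.C:
				c.UpdateStats()
			}
		}
	}()
}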