package main
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
)
|
|
|
|
// DashboardStats holds all statistics for the dashboard.
// It is computed in the background by calculateStats and cached on the
// Crawler (guarded by statsMu); handlers read the cached copy.
type DashboardStats struct {
	// Domain stats: total row count plus a breakdown by the domains.status column.
	TotalDomains int `json:"total_domains"`
	HoldDomains  int `json:"hold_domains"`
	PassDomains  int `json:"pass_domains"`
	SkipDomains  int `json:"skip_domains"`
	DeadDomains  int `json:"dead_domains"`

	// Feed stats: total row count plus breakdowns by status, publish_status,
	// item_count, and feed type.
	TotalFeeds   int `json:"total_feeds"`
	AliveFeeds   int `json:"alive_feeds"`   // status='pass' (healthy feeds)
	PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
	SkipFeeds    int `json:"skip_feeds"`
	HoldFeeds    int `json:"hold_feeds"`
	DeadFeeds    int `json:"dead_feeds"`
	EmptyFeeds   int `json:"empty_feeds"` // item_count IS NULL or 0
	RSSFeeds     int `json:"rss_feeds"`
	AtomFeeds    int `json:"atom_feeds"`
	JSONFeeds    int `json:"json_feeds"`
	UnknownFeeds int `json:"unknown_feeds"` // NULL or unrecognized type

	// Processing rates (per minute), derived from lifetime counters divided
	// by minutes elapsed since Crawler.startTime.
	DomainsCrawled  int32 `json:"domains_crawled"`   // feed_crawl count
	DomainCheckRate int   `json:"domain_check_rate"` // domain_check per minute
	FeedCrawlRate   int   `json:"feed_crawl_rate"`   // feed_crawl per minute
	FeedCheckRate   int   `json:"feed_check_rate"`   // feed_check per minute

	// Timing: when this snapshot was computed.
	UpdatedAt time.Time `json:"updated_at"`
}
|
|
|
|
// TLDStat pairs a top-level domain with the number of entries under it.
type TLDStat struct {
	TLD   string `json:"tld"`
	Count int    `json:"count"`
}
|
|
|
|
// RecentFeed is a lightweight view of a newly discovered feed for display
// on the dashboard.
type RecentFeed struct {
	URL          string    `json:"url"`
	Title        string    `json:"title"`
	Type         string    `json:"type"` // e.g. "rss", "atom", "json"
	DiscoveredAt time.Time `json:"discovered_at"`
}
|
|
|
|
// DomainStat reports how many feeds were found on a single source host.
type DomainStat struct {
	Host       string `json:"host"`
	FeedsFound int    `json:"feeds_found"`
}
|
|
|
|
// commaFormat formats an integer with comma separators, e.g. 1234567 ->
// "1,234,567". Negative values keep their sign: -1234 -> "-1,234".
func commaFormat(n int) string {
	s := fmt.Sprintf("%d", n)

	// Split off the sign so it is never counted as a digit when grouping.
	// (Previously -123 rendered as "-,123" because '-' was treated as a digit.)
	sign := ""
	if len(s) > 0 && s[0] == '-' {
		sign, s = "-", s[1:]
	}

	if len(s) <= 3 {
		return sign + s
	}

	// At most one comma per 3 digits; pre-size to avoid append growth.
	result := make([]byte, 0, len(s)+len(s)/3)
	for i := 0; i < len(s); i++ {
		// Insert a comma whenever the remaining digit count is a multiple of 3.
		if i > 0 && (len(s)-i)%3 == 0 {
			result = append(result, ',')
		}
		result = append(result, s[i])
	}
	return sign + string(result)
}
|
|
|
|
// UpdateStats recalculates and caches dashboard statistics
|
|
func (c *Crawler) UpdateStats() {
|
|
fmt.Println("UpdateStats: calculating stats...")
|
|
stats, err := c.calculateStats()
|
|
if err != nil {
|
|
fmt.Printf("UpdateStats: error calculating stats: %v\n", err)
|
|
return
|
|
}
|
|
// Cache all domains with feeds (runs in background, so slow query is OK)
|
|
fmt.Println("UpdateStats: fetching all domains...")
|
|
allDomains := c.fetchAllDomainsFromDB()
|
|
fmt.Printf("UpdateStats: got %d domains\n", len(allDomains))
|
|
|
|
c.statsMu.Lock()
|
|
c.cachedStats = stats
|
|
c.cachedAllDomains = allDomains
|
|
c.statsMu.Unlock()
|
|
fmt.Println("UpdateStats: complete")
|
|
}
|
|
|
|
func (c *Crawler) fetchAllDomainsFromDB() []DomainStat {
|
|
rows, err := c.db.Query(`
|
|
SELECT tld, source_host, COUNT(*) as cnt FROM feeds
|
|
GROUP BY tld, source_host
|
|
ORDER BY tld, source_host
|
|
`)
|
|
if err != nil {
|
|
fmt.Printf("fetchAllDomainsFromDB error: %v\n", err)
|
|
return nil
|
|
}
|
|
defer rows.Close()
|
|
|
|
var domains []DomainStat
|
|
for rows.Next() {
|
|
var ds DomainStat
|
|
var tld string
|
|
if err := rows.Scan(&tld, &ds.Host, &ds.FeedsFound); err != nil {
|
|
continue
|
|
}
|
|
domains = append(domains, ds)
|
|
}
|
|
return domains
|
|
}
|
|
|
|
// GetDashboardStats returns cached statistics (returns empty stats if not yet cached)
|
|
func (c *Crawler) GetDashboardStats() (*DashboardStats, error) {
|
|
c.statsMu.RLock()
|
|
stats := c.cachedStats
|
|
c.statsMu.RUnlock()
|
|
|
|
if stats != nil {
|
|
return stats, nil
|
|
}
|
|
// Return empty stats while background calculation runs (don't block HTTP requests)
|
|
return &DashboardStats{UpdatedAt: time.Now()}, nil
|
|
}
|
|
|
|
// calculateStats collects all statistics for the dashboard
|
|
func (c *Crawler) calculateStats() (*DashboardStats, error) {
|
|
stats := &DashboardStats{
|
|
UpdatedAt: time.Now(),
|
|
DomainsCrawled: c.domainsCrawled,
|
|
}
|
|
|
|
// Calculate rates (per minute)
|
|
elapsed := time.Since(c.startTime).Minutes()
|
|
if elapsed > 0 {
|
|
stats.DomainCheckRate = int(float64(c.domainsChecked) / elapsed)
|
|
stats.FeedCrawlRate = int(float64(c.domainsCrawled) / elapsed)
|
|
stats.FeedCheckRate = int(float64(c.feedsChecked) / elapsed)
|
|
}
|
|
|
|
// Get domain stats
|
|
if err := c.collectDomainStats(stats); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get feed stats
|
|
if err := c.collectFeedStats(stats); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return stats, nil
|
|
}
|
|
|
|
func (c *Crawler) collectDomainStats(stats *DashboardStats) error {
|
|
// Use COUNT(*) for total count
|
|
err := c.db.QueryRow("SELECT COUNT(*) FROM domains").Scan(&stats.TotalDomains)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Single query to get all status counts (one index scan instead of three)
|
|
rows, err := c.db.Query("SELECT status, COUNT(*) FROM domains GROUP BY status")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var status string
|
|
var count int
|
|
if err := rows.Scan(&status, &count); err != nil {
|
|
continue
|
|
}
|
|
switch status {
|
|
case "hold":
|
|
stats.HoldDomains = count
|
|
case "pass":
|
|
stats.PassDomains = count
|
|
case "skip":
|
|
stats.SkipDomains = count
|
|
case "dead":
|
|
stats.DeadDomains = count
|
|
}
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return rows.Err()
|
|
}
|
|
|
|
func (c *Crawler) collectFeedStats(stats *DashboardStats) error {
|
|
// Use COUNT(*) for total count
|
|
err := c.db.QueryRow("SELECT COUNT(*) FROM feeds").Scan(&stats.TotalFeeds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Get status counts
|
|
statusRows, err := c.db.Query("SELECT status, COUNT(*) FROM feeds GROUP BY status")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer statusRows.Close()
|
|
|
|
for statusRows.Next() {
|
|
var status *string
|
|
var count int
|
|
if err := statusRows.Scan(&status, &count); err != nil {
|
|
continue
|
|
}
|
|
if status != nil {
|
|
switch *status {
|
|
case "pass":
|
|
stats.AliveFeeds = count
|
|
case "skip":
|
|
stats.SkipFeeds = count
|
|
case "hold":
|
|
stats.HoldFeeds = count
|
|
case "dead":
|
|
stats.DeadFeeds = count
|
|
}
|
|
}
|
|
}
|
|
|
|
// Count feeds approved for publishing (publish_status='pass')
|
|
c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE publish_status = 'pass'").Scan(&stats.PublishFeeds)
|
|
|
|
// Count empty feeds (item_count = 0 or NULL)
|
|
c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE item_count IS NULL OR item_count = 0").Scan(&stats.EmptyFeeds)
|
|
|
|
// Single query to get all type counts (one index scan instead of three)
|
|
rows, err := c.db.Query("SELECT type, COUNT(*) FROM feeds GROUP BY type")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var feedType *string
|
|
var count int
|
|
if err := rows.Scan(&feedType, &count); err != nil {
|
|
continue
|
|
}
|
|
if feedType == nil {
|
|
stats.UnknownFeeds += count
|
|
} else {
|
|
switch *feedType {
|
|
case "rss":
|
|
stats.RSSFeeds = count
|
|
case "atom":
|
|
stats.AtomFeeds = count
|
|
case "json":
|
|
stats.JSONFeeds = count
|
|
default:
|
|
stats.UnknownFeeds += count
|
|
}
|
|
}
|
|
}
|
|
return rows.Err()
|
|
}
|