Phase 5: Remove dashboard code from crawler
Removed dashboard-related files (now in the standalone dashboard/ service):
- api_domains.go, api_feeds.go, api_publish.go, api_search.go
- dashboard.go, templates.go
- oauth.go, oauth_handlers.go, oauth_middleware.go, oauth_session.go
- routes.go
- static/dashboard.css, static/dashboard.js

Updated crawler.go:
- Removed the cachedStats, cachedAllDomains, and statsMu fields
- Removed the StartStatsLoop function

Updated main.go:
- Removed dashboard startup
- Removed the stats loop and UpdateStats calls

The crawler now runs independently, without a dashboard. Use the standalone
dashboard/ service for the web interface.
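main.go itself is not shown in this commit view. A hypothetical sketch of the slimmed-down startup described above, using only NewCrawler, StartCleanupLoop, and Close from the diff below; the DATABASE_URL variable and the signal handling are assumptions:

package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// NewCrawler and Close appear in the crawler.go diff below;
	// reading the connection string from DATABASE_URL is an assumption.
	crawler, err := NewCrawler(os.Getenv("DATABASE_URL"))
	if err != nil {
		log.Fatalf("failed to start crawler: %v", err)
	}
	defer crawler.Close()

	// No dashboard startup and no stats loop any more; only crawl-side
	// background work such as the weekly cleanup remains.
	go crawler.StartCleanupLoop()

	// Block until an interrupt so the deferred Close can run.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	<-sigCh
}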
crawler.go

@@ -18,23 +18,20 @@ import (
 )
 
 type Crawler struct {
 	MaxDepth        int
 	MaxPagesPerHost int
 	Timeout         time.Duration
 	UserAgent       string
 	visited         sync.Map
 	feedsMu         sync.Mutex
 	client          *http.Client
 	domainsCrawled  int32 // feed_crawl: domains crawled for feed discovery
 	domainsChecked  int32 // domain_check: domains checked for liveness
 	feedsChecked    int32 // feed_check: feeds checked for new items
 	startTime       time.Time
 	db              *DB
 	domainsImported int32
-	cachedStats      *DashboardStats
-	cachedAllDomains []DomainStat
-	statsMu          sync.RWMutex
 	shutdownCh      chan struct{} // closed on shutdown to signal goroutines
 }
 
 func NewCrawler(connString string) (*Crawler, error) {
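shutdownCh uses Go's close-to-broadcast idiom: closing a channel unblocks every receiver at once, so one close call can signal any number of goroutines. The IsShuttingDown helper called by the loops below is not part of this diff; a minimal sketch of the idiom, where Shutdown is a hypothetical method and only shutdownCh comes from the struct above:

// Sketch only: IsShuttingDown is referenced elsewhere in crawler.go but not
// shown in this diff; this is one common way to implement it.
func (c *Crawler) IsShuttingDown() bool {
	select {
	case <-c.shutdownCh:
		return true // a closed channel always receives immediately
	default:
		return false // still running
	}
}

// Shutdown is hypothetical; whatever owns the Crawler must close the
// channel exactly once (e.g. guarded by sync.Once in real code).
func (c *Crawler) Shutdown() {
	close(c.shutdownCh)
}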
@@ -107,17 +104,6 @@ func (c *Crawler) Close() error {
 	return nil
 }
 
-// StartStatsLoop updates cached stats every 10 seconds
-func (c *Crawler) StartStatsLoop() {
-	for {
-		if c.IsShuttingDown() {
-			return
-		}
-		c.UpdateStats()
-		time.Sleep(10 * time.Second)
-	}
-}
-
// StartCleanupLoop runs item cleanup once per week
 func (c *Crawler) StartCleanupLoop() {
 	for {
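The removed loop slept for 10 seconds between UpdateStats calls and only checked IsShuttingDown at the top of each pass, so shutdown could lag by up to one full interval. If the standalone dashboard service reimplements it, a select over a ticker and shutdownCh reacts immediately. Everything here beyond the 10-second interval and the UpdateStats name is a sketch:

// Hypothetical replacement for the removed StartStatsLoop: shutdown-aware
// via select rather than a sleep-then-check loop.
func (c *Crawler) statsLoop() {
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-c.shutdownCh:
			return // exits as soon as shutdown is signaled
		case <-ticker.C:
			// Assumes an UpdateStats equivalent still exists wherever
			// this loop ends up living (per this commit, dashboard-side).
			c.UpdateStats()
		}
	}
}

The select version also guarantees the ticker is stopped on exit, whereas the removed version could not be interrupted mid-sleep.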
@@ -367,7 +353,7 @@ type FeedInfo struct {
 func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
 	var title, description, siteURL, sourceHost *string
 	err := c.db.QueryRow(`
-		SELECT title, description, site_url, domain_host || '.' || domain_tld as source_host FROM feeds WHERE url = $1
+		SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1
 	`, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
 	if err != nil {
 		return nil
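The query now selects domain_host alone as source_host instead of concatenating domain_host and domain_tld. Note that getFeedInfo scans into *string pointers because these columns can be NULL; a standalone sketch of that database/sql pattern, where getFeedHost is a hypothetical helper and only the table and column names come from the query above:

import "database/sql"

// Sketch of the NULL-handling pattern used above: scanning a nullable
// column into a *string leaves the pointer nil for NULL instead of erroring.
func getFeedHost(db *sql.DB, feedURL string) (string, error) {
	var sourceHost *string
	err := db.QueryRow(
		`SELECT domain_host AS source_host FROM feeds WHERE url = $1`,
		feedURL,
	).Scan(&sourceHost)
	if err != nil {
		return "", err // includes sql.ErrNoRows when no feed matches
	}
	if sourceHost == nil {
		return "", nil // row exists but domain_host is NULL
	}
	return *sourceHost, nil
}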
@@ -383,7 +369,7 @@ func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
 // RefreshAllProfiles updates profiles for all existing accounts with feed URLs
 func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
 	rows, err := c.db.Query(`
-		SELECT url, title, description, site_url, domain_host || '.' || domain_tld as source_host, publish_account
+		SELECT url, title, description, site_url, domain_host as source_host, publish_account
 		FROM feeds
 		WHERE publish_account IS NOT NULL AND publish_account <> ''
 	`)
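The hunk cuts off at the query; the loop that consumes it is not shown in this view. The standard database/sql iteration such a query feeds into looks like the sketch below. The column order matches the new SELECT, but the actual per-account profile update is not in the diff and is elided here:

// Sketch of the usual rows.Next/Scan/Err pattern; assumes "log" is imported.
defer rows.Close()
for rows.Next() {
	var url, publishAccount string
	var title, description, siteURL, sourceHost *string // nullable columns
	if err := rows.Scan(&url, &title, &description, &siteURL, &sourceHost, &publishAccount); err != nil {
		log.Printf("scan feeds row: %v", err)
		continue
	}
	// ... build the profile and push it via publisher (not shown in the diff) ...
}
if err := rows.Err(); err != nil {
	log.Printf("iterating feeds: %v", err)
}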