package main

import (
	"database/sql"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"sync/atomic"
	"time"
)

// shouldSkipFeed checks if a feed URL should be filtered out.
// It returns true (and a reason) if the feed should be skipped.
func shouldSkipFeed(feedURL string) (bool, string) {
	lower := strings.ToLower(feedURL)

	// Skip explicit comment feeds
	if strings.Contains(lower, "/comment") {
		return true, "comment feed"
	}

	u, err := url.Parse(feedURL)
	if err != nil {
		return false, ""
	}
	path := strings.ToLower(strings.TrimSuffix(u.Path, "/"))

	// Skip category/tag feeds
	categoryPatterns := []string{"/category/", "/tag/", "/tags/", "/categories/", "/topic/", "/topics/", "/author/"}
	for _, pattern := range categoryPatterns {
		if strings.Contains(path, pattern) {
			return true, "category/tag feed"
		}
	}

	// Check for article comment feeds (path ending in /feed with content before it)
	if strings.HasSuffix(path, "/feed") {
		basePath := strings.TrimSuffix(path, "/feed")
		basePath = strings.Trim(basePath, "/")
		if basePath == "" {
			return false, "" // Just /feed - legitimate main feed
		}

		// Skip if the path contains a date pattern (likely an article)
		if matched, _ := regexp.MatchString(`\d{4}/\d{2}`, basePath); matched {
			return true, "article feed (date pattern)"
		}

		// Skip if the path has multiple segments (likely an article or nested content)
		segments := strings.Split(basePath, "/")
		if len(segments) >= 2 {
			return true, "article feed (nested path)"
		}

		// Skip if the single segment looks like an article slug (contains hyphens, is long)
		if len(segments) == 1 && strings.Contains(segments[0], "-") && len(segments[0]) > 20 {
			return true, "article feed (slug pattern)"
		}
	}

	return false, ""
}

// Item represents an individual entry/article from a feed
type Item struct {
	ID           int64     `json:"id,omitempty"`
	FeedURL      string    `json:"feed_url"`
	GUID         string    `json:"guid,omitempty"`
	Title        string    `json:"title,omitempty"`
	Link         string    `json:"link,omitempty"`
	Description  string    `json:"description,omitempty"`
	Content      string    `json:"content,omitempty"`
	Author       string    `json:"author,omitempty"`
	PubDate      time.Time `json:"pub_date,omitempty"`
	DiscoveredAt time.Time `json:"discovered_at"`
	UpdatedAt    time.Time `json:"updated_at,omitempty"`
}

// Feed represents a discovered RSS/Atom feed with metadata
type Feed struct {
	URL         string `json:"url"`
	Type        string `json:"type"` // "rss", "atom", or "unknown"
	Title       string `json:"title,omitempty"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
	SiteURL     string `json:"site_url,omitempty"` // The website the feed belongs to

	// Timing
	DiscoveredAt  time.Time `json:"discovered_at"`
	LastCrawledAt time.Time `json:"last_crawled_at,omitempty"`
	NextCrawlAt   time.Time `json:"next_crawl_at,omitempty"`
	LastBuildDate time.Time `json:"last_build_date,omitempty"` // From the feed's lastBuildDate/updated

	// Cache headers for conditional requests
	ETag         string `json:"etag,omitempty"`
	LastModified string `json:"last_modified,omitempty"`

	// Feed hints for crawl scheduling
	TTLMinutes   int    `json:"ttl_minutes,omitempty"`   // From the RSS <ttl> element
	UpdatePeriod string `json:"update_period,omitempty"` // From sy:updatePeriod (hourly, daily, weekly, monthly, yearly)
	UpdateFreq   int    `json:"update_freq,omitempty"`   // From sy:updateFrequency

	// Health tracking
	Status      string    `json:"status"` // "active", "dead", "redirect", "error"
	ErrorCount  int       `json:"error_count"`
	LastError   string    `json:"last_error,omitempty"`
	LastErrorAt time.Time `json:"last_error_at,omitempty"`

	// Discovery source
	SourceURL  string `json:"source_url,omitempty"` // Where we found this feed
	SourceHost string `json:"source_host,omitempty"`
	TLD        string `json:"tld,omitempty"`

	// Content stats
	ItemCount      int       `json:"item_count,omitempty"`        // Number of items in the last crawl
	AvgPostFreqHrs float64   `json:"avg_post_freq_hrs,omitempty"` // Average hours between posts
	OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
	NewestItemDate time.Time `json:"newest_item_date,omitempty"`

	// Adaptive check interval
	NoUpdate int `json:"no_update"` // Consecutive checks with no change
}
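// The statements below assume a feeds table (and a feeds_fts full-text index)
// created elsewhere in the package; the DDL is not part of this file. A
// minimal sketch consistent with the columns and the ON CONFLICT(url) clause
// used by saveFeed might look like:
//
//	CREATE TABLE IF NOT EXISTS feeds (
//		url TEXT PRIMARY KEY,
//		type TEXT, title TEXT, description TEXT, language TEXT, siteUrl TEXT,
//		discoveredAt TIMESTAMP, lastCrawledAt TIMESTAMP, nextCrawlAt TIMESTAMP, lastBuildDate TIMESTAMP,
//		etag TEXT, lastModified TEXT,
//		ttlMinutes INTEGER, updatePeriod TEXT, updateFreq INTEGER,
//		status TEXT, errorCount INTEGER, lastError TEXT, lastErrorAt TIMESTAMP,
//		sourceUrl TEXT, sourceHost TEXT, tld TEXT,
//		itemCount INTEGER, avgPostFreqHrs REAL,
//		oldestItemDate TIMESTAMP, newestItemDate TIMESTAMP,
//		noUpdate INTEGER DEFAULT 0
//	);
//
// ON CONFLICT(url) requires url to be a PRIMARY KEY or UNIQUE column, and
// SearchFeeds (below) additionally assumes an FTS table named feeds_fts whose
// rowid lines up with feeds.rowid.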
// saveFeed stores a feed in SQLite
func (c *Crawler) saveFeed(feed *Feed) error {
	_, err := c.db.Exec(`
		INSERT INTO feeds (
			url, type, title, description, language, siteUrl,
			discoveredAt, lastCrawledAt, nextCrawlAt, lastBuildDate,
			etag, lastModified, ttlMinutes, updatePeriod, updateFreq,
			status, errorCount, lastError, lastErrorAt,
			sourceUrl, sourceHost, tld,
			itemCount, avgPostFreqHrs, oldestItemDate, newestItemDate, noUpdate
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		ON CONFLICT(url) DO UPDATE SET
			type = excluded.type,
			title = excluded.title,
			description = excluded.description,
			language = excluded.language,
			siteUrl = excluded.siteUrl,
			lastCrawledAt = excluded.lastCrawledAt,
			nextCrawlAt = excluded.nextCrawlAt,
			lastBuildDate = excluded.lastBuildDate,
			etag = excluded.etag,
			lastModified = excluded.lastModified,
			ttlMinutes = excluded.ttlMinutes,
			updatePeriod = excluded.updatePeriod,
			updateFreq = excluded.updateFreq,
			status = excluded.status,
			errorCount = excluded.errorCount,
			lastError = excluded.lastError,
			lastErrorAt = excluded.lastErrorAt,
			itemCount = excluded.itemCount,
			avgPostFreqHrs = excluded.avgPostFreqHrs,
			oldestItemDate = excluded.oldestItemDate,
			newestItemDate = excluded.newestItemDate,
			noUpdate = excluded.noUpdate
	`,
		feed.URL, feed.Type, nullString(feed.Title), nullString(feed.Description), nullString(feed.Language), nullString(feed.SiteURL),
		feed.DiscoveredAt, nullTime(feed.LastCrawledAt), nullTime(feed.NextCrawlAt), nullTime(feed.LastBuildDate),
		nullString(feed.ETag), nullString(feed.LastModified), feed.TTLMinutes, nullString(feed.UpdatePeriod), feed.UpdateFreq,
		feed.Status, feed.ErrorCount, nullString(feed.LastError), nullTime(feed.LastErrorAt),
		nullString(feed.SourceURL), nullString(feed.SourceHost), nullString(feed.TLD),
		feed.ItemCount, feed.AvgPostFreqHrs, nullTime(feed.OldestItemDate), nullTime(feed.NewestItemDate), feed.NoUpdate,
	)
	return err
}
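// nullString, nullTime, normalizeURL, and getTLD are helpers assumed to be
// defined elsewhere in this package. nullString and nullTime (used in
// saveFeed above and saveItem below) are expected to map Go zero values to
// SQL NULL so empty fields are not stored as "" or the zero time. A minimal
// sketch of that assumed behavior:
//
//	func nullString(s string) interface{} {
//		if s == "" {
//			return nil
//		}
//		return s
//	}
//
//	func nullTime(t time.Time) interface{} {
//		if t.IsZero() {
//			return nil
//		}
//		return t
//	}
//
// normalizeURL is assumed to canonicalize a feed URL (the scheme/"www."
// variants tried by CheckFeed suggest both are stripped before storage), and
// getTLD is assumed to extract the top-level domain from a host name.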
// getFeed retrieves a feed from SQLite
func (c *Crawler) getFeed(feedURL string) (*Feed, error) {
	feed := &Feed{}
	var title, description, language, siteURL sql.NullString
	var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate sql.NullTime
	var etag, lastModified, updatePeriod, lastError, sourceURL, sourceHost, tld sql.NullString
	var avgPostFreqHrs sql.NullFloat64

	err := c.db.QueryRow(`
		SELECT url, type, title, description, language, siteUrl,
			discoveredAt, lastCrawledAt, nextCrawlAt, lastBuildDate,
			etag, lastModified, ttlMinutes, updatePeriod, updateFreq,
			status, errorCount, lastError, lastErrorAt,
			sourceUrl, sourceHost, tld,
			itemCount, avgPostFreqHrs, oldestItemDate, newestItemDate, noUpdate
		FROM feeds WHERE url = ?
	`, normalizeURL(feedURL)).Scan(
		&feed.URL, &feed.Type, &title, &description, &language, &siteURL,
		&feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
		&etag, &lastModified, &feed.TTLMinutes, &updatePeriod, &feed.UpdateFreq,
		&feed.Status, &feed.ErrorCount, &lastError, &lastErrorAt,
		&sourceURL, &sourceHost, &tld,
		&feed.ItemCount, &avgPostFreqHrs, &oldestItemDate, &newestItemDate, &feed.NoUpdate,
	)
	if err == sql.ErrNoRows {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}

	// Handle nullable fields
	if title.Valid {
		feed.Title = title.String
	}
	if description.Valid {
		feed.Description = description.String
	}
	if language.Valid {
		feed.Language = language.String
	}
	if siteURL.Valid {
		feed.SiteURL = siteURL.String
	}
	if lastCrawledAt.Valid {
		feed.LastCrawledAt = lastCrawledAt.Time
	}
	if nextCrawlAt.Valid {
		feed.NextCrawlAt = nextCrawlAt.Time
	}
	if lastBuildDate.Valid {
		feed.LastBuildDate = lastBuildDate.Time
	}
	if etag.Valid {
		feed.ETag = etag.String
	}
	if lastModified.Valid {
		feed.LastModified = lastModified.String
	}
	if updatePeriod.Valid {
		feed.UpdatePeriod = updatePeriod.String
	}
	if lastError.Valid {
		feed.LastError = lastError.String
	}
	if lastErrorAt.Valid {
		feed.LastErrorAt = lastErrorAt.Time
	}
	if sourceURL.Valid {
		feed.SourceURL = sourceURL.String
	}
	if sourceHost.Valid {
		feed.SourceHost = sourceHost.String
	}
	if tld.Valid {
		feed.TLD = tld.String
	}
	if avgPostFreqHrs.Valid {
		feed.AvgPostFreqHrs = avgPostFreqHrs.Float64
	}
	if oldestItemDate.Valid {
		feed.OldestItemDate = oldestItemDate.Time
	}
	if newestItemDate.Valid {
		feed.NewestItemDate = newestItemDate.Time
	}

	return feed, nil
}

// feedExists checks if a feed URL already exists in the database
func (c *Crawler) feedExists(feedURL string) bool {
	var exists bool
	err := c.db.QueryRow("SELECT EXISTS(SELECT 1 FROM feeds WHERE url = ?)", normalizeURL(feedURL)).Scan(&exists)
	return err == nil && exists
}

// GetAllFeeds returns all feeds from the database
func (c *Crawler) GetAllFeeds() ([]*Feed, error) {
	rows, err := c.db.Query(`
		SELECT url, type, title, description, language, siteUrl,
			discoveredAt, lastCrawledAt, nextCrawlAt, lastBuildDate,
			etag, lastModified, ttlMinutes, updatePeriod, updateFreq,
			status, errorCount, lastError, lastErrorAt,
			sourceUrl, sourceHost, tld,
			itemCount, avgPostFreqHrs, oldestItemDate, newestItemDate, noUpdate
		FROM feeds
	`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanFeeds(rows)
}

// GetFeedCount returns the total number of feeds in the database
func (c *Crawler) GetFeedCount() (int, error) {
	var count int
	err := c.db.QueryRow("SELECT COUNT(*) FROM feeds").Scan(&count)
	return count, err
}

// GetFeedCountByHost returns the number of feeds for a specific host
func (c *Crawler) GetFeedCountByHost(host string) (int, error) {
	var count int
	err := c.db.QueryRow("SELECT COUNT(*) FROM feeds WHERE sourceHost = ?", host).Scan(&count)
	return count, err
}

// GetFeedsDueForCheck returns feeds where nextCrawlAt <= now, ordered randomly, capped at limit
func (c *Crawler) GetFeedsDueForCheck(limit int) ([]*Feed, error) {
	rows, err := c.db.Query(`
		SELECT url, type, title, description, language, siteUrl,
			discoveredAt, lastCrawledAt, nextCrawlAt, lastBuildDate,
			etag, lastModified, ttlMinutes, updatePeriod, updateFreq,
			status, errorCount, lastError, lastErrorAt,
			sourceUrl, sourceHost, tld,
			itemCount, avgPostFreqHrs, oldestItemDate, newestItemDate, noUpdate
		FROM feeds
		WHERE nextCrawlAt <= datetime('now') AND status != 'dead'
		ORDER BY RANDOM()
		LIMIT ?
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanFeeds(rows)
}

// GetFeedsByHost returns all feeds from a specific host
func (c *Crawler) GetFeedsByHost(host string) ([]*Feed, error) {
	rows, err := c.db.Query(`
		SELECT url, type, title, description, language, siteUrl,
			discoveredAt, lastCrawledAt, nextCrawlAt, lastBuildDate,
			etag, lastModified, ttlMinutes, updatePeriod, updateFreq,
			status, errorCount, lastError, lastErrorAt,
			sourceUrl, sourceHost, tld,
			itemCount, avgPostFreqHrs, oldestItemDate, newestItemDate, noUpdate
		FROM feeds WHERE sourceHost = ?
	`, host)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanFeeds(rows)
}

// SearchFeeds performs a full-text search on feeds
func (c *Crawler) SearchFeeds(query string) ([]*Feed, error) {
	rows, err := c.db.Query(`
		SELECT f.url, f.type, f.title, f.description, f.language, f.siteUrl,
			f.discoveredAt, f.lastCrawledAt, f.nextCrawlAt, f.lastBuildDate,
			f.etag, f.lastModified, f.ttlMinutes, f.updatePeriod, f.updateFreq,
			f.status, f.errorCount, f.lastError, f.lastErrorAt,
			f.sourceUrl, f.sourceHost, f.tld,
			f.itemCount, f.avgPostFreqHrs, f.oldestItemDate, f.newestItemDate, f.noUpdate
		FROM feeds f
		JOIN feeds_fts fts ON f.rowid = fts.rowid
		WHERE feeds_fts MATCH ?
	`, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanFeeds(rows)
}

// scanFeeds is a helper to scan multiple feed rows
func scanFeeds(rows *sql.Rows) ([]*Feed, error) {
	var feeds []*Feed
	for rows.Next() {
		feed := &Feed{}
		var title, description, language, siteURL sql.NullString
		var lastCrawledAt, nextCrawlAt, lastBuildDate, lastErrorAt, oldestItemDate, newestItemDate sql.NullTime
		var etag, lastModified, updatePeriod, lastError, sourceURL, sourceHost, tld sql.NullString
		var avgPostFreqHrs sql.NullFloat64

		if err := rows.Scan(
			&feed.URL, &feed.Type, &title, &description, &language, &siteURL,
			&feed.DiscoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
			&etag, &lastModified, &feed.TTLMinutes, &updatePeriod, &feed.UpdateFreq,
			&feed.Status, &feed.ErrorCount, &lastError, &lastErrorAt,
			&sourceURL, &sourceHost, &tld,
			&feed.ItemCount, &avgPostFreqHrs, &oldestItemDate, &newestItemDate, &feed.NoUpdate,
		); err != nil {
			continue
		}

		// Handle nullable fields
		if title.Valid {
			feed.Title = title.String
		}
		if description.Valid {
			feed.Description = description.String
		}
		if language.Valid {
			feed.Language = language.String
		}
		if siteURL.Valid {
			feed.SiteURL = siteURL.String
		}
		if lastCrawledAt.Valid {
			feed.LastCrawledAt = lastCrawledAt.Time
		}
		if nextCrawlAt.Valid {
			feed.NextCrawlAt = nextCrawlAt.Time
		}
		if lastBuildDate.Valid {
			feed.LastBuildDate = lastBuildDate.Time
		}
		if etag.Valid {
			feed.ETag = etag.String
		}
		if lastModified.Valid {
			feed.LastModified = lastModified.String
		}
		if updatePeriod.Valid {
			feed.UpdatePeriod = updatePeriod.String
		}
		if lastError.Valid {
			feed.LastError = lastError.String
		}
		if lastErrorAt.Valid {
			feed.LastErrorAt = lastErrorAt.Time
		}
		if sourceURL.Valid {
			feed.SourceURL = sourceURL.String
		}
		if sourceHost.Valid {
			feed.SourceHost = sourceHost.String
		}
		if tld.Valid {
			feed.TLD = tld.String
		}
		if avgPostFreqHrs.Valid {
			feed.AvgPostFreqHrs = avgPostFreqHrs.Float64
		}
		if oldestItemDate.Valid {
			feed.OldestItemDate = oldestItemDate.Time
		}
		if newestItemDate.Valid {
			feed.NewestItemDate = newestItemDate.Time
		}

		feeds = append(feeds, feed)
	}
	return feeds, rows.Err()
}
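// saveItem and saveItems below likewise assume an items table (and an
// items_fts full-text index) created elsewhere in the package. A minimal
// sketch consistent with the columns and the ON CONFLICT(feedUrl, guid)
// clause used here:
//
//	CREATE TABLE IF NOT EXISTS items (
//		id INTEGER PRIMARY KEY AUTOINCREMENT,
//		feedUrl TEXT NOT NULL,
//		guid TEXT NOT NULL,
//		title TEXT, link TEXT, description TEXT, content TEXT, author TEXT,
//		pubDate TIMESTAMP, discoveredAt TIMESTAMP, updatedAt TIMESTAMP,
//		UNIQUE(feedUrl, guid)
//	);
//
// SearchItems assumes an items_fts table whose rowid lines up with items.id.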
// saveItem stores an item in SQLite (upsert by feedUrl + guid)
func (c *Crawler) saveItem(item *Item) error {
	_, err := c.db.Exec(`
		INSERT INTO items (feedUrl, guid, title, link, description,
			content, author, pubDate, discoveredAt, updatedAt)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		ON CONFLICT(feedUrl, guid) DO UPDATE SET
			title = excluded.title,
			link = excluded.link,
			description = excluded.description,
			content = excluded.content,
			author = excluded.author,
			pubDate = excluded.pubDate,
			updatedAt = excluded.updatedAt
	`,
		item.FeedURL, item.GUID, nullString(item.Title), nullString(item.Link), nullString(item.Description),
		nullString(item.Content), nullString(item.Author), nullTime(item.PubDate), item.DiscoveredAt, nullTime(item.UpdatedAt),
	)
	return err
}

// saveItems stores multiple items efficiently in a single transaction
func (c *Crawler) saveItems(items []*Item) error {
	if len(items) == 0 {
		return nil
	}

	tx, err := c.db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	stmt, err := tx.Prepare(`
		INSERT INTO items (feedUrl, guid, title, link, description,
			content, author, pubDate, discoveredAt, updatedAt)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		ON CONFLICT(feedUrl, guid) DO UPDATE SET
			title = excluded.title,
			link = excluded.link,
			description = excluded.description,
			content = excluded.content,
			author = excluded.author,
			pubDate = excluded.pubDate,
			updatedAt = excluded.updatedAt
	`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, item := range items {
		if item == nil || item.GUID == "" {
			continue // Skip nil items or items without a GUID
		}
		_, err := stmt.Exec(
			item.FeedURL, item.GUID, nullString(item.Title), nullString(item.Link), nullString(item.Description),
			nullString(item.Content), nullString(item.Author), nullTime(item.PubDate), item.DiscoveredAt, nullTime(item.UpdatedAt),
		)
		if err != nil {
			continue // Skip failed items
		}
	}

	return tx.Commit()
}

// GetItemsByFeed returns items for a specific feed, newest first, capped at limit
func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
	rows, err := c.db.Query(`
		SELECT id, feedUrl, guid, title, link, description, content, author, pubDate, discoveredAt, updatedAt
		FROM items
		WHERE feedUrl = ?
		ORDER BY pubDate DESC
		LIMIT ?
	`, feedURL, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var items []*Item
	for rows.Next() {
		item := &Item{}
		var guid, title, link, description, content, author sql.NullString
		var pubDate, updatedAt sql.NullTime
		if err := rows.Scan(
			&item.ID, &item.FeedURL, &guid, &title, &link, &description, &content, &author,
			&pubDate, &item.DiscoveredAt, &updatedAt,
		); err != nil {
			continue
		}
		if guid.Valid {
			item.GUID = guid.String
		}
		if title.Valid {
			item.Title = title.String
		}
		if link.Valid {
			item.Link = link.String
		}
		if description.Valid {
			item.Description = description.String
		}
		if content.Valid {
			item.Content = content.String
		}
		if author.Valid {
			item.Author = author.String
		}
		if pubDate.Valid {
			item.PubDate = pubDate.Time
		}
		if updatedAt.Valid {
			item.UpdatedAt = updatedAt.Time
		}
		items = append(items, item)
	}
	return items, rows.Err()
}
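// SearchFeeds above and SearchItems below pass the caller's query straight to
// the FTS MATCH operator, so the argument uses SQLite full-text query syntax
// rather than plain substring matching. Assuming the FTS5 module, a
// hypothetical caller might do:
//
//	items, err := crawler.SearchItems(`"static site" OR hugo`, 20)
//	if err != nil {
//		log.Fatal(err)
//	}
//	for _, it := range items {
//		fmt.Println(it.PubDate.Format(time.RFC3339), it.Title, it.Link)
//	}
//
// A malformed MATCH expression surfaces as an error from the query, so callers
// may want to sanitize or quote user input before searching.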
// SearchItems performs a full-text search on items
func (c *Crawler) SearchItems(query string, limit int) ([]*Item, error) {
	rows, err := c.db.Query(`
		SELECT i.id, i.feedUrl, i.guid, i.title, i.link, i.description, i.content, i.author,
			i.pubDate, i.discoveredAt, i.updatedAt
		FROM items i
		JOIN items_fts fts ON i.id = fts.rowid
		WHERE items_fts MATCH ?
		ORDER BY i.pubDate DESC
		LIMIT ?
	`, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var items []*Item
	for rows.Next() {
		item := &Item{}
		var guid, title, link, description, content, author sql.NullString
		var pubDate, updatedAt sql.NullTime
		if err := rows.Scan(
			&item.ID, &item.FeedURL, &guid, &title, &link, &description, &content, &author,
			&pubDate, &item.DiscoveredAt, &updatedAt,
		); err != nil {
			continue
		}
		if guid.Valid {
			item.GUID = guid.String
		}
		if title.Valid {
			item.Title = title.String
		}
		if link.Valid {
			item.Link = link.String
		}
		if description.Valid {
			item.Description = description.String
		}
		if content.Valid {
			item.Content = content.String
		}
		if author.Valid {
			item.Author = author.String
		}
		if pubDate.Valid {
			item.PubDate = pubDate.Time
		}
		if updatedAt.Valid {
			item.UpdatedAt = updatedAt.Time
		}
		items = append(items, item)
	}
	return items, rows.Err()
}

// CleanupOldItems removes items older than 12 months
func (c *Crawler) CleanupOldItems() (int64, error) {
	cutoff := time.Now().AddDate(-1, 0, 0) // 12 months ago
	result, err := c.db.Exec(`
		DELETE FROM items WHERE pubDate < ? AND pubDate IS NOT NULL
	`, cutoff)
	if err != nil {
		return 0, err
	}
	return result.RowsAffected()
}

// processFeed parses and stores a feed with full metadata
func (c *Crawler) processFeed(feedURL, sourceHost, body string, headers http.Header) {
	if strings.Contains(feedURL, "/comment") {
		return
	}

	// Fast path: check without the lock
	if c.feedExists(feedURL) {
		return
	}

	c.feedsMu.Lock()
	defer c.feedsMu.Unlock()

	// Double-check after acquiring the lock
	if c.feedExists(feedURL) {
		return
	}

	feedType := c.detectFeedType(body)
	now := time.Now()

	feed := &Feed{
		URL:           normalizeURL(feedURL),
		Type:          feedType,
		DiscoveredAt:  now,
		LastCrawledAt: now,
		Status:        "active",
		SourceHost:    sourceHost,
		TLD:           getTLD(sourceHost),
		ETag:          headers.Get("ETag"),
		LastModified:  headers.Get("Last-Modified"),
	}

	// Parse feed-specific metadata and items
	var items []*Item
	switch feedType {
	case "rss":
		items = c.parseRSSMetadata(body, feed)
	case "atom":
		items = c.parseAtomMetadata(body, feed)
	}

	// Calculate the next crawl time
	feed.NextCrawlAt = c.calculateNextCrawl(feed)

	if err := c.saveFeed(feed); err != nil {
		return
	}

	// Save items
	if len(items) > 0 {
		c.saveItems(items)
	}
}

// addFeed adds a discovered feed URL (not yet fetched)
func (c *Crawler) addFeed(feedURL, feedType, sourceHost, sourceURL string) {
	// Skip comment, category, and article feeds
	if skip, _ := shouldSkipFeed(feedURL); skip {
		return
	}

	// Fast path: check without the lock
	if c.feedExists(feedURL) {
		return
	}

	c.feedsMu.Lock()
	defer c.feedsMu.Unlock()

	// Double-check after acquiring the lock
	if c.feedExists(feedURL) {
		return
	}

	now := time.Now()
	normalizedURL := normalizeURL(feedURL)

	feed := &Feed{
		URL:          normalizedURL,
		Type:         feedType,
		DiscoveredAt: now,
		Status:       "active",
		SourceURL:    normalizeURL(sourceURL),
		SourceHost:   sourceHost,
		TLD:          getTLD(sourceHost),
		NextCrawlAt:  now, // Should be crawled immediately
	}

	if err := c.saveFeed(feed); err != nil {
		return
	}
}
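// CheckFeed below schedules rechecks with a simple linear backoff: the next
// check is 100s plus 100s per consecutive unchanged (or failed) check, and the
// counter resets whenever new content arrives. Worked out:
//
//	NoUpdate = 0  -> next check in 100s (fresh content just seen)
//	NoUpdate = 1  -> next check in 200s
//	NoUpdate = 5  -> next check in 600s
//	NoUpdate = 30 -> next check in 3100s (~52 min)
//
// This is distinct from calculateNextCrawl, which processFeed uses for newly
// discovered feeds and which is assumed (elsewhere in the package) to honor
// the feed's ttl / sy:updatePeriod hints.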
// CheckFeed performs a conditional request to check if a feed has been updated.
// Returns: changed (bool), error
func (c *Crawler) CheckFeed(feed *Feed) (bool, error) {
	atomic.AddInt32(&c.feedsChecked, 1)

	// Try different scheme/www combinations since we store URLs without a scheme
	urlVariants := []string{
		"https://" + feed.URL,
		"http://" + feed.URL,
		"https://www." + feed.URL,
		"http://www." + feed.URL,
	}

	var resp *http.Response
	var err error
	var successURL string

	for _, tryURL := range urlVariants {
		req, reqErr := http.NewRequest("GET", tryURL, nil)
		if reqErr != nil {
			continue
		}
		req.Header.Set("User-Agent", c.UserAgent)

		// Add conditional headers if we have them
		if feed.ETag != "" {
			req.Header.Set("If-None-Match", feed.ETag)
		}
		if feed.LastModified != "" {
			req.Header.Set("If-Modified-Since", feed.LastModified)
		}

		resp, err = c.client.Do(req)
		if err == nil {
			successURL = tryURL
			break
		}
	}
	_ = successURL // May be used later for logging/debugging

	// If no request succeeded, resp will be nil
	if resp == nil {
		if err == nil {
			err = fmt.Errorf("all URL variants failed")
		}
		now := time.Now()
		feed.LastCrawledAt = now
		feed.ErrorCount++
		feed.NoUpdate++
		feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
		feed.LastError = err.Error()
		feed.LastErrorAt = now
		feed.Status = "error"
		c.saveFeed(feed)
		return false, err
	}
	defer resp.Body.Close()

	now := time.Now()
	feed.LastCrawledAt = now

	// 304 Not Modified - the feed hasn't changed
	if resp.StatusCode == http.StatusNotModified {
		feed.NoUpdate++
		// Adaptive backoff: 100s base + 100s per consecutive no-change check
		feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
		feed.ErrorCount = 0
		feed.LastError = ""
		feed.Status = "active"
		c.saveFeed(feed)
		return false, nil
	}

	// Non-200 response
	if resp.StatusCode != http.StatusOK {
		feed.ErrorCount++
		feed.NoUpdate++
		feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
		feed.LastError = resp.Status
		feed.LastErrorAt = now
		if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusGone {
			feed.Status = "dead"
		} else {
			feed.Status = "error"
		}
		c.saveFeed(feed)
		return false, nil
	}

	// 200 OK - the feed has new content
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		feed.ErrorCount++
		feed.NoUpdate++
		feed.NextCrawlAt = now.Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
		feed.LastError = err.Error()
		feed.LastErrorAt = now
		feed.Status = "error"
		c.saveFeed(feed)
		return false, err
	}
	body := string(bodyBytes)

	// Update cache headers
	feed.ETag = resp.Header.Get("ETag")
	feed.LastModified = resp.Header.Get("Last-Modified")

	// Re-detect the type and parse metadata
	feedType := c.detectFeedType(body)
	feed.Type = feedType

	var items []*Item
	switch feedType {
	case "rss":
		items = c.parseRSSMetadata(body, feed)
	case "atom":
		items = c.parseAtomMetadata(body, feed)
	}

	// Content changed - reset the backoff
	feed.NoUpdate = 0
	feed.NextCrawlAt = now.Add(100 * time.Second)
	feed.ErrorCount = 0
	feed.LastError = ""
	feed.Status = "active"
	c.saveFeed(feed)

	// Save items
	if len(items) > 0 {
		c.saveItems(items)
	}

	return true, nil
}
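// A hypothetical caller would combine the helpers in this file into a simple
// recheck loop; this sketch assumes a *Crawler named c has already been
// constructed elsewhere in the package:
//
//	for {
//		feeds, err := c.GetFeedsDueForCheck(100)
//		if err != nil {
//			time.Sleep(time.Minute)
//			continue
//		}
//		for _, f := range feeds {
//			// CheckFeed stores any new items itself via saveItems
//			if _, err := c.CheckFeed(f); err != nil {
//				continue
//			}
//		}
//		// Purge items older than 12 months
//		c.CleanupOldItems()
//		time.Sleep(30 * time.Second)
//	}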