v100
@@ -163,9 +163,6 @@ func (c *Crawler) parseRSSMetadata(body string, feed *Feed) []*Item {
 	feed.Description = ch.Description
 	feed.Language = ch.Language
 	feed.SiteURL = normalizeURL(ch.Link)
-	feed.TTLMinutes = ch.TTL
-	feed.UpdatePeriod = ch.UpdatePeriod
-	feed.UpdateFreq = ch.UpdateFreq
 	feed.ItemCount = len(ch.Items)
 
 	// Detect podcast
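The three removed assignments are the feed's self-declared update hints: <ttl> from core RSS 2.0 plus the syndication module's sy:updatePeriod / sy:updateFrequency. The channel struct they are read from is not part of this commit; a hypothetical encoding/xml mapping for those fields could look like the sketch below (all names here are illustrative, not the crawler's real types).

package main

import (
	"encoding/xml"
	"fmt"
)

// Hypothetical mapping of the channel fields assigned above; the crawler's
// actual struct is not shown in this diff.
type rssChannel struct {
	Description  string `xml:"description"`
	Language     string `xml:"language"`
	Link         string `xml:"link"`
	TTL          int    `xml:"ttl"`
	UpdatePeriod string `xml:"http://purl.org/rss/1.0/modules/syndication/ updatePeriod"`
	UpdateFreq   int    `xml:"http://purl.org/rss/1.0/modules/syndication/ updateFrequency"`
}

func main() {
	// Minimal RSS channel fragment carrying all three update hints.
	src := `<channel xmlns:sy="http://purl.org/rss/1.0/modules/syndication/">
		<ttl>60</ttl>
		<sy:updatePeriod>hourly</sy:updatePeriod>
		<sy:updateFrequency>2</sy:updateFrequency>
	</channel>`
	var ch rssChannel
	if err := xml.Unmarshal([]byte(src), &ch); err != nil {
		panic(err)
	}
	fmt.Println(ch.TTL, ch.UpdatePeriod, ch.UpdateFreq) // 60 hourly 2
}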
@@ -251,10 +248,6 @@ func (c *Crawler) parseRSSMetadata(body string, feed *Feed) []*Item {
 		feed.OldestItemDate = oldest
 		feed.NewestItemDate = newest
 
-		if len(dates) > 1 {
-			totalHours := newest.Sub(oldest).Hours()
-			feed.AvgPostFreqHrs = totalHours / float64(len(dates)-1)
-		}
 	}
 
 	return items
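The removed block computed the mean gap between posts as the total span divided by the number of gaps, (newest - oldest) / (len(dates) - 1); the same block is dropped from the Atom and JSON Feed parsers below, since calculateNextCrawl no longer consumes AvgPostFreqHrs. A small illustrative check of that formula (the dates here are made up):

package main

import (
	"fmt"
	"time"
)

func main() {
	// Three hypothetical item dates spanning 48 hours.
	dates := []time.Time{
		time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC),
		time.Date(2024, 1, 2, 0, 0, 0, 0, time.UTC),
		time.Date(2024, 1, 3, 0, 0, 0, 0, time.UTC),
	}
	oldest, newest := dates[0], dates[2]

	// Same formula as the removed lines: total span divided by the
	// number of gaps, i.e. 48h / 2 = 24h between posts on average.
	avgPostFreqHrs := newest.Sub(oldest).Hours() / float64(len(dates)-1)
	fmt.Println(avgPostFreqHrs) // 24
}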
@@ -367,10 +360,6 @@ func (c *Crawler) parseAtomMetadata(body string, feed *Feed) []*Item {
 		feed.OldestItemDate = oldest
 		feed.NewestItemDate = newest
 
-		if len(dates) > 1 {
-			totalHours := newest.Sub(oldest).Hours()
-			feed.AvgPostFreqHrs = totalHours / float64(len(dates)-1)
-		}
 	}
 
 	return items
@@ -399,48 +388,8 @@ func parseRSSDate(s string) (time.Time, error) {
 
 // calculateNextCrawl determines when to next crawl this feed
 func (c *Crawler) calculateNextCrawl(feed *Feed) time.Time {
-	now := time.Now()
-
-	// If TTL is specified, use it
-	if feed.TTLMinutes > 0 {
-		return now.Add(time.Duration(feed.TTLMinutes) * time.Minute)
-	}
-
-	// If updatePeriod is specified
-	if feed.UpdatePeriod != "" {
-		freq := feed.UpdateFreq
-		if freq == 0 {
-			freq = 1
-		}
-		switch strings.ToLower(feed.UpdatePeriod) {
-		case "hourly":
-			return now.Add(time.Duration(freq) * time.Hour)
-		case "daily":
-			return now.Add(time.Duration(freq) * 24 * time.Hour)
-		case "weekly":
-			return now.Add(time.Duration(freq) * 7 * 24 * time.Hour)
-		case "monthly":
-			return now.Add(time.Duration(freq) * 30 * 24 * time.Hour)
-		case "yearly":
-			return now.Add(time.Duration(freq) * 365 * 24 * time.Hour)
-		}
-	}
-
-	// If we have average post frequency, use that
-	if feed.AvgPostFreqHrs > 0 {
-		// Crawl at half the average frequency, but at least every hour and at most once per day
-		crawlInterval := feed.AvgPostFreqHrs / 2
-		if crawlInterval < 1 {
-			crawlInterval = 1
-		}
-		if crawlInterval > 24 {
-			crawlInterval = 24
-		}
-		return now.Add(time.Duration(crawlInterval * float64(time.Hour)))
-	}
-
-	// Default: crawl every 6 hours
-	return now.Add(6 * time.Hour)
+	// Adaptive backoff: 100s base + 100s per consecutive no-change
+	return time.Now().Add(time.Duration(100+100*feed.NoUpdate) * time.Second)
 }
 
 // extractItemImages extracts image URLs from an RSS item
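All of the per-feed heuristics above (TTL, sy:updatePeriod, average post frequency, 6-hour default) are replaced by one linear backoff: 100 seconds plus 100 seconds for each consecutive crawl that saw no change. A minimal sketch of the resulting schedule, assuming only the feed.NoUpdate counter visible in the added line (everything else here is illustrative):

package main

import (
	"fmt"
	"time"
)

func main() {
	// Delay until the next crawl for 0..5 consecutive unchanged fetches.
	// Mirrors the added line: 100s base + 100s per consecutive no-change.
	for noUpdate := 0; noUpdate <= 5; noUpdate++ {
		delay := time.Duration(100+100*noUpdate) * time.Second
		fmt.Printf("NoUpdate=%d -> next crawl in %v\n", noUpdate, delay)
	}
	// Grows linearly: 1m40s, 3m20s, 5m0s, 6m40s, 8m20s, 10m0s;
	// nothing in this hunk caps the delay, so any ceiling must live elsewhere.
}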
@@ -661,10 +610,6 @@ func (c *Crawler) parseJSONFeedMetadata(body string, feed *Feed) []*Item {
 		feed.OldestItemDate = oldest
 		feed.NewestItemDate = newest
 
-		if len(dates) > 1 {
-			totalHours := newest.Sub(oldest).Hours()
-			feed.AvgPostFreqHrs = totalHours / float64(len(dates)-1)
-		}
 	}
 
 	return items