Add PebbleDB storage, domain tracking, and web dashboard
- Split main.go into separate files for better organization: crawler.go, domain.go, feed.go, parser.go, html.go, util.go
- Add PebbleDB for persistent storage of feeds and domains
- Store feeds with metadata: title, TTL, update frequency, ETag, etc.
- Track domains with crawl status (uncrawled/crawled/error)
- Normalize URLs by stripping scheme and www. prefix
- Add web dashboard on port 4321 with real-time stats:
  - Crawl progress with completion percentage
  - Feed counts by type (RSS/Atom)
  - Top TLDs and domains by feed count
  - Recent feeds table
- Filter out comment feeds from results

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
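The hunk below only shows the rewritten main.go; the storage layer itself lives in the new crawler.go and domain.go files, which are not part of this diff. As a rough illustration of two of the changes listed above, normalizing URLs before using them as keys and persisting records in PebbleDB (github.com/cockroachdb/pebble), here is a minimal sketch. The normalizeURL helper, the FeedRecord fields, and the "feed:" key prefix are assumptions for illustration only, not the actual code from this commit.

package main

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/cockroachdb/pebble"
)

// FeedRecord is a hypothetical shape for the stored feed metadata
// described in the commit message (title, ETag, ...).
type FeedRecord struct {
	URL   string `json:"url"`
	Type  string `json:"type"` // "rss" or "atom"
	Title string `json:"title"`
	ETag  string `json:"etag"`
}

// normalizeURL strips the scheme and a leading "www." so that
// http://www.example.com/feed and https://example.com/feed collapse
// to the same key, as the commit message describes.
func normalizeURL(raw string) string {
	s := strings.TrimPrefix(raw, "https://")
	s = strings.TrimPrefix(s, "http://")
	s = strings.TrimPrefix(s, "www.")
	return s
}

func main() {
	db, err := pebble.Open("feeds.db", &pebble.Options{})
	if err != nil {
		panic(err)
	}
	defer db.Close()

	rec := FeedRecord{URL: "https://www.example.com/feed.xml", Type: "rss", Title: "Example"}
	val, err := json.Marshal(rec)
	if err != nil {
		panic(err)
	}

	// Keys are namespaced by record kind; the "feed:" prefix is an assumption.
	key := []byte("feed:" + normalizeURL(rec.URL))
	if err := db.Set(key, val, pebble.Sync); err != nil {
		panic(err)
	}

	got, closer, err := db.Get(key)
	if err == nil {
		fmt.Println(string(got))
		closer.Close()
	}
}

A real schema would also carry the TTL, update frequency, and crawl-status fields mentioned above; the point here is only the normalize-then-key pattern and the Set/Get round trip (PebbleDB's Get returns an io.Closer that must be closed once the value bytes have been consumed).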
@@ -1,655 +1,30 @@
package main

import (
	"bufio"
	"compress/gzip"
	"encoding/xml"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"os"
	"regexp"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"golang.org/x/net/html"
)

type Feed struct {
	URL  string
	Type string // "rss" or "atom"
}

// RSS structs
type RSS struct {
	Channel Channel `xml:"channel"`
}

type Channel struct {
	Items []RSSItem `xml:"item"`
}

type RSSItem struct {
	Link string `xml:"link"`
}

// Atom structs
type AtomFeed struct {
	Entries []AtomEntry `xml:"entry"`
}

type AtomEntry struct {
	Links []AtomLink `xml:"link"`
}

type AtomLink struct {
	Href string `xml:"href,attr"`
	Rel  string `xml:"rel,attr"`
}

type Crawler struct {
	MaxDepth        int
	MaxPagesPerHost int
	Timeout         time.Duration
	UserAgent       string
	visited         sync.Map
	feeds           []Feed
	feedsMu         sync.Mutex
	client          *http.Client
	hostsProcessed  int32

	// TLD file management
	currentTLD string
	tldFile    *os.File
	tldFeeds   map[string]bool
	tldMu      sync.Mutex
}

// NewCrawler returns a Crawler with default limits and an HTTP client
// that gives up after 10 redirects.
func NewCrawler() *Crawler {
	return &Crawler{
		MaxDepth:        10,
		MaxPagesPerHost: 10,
		Timeout:         10 * time.Second,
		UserAgent:       "FeedCrawler/1.0",
		feeds:           make([]Feed, 0),
		tldFeeds:        make(map[string]bool),
		client: &http.Client{
			Timeout: 10 * time.Second,
			CheckRedirect: func(req *http.Request, via []*http.Request) error {
				if len(via) >= 10 {
					return fmt.Errorf("stopped after 10 redirects")
				}
				return nil
			},
		},
	}
}

// reverseHost converts a reverse domain notation back to normal
// e.g., "com.example.www" -> "www.example.com"
func reverseHost(reverseHost string) string {
	parts := strings.Split(reverseHost, ".")
	// Reverse the parts
	for i, j := 0, len(parts)-1; i < j; i, j = i+1, j-1 {
		parts[i], parts[j] = parts[j], parts[i]
	}
	return strings.Join(parts, ".")
}

// getTLD extracts the TLD from a hostname
func getTLD(host string) string {
	parts := strings.Split(host, ".")
	if len(parts) > 0 {
		return parts[len(parts)-1]
	}
	return ""
}

// GetCommonCrawlHostsFromFile loads hostnames from a Common Crawl vertices
// file and returns them in random order.
func (c *Crawler) GetCommonCrawlHostsFromFile(filename string, limit int) ([]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to open file: %v", err)
	}
	defer file.Close()

	hosts, err := c.parseVerticesFile(file, limit)
	if err != nil {
		return nil, fmt.Errorf("failed to parse vertices: %v", err)
	}

	// Randomize the order
	rand.Shuffle(len(hosts), func(i, j int) {
		hosts[i], hosts[j] = hosts[j], hosts[i]
	})

	return hosts, nil
}

// parseVerticesFile reads a (possibly gzipped) vertices file and converts
// each reverse-notation hostname to its normal form.
func (c *Crawler) parseVerticesFile(reader io.Reader, limit int) ([]string, error) {
	// Try to detect if it's gzipped
	var bodyReader io.Reader

	// Create a buffered reader so we can peek
	bufReader := bufio.NewReader(reader)
	peekBytes, err := bufReader.Peek(2)
	if err != nil && err != io.EOF {
		return nil, fmt.Errorf("failed to peek at file: %v", err)
	}

	// Check for gzip magic number (0x1f 0x8b)
	if len(peekBytes) >= 2 && peekBytes[0] == 0x1f && peekBytes[1] == 0x8b {
		gzReader, err := gzip.NewReader(bufReader)
		if err != nil {
			return nil, fmt.Errorf("failed to create gzip reader: %v", err)
		}
		defer gzReader.Close()
		bodyReader = gzReader
	} else {
		bodyReader = bufReader
	}

	hosts := make([]string, 0)
	scanner := bufio.NewScanner(bodyReader)

	// Set a larger buffer for scanning
	buf := make([]byte, 0, 64*1024)
	scanner.Buffer(buf, 1024*1024)

	count := 0
	for scanner.Scan() {
		if limit > 0 && count >= limit {
			break
		}

		line := scanner.Text()
		// Vertices file format: line_number\treverse_hostname\tinteger
		// Example: 0\tcom.example\t42
		parts := strings.Split(line, "\t")
		if len(parts) >= 2 {
			reverseHostName := strings.TrimSpace(parts[1])
			if reverseHostName != "" {
				// Convert from reverse notation (com.example) to normal (example.com)
				normalHost := reverseHost(reverseHostName)
				hosts = append(hosts, normalHost)
				count++
			}
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %v", err)
	}

	return hosts, nil
}

// openTLDFile closes the current per-TLD output file (sorting and
// deduplicating it first) and opens feeds/<tld>.feed for appending.
func (c *Crawler) openTLDFile(tld string) error {
	// Close previous file if open
	if c.tldFile != nil {
		c.sortAndDeduplicateTLDFile()
		c.tldFile.Close()
		c.tldFile = nil
		c.tldFeeds = make(map[string]bool)
	}

	// Open new file
	if tld != "" {
		filename := "feeds/" + tld + ".feed"
		file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			return fmt.Errorf("failed to open TLD file %s: %v", filename, err)
		}
		c.tldFile = file
		c.currentTLD = tld
	}

	return nil
}

// sortAndDeduplicateTLDFile rewrites the current TLD file with its
// entries deduplicated and sorted.
func (c *Crawler) sortAndDeduplicateTLDFile() {
	if c.currentTLD == "" {
		return
	}

	filename := "feeds/" + c.currentTLD + ".feed"

	// Read all lines from the file
	file, err := os.Open(filename)
	if err != nil {
		return
	}

	feedSet := make(map[string]bool)
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line != "" {
			feedSet[line] = true
		}
	}
	file.Close()

	// Sort the unique feeds
	feeds := make([]string, 0, len(feedSet))
	for feed := range feedSet {
		feeds = append(feeds, feed)
	}
	sort.Strings(feeds)

	// Write back to file
	file, err = os.Create(filename)
	if err != nil {
		return
	}
	defer file.Close()

	writer := bufio.NewWriter(file)
	for _, feed := range feeds {
		writer.WriteString(feed + "\n")
	}
	writer.Flush()
}

// writeFeedToTLDFile appends a feed URL to the file for the host's TLD,
// switching output files when the TLD changes.
func (c *Crawler) writeFeedToTLDFile(feedURL, host string) {
	c.tldMu.Lock()
	defer c.tldMu.Unlock()

	tld := getTLD(host)

	// Check if TLD changed
	if tld != c.currentTLD {
		c.openTLDFile(tld)
	}

	// Write feed to file if not already written
	if c.tldFile != nil && !c.tldFeeds[feedURL] {
		c.tldFile.WriteString(feedURL + "\n")
		c.tldFeeds[feedURL] = true
	}
}

// Crawl crawls a single start URL and returns the feeds found so far.
func (c *Crawler) Crawl(startURL string) ([]Feed, error) {
	pagesVisited := 0
	c.crawlPage(startURL, 0, make(map[string]bool), &pagesVisited)
	return c.feeds, nil
}

// CrawlHosts crawls each host using a pool of workers sized to the
// number of available CPUs.
func (c *Crawler) CrawlHosts(hosts []string) ([]Feed, error) {
	numWorkers := runtime.NumCPU() - 1
	if numWorkers < 1 {
		numWorkers = 1
	}

	hostChan := make(chan string, numWorkers*2)
	var wg sync.WaitGroup

	// Start workers
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for host := range hostChan {
				c.crawlHost(host)
			}
		}()
	}

	// Send hosts to workers
	for _, host := range hosts {
		hostChan <- host
	}

	close(hostChan)
	wg.Wait()

	// Close final TLD file
	c.tldMu.Lock()
	c.openTLDFile("")
	c.tldMu.Unlock()

	return c.feeds, nil
}

// crawlHost crawls one host and writes any feeds found for it to the
// per-TLD output file.
func (c *Crawler) crawlHost(host string) {
	atomic.AddInt32(&c.hostsProcessed, 1)

	hostFeeds := make([]Feed, 0)
	localVisited := make(map[string]bool)
	pagesVisited := 0

	// Try both http and https
	urls := []string{
		"https://" + host,
		"http://" + host,
	}

	for _, url := range urls {
		c.crawlPage(url, 0, localVisited, &pagesVisited)
		break // Only the first (https) URL is attempted
	}

	// Collect feeds found for this host
	c.feedsMu.Lock()
	for _, feed := range c.feeds {
		// Check if feed belongs to this host
		feedHost := ""
		if u, err := url.Parse(feed.URL); err == nil {
			feedHost = u.Host
		}
		if feedHost == host || strings.HasSuffix(feedHost, "."+host) {
			hostFeeds = append(hostFeeds, feed)
		}
	}
	c.feedsMu.Unlock()

	// Print and write feeds found for this host
	if len(hostFeeds) > 0 {
		for _, feed := range hostFeeds {
			fmt.Printf("%s\n", feed.URL)
			c.writeFeedToTLDFile(feed.URL, host)
		}
	}
}

// crawlPage fetches a page, records it if it is itself a feed, collects
// feed links advertised in the HTML, and recurses into same-host links
// up to the configured depth and page limits.
func (c *Crawler) crawlPage(pageURL string, depth int, localVisited map[string]bool, pagesVisited *int) {
	if *pagesVisited >= c.MaxPagesPerHost || depth > c.MaxDepth {
		return
	}

	if localVisited[pageURL] {
		return
	}

	// Check global visited
	if _, visited := c.visited.LoadOrStore(pageURL, true); visited {
		return
	}

	localVisited[pageURL] = true
	*pagesVisited++

	body, contentType, err := c.fetchPage(pageURL)
	if err != nil {
		return
	}

	// Check if this page itself is a feed
	if c.isFeedContent(body, contentType) {
		feedType := c.detectFeedType(body)
		c.addFeed(pageURL, feedType)

		// Extract links from the feed and crawl them
		feedLinks := c.extractLinksFromFeed(body, feedType)

		for _, link := range feedLinks {
			c.crawlPage(link, depth+1, localVisited, pagesVisited)
		}
		return
	}

	// Parse HTML and look for feed links
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
		return
	}

	// Find feed links in <link> tags
	feedLinks := c.extractFeedLinks(doc, pageURL)
	for _, feed := range feedLinks {
		c.addFeed(feed.URL, feed.Type)
	}

	// Find feed links in anchor tags
	anchorFeeds := c.extractAnchorFeeds(doc, pageURL)
	for _, feed := range anchorFeeds {
		c.addFeed(feed.URL, feed.Type)
	}

	// Extract all links for further crawling
	if depth < c.MaxDepth {
		links := c.extractLinks(doc, pageURL)
		for _, link := range links {
			if c.shouldCrawl(link, pageURL) {
				c.crawlPage(link, depth+1, localVisited, pagesVisited)
			}
		}
	}
}

// fetchPage performs a GET request and returns the response body and
// Content-Type header.
func (c *Crawler) fetchPage(pageURL string) (string, string, error) {
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		return "", "", err
	}
	req.Header.Set("User-Agent", c.UserAgent)

	resp, err := c.client.Do(req)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("status code: %d", resp.StatusCode)
	}

	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", "", err
	}

	contentType := resp.Header.Get("Content-Type")
	return string(bodyBytes), contentType, nil
}

// isFeedContent reports whether a response looks like an RSS or Atom feed,
// judging by the Content-Type header or the XML prologue.
func (c *Crawler) isFeedContent(body, contentType string) bool {
	if strings.Contains(contentType, "application/rss+xml") ||
		strings.Contains(contentType, "application/atom+xml") ||
		strings.Contains(contentType, "application/xml") ||
		strings.Contains(contentType, "text/xml") {
		return true
	}

	body = strings.TrimSpace(body)
	if strings.HasPrefix(body, "<?xml") {
		if strings.Contains(body, "<rss") || strings.Contains(body, "<feed") {
			return true
		}
	}
	return false
}

// detectFeedType returns "rss", "atom", or "unknown" based on the feed body.
func (c *Crawler) detectFeedType(body string) string {
	if strings.Contains(body, "<rss") {
		return "rss"
	}
	if strings.Contains(body, "<feed") && strings.Contains(body, "xmlns=\"http://www.w3.org/2005/Atom\"") {
		return "atom"
	}
	return "unknown"
}

// extractLinksFromFeed returns the item/entry links contained in an RSS or
// Atom document.
func (c *Crawler) extractLinksFromFeed(body, feedType string) []string {
	links := make([]string, 0)

	switch feedType {
	case "rss":
		var rss RSS
		if err := xml.Unmarshal([]byte(body), &rss); err != nil {
			return links
		}

		for _, item := range rss.Channel.Items {
			if item.Link != "" {
				links = append(links, item.Link)
			}
		}

	case "atom":
		var atom AtomFeed
		if err := xml.Unmarshal([]byte(body), &atom); err != nil {
			return links
		}

		for _, entry := range atom.Entries {
			for _, link := range entry.Links {
				if link.Rel == "" || link.Rel == "alternate" {
					if link.Href != "" {
						links = append(links, link.Href)
					}
				}
			}
		}
	}

	return links
}

// extractFeedLinks collects feeds advertised via <link rel="alternate"> tags.
func (c *Crawler) extractFeedLinks(n *html.Node, baseURL string) []Feed {
	feeds := make([]Feed, 0)
	var f func(*html.Node)

	f = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			var rel, href, typeAttr string
			for _, attr := range n.Attr {
				switch attr.Key {
				case "rel":
					rel = attr.Val
				case "href":
					href = attr.Val
				case "type":
					typeAttr = attr.Val
				}
			}

			if rel == "alternate" && (typeAttr == "application/rss+xml" || typeAttr == "application/atom+xml") {
				absURL := c.makeAbsoluteURL(href, baseURL)
				feedType := "rss"
				if typeAttr == "application/atom+xml" {
					feedType = "atom"
				}
				feeds = append(feeds, Feed{URL: absURL, Type: feedType})
			}
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			f(child)
		}
	}
	f(n)
	return feeds
}

// extractAnchorFeeds collects anchors whose href looks like a feed URL
// (contains "rss", "atom", or "feed").
func (c *Crawler) extractAnchorFeeds(n *html.Node, baseURL string) []Feed {
	feeds := make([]Feed, 0)
	feedPattern := regexp.MustCompile(`(?i)(rss|atom|feed)`)

	var f func(*html.Node)
	f = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key == "href" {
					href := attr.Val
					if feedPattern.MatchString(href) {
						absURL := c.makeAbsoluteURL(href, baseURL)
						feeds = append(feeds, Feed{URL: absURL, Type: "unknown"})
					}
				}
			}
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			f(child)
		}
	}
	f(n)
	return feeds
}

// extractLinks returns all anchor hrefs resolved against the base URL.
func (c *Crawler) extractLinks(n *html.Node, baseURL string) []string {
	links := make([]string, 0)
	var f func(*html.Node)

	f = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key == "href" {
					link := c.makeAbsoluteURL(attr.Val, baseURL)
					links = append(links, link)
				}
			}
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			f(child)
		}
	}
	f(n)
	return links
}

// makeAbsoluteURL resolves href against baseURL, returning href unchanged
// if either URL fails to parse.
func (c *Crawler) makeAbsoluteURL(href, baseURL string) string {
	base, err := url.Parse(baseURL)
	if err != nil {
		return href
	}

	link, err := url.Parse(href)
	if err != nil {
		return href
	}

	return base.ResolveReference(link).String()
}

// shouldCrawl reports whether link is on the same host as baseURL.
func (c *Crawler) shouldCrawl(link, baseURL string) bool {
	linkURL, err := url.Parse(link)
	if err != nil {
		return false
	}

	baseURLParsed, err := url.Parse(baseURL)
	if err != nil {
		return false
	}

	return linkURL.Host == baseURLParsed.Host
}

// addFeed records a feed URL if it has not been seen before.
func (c *Crawler) addFeed(feedURL, feedType string) {
	c.feedsMu.Lock()
	defer c.feedsMu.Unlock()

	for _, f := range c.feeds {
		if f.URL == feedURL {
			return
		}
	}

	feed := Feed{URL: feedURL, Type: feedType}
	c.feeds = append(c.feeds, feed)
}

 func main() {
-	rand.Seed(time.Now().UnixNano())
-
-	crawler := NewCrawler()
-
-	hosts, err := crawler.GetCommonCrawlHostsFromFile("vertices.txt.gz", 0)
+	crawler, err := NewCrawler("feeds.db")
 	if err != nil {
-		fmt.Printf("Error reading hosts from file: %v\n", err)
-		return
+		fmt.Fprintf(os.Stderr, "Error initializing crawler: %v\n", err)
+		os.Exit(1)
 	}
+	defer crawler.Close()

-	feeds, err := crawler.CrawlHosts(hosts)
-	if err != nil {
-		fmt.Printf("Error: %v\n", err)
-		return
+	// Start dashboard in background
+	go func() {
+		if err := crawler.StartDashboard("0.0.0.0:4321"); err != nil {
+			fmt.Fprintf(os.Stderr, "Dashboard error: %v\n", err)
+		}
+	}()
+
+	// Import domains from vertices file (only adds new ones as "uncrawled")
+	crawler.ImportDomainsFromFile("vertices.txt.gz", 0)
+
+	// Crawl all uncrawled domains (runs continuously)
+	for {
+		crawler.CrawlUncrawledDomains()
+	}
-
-	fmt.Printf("=== Total feeds found: %d ===\n", len(feeds))
 }