Enable .com domain import from vertices.txt.gz
Filter imported domains to only the .com TLD for now. Re-enable the import loop that was disabled for testing. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -241,7 +241,11 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
|
||||
reverseHostName := strings.TrimSpace(parts[1])
|
||||
if reverseHostName != "" {
|
||||
host := normalizeHost(reverseHost(reverseHostName))
|
||||
domains = append(domains, domainEntry{host: host, tld: getTLD(host)})
|
||||
tld := getTLD(host)
|
||||
// Only import .com domains for now
|
||||
if tld == "com" {
|
||||
domains = append(domains, domainEntry{host: host, tld: tld})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,10 +32,10 @@ func main() {
|
||||
// Start all loops independently
|
||||
fmt.Println("Starting import, crawl, check, and stats loops...")
|
||||
|
||||
// Import loop (background) - DISABLED for testing, using manual domains
|
||||
// go crawler.ImportDomainsInBackground("vertices.txt.gz")
|
||||
// Import loop (background) - imports .com domains from vertices.txt.gz
|
||||
go crawler.ImportDomainsInBackground("vertices.txt.gz")
|
||||
|
||||
// Add only ycombinator domains for testing
|
||||
// Add test domains (in addition to imported domains)
|
||||
go crawler.ImportTestDomains([]string{
|
||||
"news.ycombinator.com",
|
||||
"ycombinator.com",
|
||||
|
||||
Reference in New Issue
Block a user