From 959abf06c092644c93cd64b20b3fd57a9b1ed26a Mon Sep 17 00:00:00 2001
From: primal
Date: Wed, 28 Jan 2026 21:59:14 -0500
Subject: [PATCH] Enable .com domain import from vertices.txt.gz

Filter imported domains to only .com TLD for now. Re-enabled the
import loop that was disabled for testing.

Co-Authored-By: Claude Opus 4.5
---
 domain.go | 6 +++++-
 main.go   | 6 +++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/domain.go b/domain.go
index 75f36d4..95ae455 100644
--- a/domain.go
+++ b/domain.go
@@ -241,7 +241,11 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
 			reverseHostName := strings.TrimSpace(parts[1])
 			if reverseHostName != "" {
 				host := normalizeHost(reverseHost(reverseHostName))
-				domains = append(domains, domainEntry{host: host, tld: getTLD(host)})
+				tld := getTLD(host)
+				// Only import .com domains for now
+				if tld == "com" {
+					domains = append(domains, domainEntry{host: host, tld: tld})
+				}
 			}
 		}
 	}
diff --git a/main.go b/main.go
index b0ecc3a..69ffb40 100644
--- a/main.go
+++ b/main.go
@@ -32,10 +32,10 @@ func main() {
 	// Start all loops independently
 	fmt.Println("Starting import, crawl, check, and stats loops...")
 
-	// Import loop (background) - DISABLED for testing, using manual domains
-	// go crawler.ImportDomainsInBackground("vertices.txt.gz")
+	// Import loop (background) - imports .com domains from vertices.txt.gz
+	go crawler.ImportDomainsInBackground("vertices.txt.gz")
 
-	// Add only ycombinator domains for testing
+	// Add test domains (in addition to imported domains)
 	go crawler.ImportTestDomains([]string{
 		"news.ycombinator.com",
 		"ycombinator.com",