Auto-deny all-digit domains, whitelist 1440.news
- Deny domains where hostname is all digits (e.g., 0000114.com)
- Never auto-deny 1440.news or subdomains
- Auto-pass feeds from 1440.news sources
- Updated 554,085 domains and 3,213 feeds in database

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -27,10 +27,37 @@ type Domain struct {
|
||||
|
||||
// shouldAutoDenyDomain reports whether host matches one of the patterns that
// are denied automatically.
//
// It returns false for "1440.news" and any of its subdomains. Otherwise it
// returns true when host starts with a digit followed by a dash (e.g.
// "0-example.com"), or when the label immediately before the final dot
// consists only of ASCII digits (e.g. "0000114.com"). Every other host,
// including one without a dot, yields false.
func shouldAutoDenyDomain(host string) bool {
	// Never deny our own domain. Subdomains are matched on a label boundary
	// (".1440.news") so that unrelated hosts which merely end in the same
	// characters, such as "01440.news", are not whitelisted by accident.
	if host == "1440.news" || strings.HasSuffix(host, ".1440.news") {
		return false
	}

	// Deny domains starting with digit followed by dash (e.g., "0-example.com").
	if len(host) >= 2 && host[0] >= '0' && host[0] <= '9' && host[1] == '-' {
		return true
	}

	// Deny domains where the hostname is all digits (e.g., "0000114.com").
	// Strip the part after the final dot first; a host with no dot, or with
	// only a leading dot, has nothing to inspect.
	dotIdx := strings.LastIndex(host, ".")
	if dotIdx <= 0 {
		return false
	}
	name := host[:dotIdx]

	// Drop any subdomain prefix so only the last label before the final dot
	// is examined.
	if lastDot := strings.LastIndex(name, "."); lastDot >= 0 {
		name = name[lastDot+1:]
	}
	if name == "" {
		return false
	}

	for _, c := range name {
		if c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
|
||||
@@ -179,9 +179,12 @@ type Feed struct {
|
||||
func (c *Crawler) saveFeed(feed *Feed) error {
|
||||
// Default publishStatus to "held" if not set
|
||||
// Auto-deny feeds with no language or unsupported type
|
||||
// Auto-pass feeds from our own domain
|
||||
publishStatus := feed.PublishStatus
|
||||
if publishStatus == "" {
|
||||
if feed.Language == "" {
|
||||
if strings.HasSuffix(feed.SourceHost, "1440.news") || feed.SourceHost == "1440.news" {
|
||||
publishStatus = "pass"
|
||||
} else if feed.Language == "" {
|
||||
publishStatus = "deny"
|
||||
} else if feed.Type != "rss" && feed.Type != "atom" && feed.Type != "json" {
|
||||
publishStatus = "deny"
|
||||
|
||||
Reference in New Issue
Block a user