diff --git a/db.go b/db.go
index 5d1aef7..f16483e 100644
--- a/db.go
+++ b/db.go
@@ -27,6 +27,8 @@ CREATE TABLE IF NOT EXISTS domains (
 CREATE INDEX IF NOT EXISTS idx_domains_status ON domains(status);
 CREATE INDEX IF NOT EXISTS idx_domains_tld ON domains(tld);
 CREATE INDEX IF NOT EXISTS idx_domains_feeds_found ON domains(feeds_found DESC) WHERE feeds_found > 0;
+CREATE INDEX IF NOT EXISTS idx_domains_to_check ON domains(status) WHERE last_checked_at IS NULL;
+CREATE INDEX IF NOT EXISTS idx_domains_to_crawl ON domains(status) WHERE last_checked_at IS NOT NULL AND last_crawled_at IS NULL;
 
 CREATE TABLE IF NOT EXISTS feeds (
 	url TEXT PRIMARY KEY,
diff --git a/scripts/batch_update_status.sh b/scripts/batch_update_status.sh
new file mode 100755
--- /dev/null
+++ b/scripts/batch_update_status.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Batch update domains from hold to pass
+# Usage: ./batch_update_status.sh
+#
+# Moves rows in batches of BATCH_SIZE instead of one table-wide UPDATE and
+# prints the number of 'hold' rows left after each batch. Exits non-zero if
+# docker/psql fails or the count is not a number, so a broken connection
+# cannot turn the loop into an endless retry.
+
+set -euo pipefail
+
+BATCH_SIZE=10000
+
+# Every statement runs through psql inside the Postgres container;
+# ON_ERROR_STOP makes psql stop and exit non-zero at the first SQL error.
+PSQL=(docker exec atproto-postgres psql -U news_1440 -d news_1440 -v ON_ERROR_STOP=1)
+
+echo "Starting batch update: hold -> pass"
+
+while true; do
+    # Update a batch. stdout (the "UPDATE n" tag) is discarded, but stderr is
+    # kept so failures are visible; set -e aborts the script on a non-zero exit.
+    "${PSQL[@]}" -c "
+        UPDATE domains SET status = 'pass'
+        WHERE host IN (
+            SELECT host FROM domains WHERE status = 'hold' LIMIT $BATCH_SIZE
+        );
+    " > /dev/null
+
+    # Get rows remaining (-t: tuples only, -A: unaligned, so no padding to strip)
+    REMAINING=$("${PSQL[@]}" -t -A -c "SELECT COUNT(*) FROM domains WHERE status = 'hold';")
+
+    # Anything other than a plain integer means the count query misbehaved;
+    # without this check a non-"0" value would keep the loop running forever.
+    if ! [[ "$REMAINING" =~ ^[0-9]+$ ]]; then
+        echo "Unexpected remaining count: '$REMAINING'" >&2
+        exit 1
+    fi
+
+    echo "Remaining: $REMAINING"
+
+    if [ "$REMAINING" = "0" ]; then
+        echo "Complete!"
+        break
+    fi
+
+    sleep 0.1
+done