Add partial indexes for domain check and crawl loops

- idx_domains_to_check: status WHERE last_checked_at IS NULL
- idx_domains_to_crawl: status WHERE last_checked_at IS NOT NULL AND last_crawled_at IS NULL

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
primal
2026-01-30 13:25:04 -05:00
parent 522233c4a2
commit eb83ca3e5d
2 changed files with 31 additions and 0 deletions
+29
View File
@@ -0,0 +1,29 @@
#!/bin/bash
# Batch update domains from hold to pass
# Usage: ./batch_update_status.sh
#
# Repeatedly flips up to BATCH_SIZE rows of the `domains` table from
# status 'hold' to status 'pass', printing how many 'hold' rows are left
# after each batch, and stops once that count reaches zero.
#
# Every statement is run through `docker exec` in the atproto-postgres
# container. The script exits non-zero (with a message on stderr) as soon
# as a statement fails or the remaining-row count cannot be read, instead
# of looping forever on an empty/garbage count.
set -euo pipefail

readonly BATCH_SIZE=10000
readonly PG_CONTAINER="atproto-postgres"
readonly PG_USER="news_1440"
readonly PG_DATABASE="news_1440"

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Run psql inside the database container with the given extra arguments.
# ON_ERROR_STOP is set so a failing statement is reported through the
# exit status rather than being silently skipped.
run_psql() {
  docker exec "$PG_CONTAINER" \
    psql -U "$PG_USER" -d "$PG_DATABASE" -v ON_ERROR_STOP=1 "$@"
}

main() {
  local remaining

  echo "Starting batch update: hold -> pass"

  while true; do
    # Update a batch. Only stdout (the "UPDATE n" tag) is discarded;
    # stderr stays visible so failures can be diagnosed.
    run_psql -c "
UPDATE domains SET status = 'pass'
WHERE host IN (
SELECT host FROM domains WHERE status = 'hold' LIMIT $BATCH_SIZE
);
" > /dev/null || die "batch update failed"

    # Get rows remaining (-t: tuples only, -A: unaligned, so the output
    # is just the number).
    remaining=$(run_psql -t -A -c \
      "SELECT COUNT(*) FROM domains WHERE status = 'hold';") \
      || die "could not count remaining rows"
    remaining=${remaining//[[:space:]]/}

    # Anything other than a plain integer means the query did not return
    # what we expect; stop rather than spin.
    [[ "$remaining" =~ ^[0-9]+$ ]] \
      || die "unexpected remaining-row count: '$remaining'"

    echo "Remaining: $remaining"

    if [[ "$remaining" == "0" ]]; then
      echo "Complete!"
      break
    fi

    # Short pause between batches.
    sleep 0.1
  done
}

main "$@"