Files
crawler/scripts/batch_update_status.sh
primal eb83ca3e5d Add partial indexes for domain check and crawl loops
- idx_domains_to_check: status WHERE last_checked_at IS NULL
- idx_domains_to_crawl: status WHERE last_checked_at IS NOT NULL AND last_crawled_at IS NULL

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 13:25:04 -05:00

30 lines
747 B
Bash
Executable File

#!/bin/bash
# Batch update domains from hold to pass.
#
# Repeatedly runs an UPDATE that moves up to BATCH_SIZE rows of the `domains`
# table from status = 'hold' to status = 'pass' (via psql inside the
# atproto-postgres container), prints how many 'hold' rows are left after each
# batch, and stops once the count reaches 0.
#
# Usage: ./batch_update_status.sh
#
# Environment:
#   BATCH_SIZE  Maximum rows selected per UPDATE (default: 10000).
#
# Exit status:
#   0  no 'hold' rows remain
#   1  invalid BATCH_SIZE, a docker/psql call failed, or the row count could
#      not be parsed (the script aborts instead of looping forever)

set -euo pipefail

readonly CONTAINER="atproto-postgres"
readonly DB_USER="news_1440"
readonly DB_NAME="news_1440"

BATCH_SIZE=${BATCH_SIZE:-10000}

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Run a single SQL statement through psql inside the database container.
# Arguments: $1 - SQL text
# Outputs:   unaligned, tuples-only query output on stdout; psql errors on stderr
# Returns:   the exit status of docker exec / psql
run_sql() {
  docker exec "$CONTAINER" \
    psql -X -q -At -v ON_ERROR_STOP=1 -U "$DB_USER" -d "$DB_NAME" -c "$1"
}

# BATCH_SIZE is interpolated into the SQL text, so accept only a positive integer.
[[ "$BATCH_SIZE" =~ ^[1-9][0-9]*$ ]] \
  || die "BATCH_SIZE must be a positive integer, got '$BATCH_SIZE'"

echo "Starting batch update: hold -> pass"

while true; do
  # Update a batch. Only stdout is discarded; stderr stays visible so a failing
  # statement is diagnosable.
  run_sql "
    UPDATE domains SET status = 'pass'
    WHERE host IN (
      SELECT host FROM domains WHERE status = 'hold' LIMIT $BATCH_SIZE
    );
  " > /dev/null || die "batch UPDATE failed"

  # Get rows remaining. A failed query must abort here: an empty result would
  # never equal "0" and the loop would otherwise never terminate.
  remaining=$(run_sql "SELECT COUNT(*) FROM domains WHERE status = 'hold';") \
    || die "could not query remaining row count"
  remaining=${remaining//[[:space:]]/}
  [[ "$remaining" =~ ^[0-9]+$ ]] \
    || die "unexpected row count from psql: '$remaining'"

  echo "Remaining: $remaining"

  if [[ "$remaining" == "0" ]]; then
    echo "Complete!"
    break
  fi

  # Brief pause between batches.
  sleep 0.1
done