package main

import (
	"context"
	"fmt"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)

const schema = `
CREATE TABLE IF NOT EXISTS feeds (
	url TEXT PRIMARY KEY,
	type TEXT,
	title TEXT,
	description TEXT,
	language TEXT,

	last_checked_at TIMESTAMP, -- feed_check: when last checked for new items

	etag TEXT,
	last_modified TEXT,

	-- Status: PUBLISH, STANDBY, IGNORE
	status TEXT NOT NULL DEFAULT 'STANDBY',
	last_error TEXT,

	miss_count INTEGER NOT NULL DEFAULT 0,

	-- Publishing to PDS
	publish_account TEXT
);

-- Indexes will be added as needed based on query patterns

CREATE TABLE IF NOT EXISTS items (
	link TEXT NOT NULL,
	feed_url TEXT NOT NULL REFERENCES feeds(url) ON DELETE CASCADE,
	title TEXT,
	description TEXT,
	author TEXT,
	pub_date TIMESTAMP,

	-- Media attachments
	enclosure_url TEXT,
	enclosure_type TEXT,
	image_urls JSONB,
	tags JSONB,

	-- Item status: 'pass' (default, eligible for publishing), 'fail' (rejected)
	status TEXT NOT NULL DEFAULT 'pass',

	-- Publishing to PDS
	published_at TIMESTAMP,
	published_uri TEXT,

	PRIMARY KEY (link, feed_url)
);

-- Indexes will be added as needed based on query patterns

-- OAuth sessions
CREATE TABLE IF NOT EXISTS oauth_sessions (
	id TEXT PRIMARY KEY,
	did TEXT NOT NULL,
	handle TEXT NOT NULL,
	access_token TEXT,
	refresh_token TEXT,
	token_type TEXT NOT NULL DEFAULT 'DPoP',
	expires_at TIMESTAMP NOT NULL,
	created_at TIMESTAMP NOT NULL DEFAULT NOW(),
	dpop_private_jwk TEXT,
	dpop_authserver_nonce TEXT,
	dpop_pds_nonce TEXT,
	pds_url TEXT,
	authserver_iss TEXT,
	token_expiry TIMESTAMP
);

-- Trigger to normalize feed URLs on insert/update (strips https://, http://, www.)
CREATE OR REPLACE FUNCTION normalize_feed_url()
RETURNS TRIGGER AS $$
BEGIN
	NEW.url = regexp_replace(NEW.url, '^https?://', '');
	NEW.url = regexp_replace(NEW.url, '^www\.', '');
	RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS normalize_feed_url_trigger ON feeds;
CREATE TRIGGER normalize_feed_url_trigger
	BEFORE INSERT OR UPDATE ON feeds
	FOR EACH ROW
	EXECUTE FUNCTION normalize_feed_url();
`
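
// Illustrative effect of the normalize_feed_url trigger (hypothetical values,
// not from this codebase):
//
//	INSERT INTO feeds (url) VALUES ('https://www.example.com/rss.xml');
//	-- stored as: url = 'example.com/rss.xml'
//
// Only a leading scheme and a leading 'www.' are stripped; ports, paths, and
// query strings are preserved, so 'http://example.com:8080/feed?x=1' is stored
// as 'example.com:8080/feed?x=1'.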

// DB wraps pgxpool.Pool with helper methods
type DB struct {
	*pgxpool.Pool
}

func OpenDatabase(connString string) (*DB, error) {
	fmt.Printf("Connecting to database...\n")

	// If connection string not provided, try environment variables
	if connString == "" {
		connString = os.Getenv("DATABASE_URL")
	}
	if connString == "" {
		// Build from individual env vars
		host := getEnvOrDefault("DB_HOST", "atproto-postgres")
		port := getEnvOrDefault("DB_PORT", "5432")
		user := getEnvOrDefault("DB_USER", "dba_1440_news")
		dbname := getEnvOrDefault("DB_NAME", "db_1440_news")

		// Support Docker secrets (password file) or direct password
		password := os.Getenv("DB_PASSWORD")
		if password == "" {
			if passwordFile := os.Getenv("DB_PASSWORD_FILE"); passwordFile != "" {
				data, err := os.ReadFile(passwordFile)
				if err != nil {
					return nil, fmt.Errorf("failed to read password file: %w", err)
				}
				password = strings.TrimSpace(string(data))
			}
		}

		connString = fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable",
			user, url.QueryEscape(password), host, port, dbname)
	}
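
	// Resulting DSN shape (illustrative credentials, not real values):
	//   postgres://dba_1440_news:p%40ssw0rd@atproto-postgres:5432/db_1440_news?sslmode=disable
	// url.QueryEscape keeps characters like '@' or '/' in the password from
	// breaking URL parsing.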

	config, err := pgxpool.ParseConfig(connString)
	if err != nil {
		return nil, fmt.Errorf("failed to parse connection string: %w", err)
	}

	// Connection pool settings
	config.MaxConns = 10
	config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
	config.MaxConnLifetime = 5 * time.Minute
	config.MaxConnIdleTime = 1 * time.Minute

	ctx := context.Background()
	pool, err := pgxpool.NewWithConfig(ctx, config)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to database: %w", err)
	}

	// Verify connection
	if err := pool.Ping(ctx); err != nil {
		pool.Close()
		return nil, fmt.Errorf("failed to ping database: %w", err)
	}
	fmt.Println(" Connected to PostgreSQL")

	db := &DB{pool}

	// Check if schema already exists (check for feeds table).
	// The Scan error is deliberately ignored: on failure tableExists stays
	// false and the idempotent CREATE IF NOT EXISTS schema below simply runs.
	var tableExists bool
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'feeds')").Scan(&tableExists)

	if !tableExists {
		// Create schema only if tables don't exist
		if _, err := pool.Exec(ctx, schema); err != nil {
			pool.Close()
			return nil, fmt.Errorf("failed to create schema: %w", err)
		}
	}
	fmt.Println(" Schema OK")

	// Migration: add trigram extension for fast LIKE searches
	pool.Exec(ctx, "CREATE EXTENSION IF NOT EXISTS pg_trgm")

	// Migration: drop domains table (no longer used - feeds are imported from CDX)
	pool.Exec(ctx, "DROP TABLE IF EXISTS domains CASCADE")

	// Migration: drop domain_host and domain_tld columns from feeds (use URL for domain searches)
	pool.Exec(ctx, "DROP INDEX IF EXISTS idx_feeds_domain_host_trgm")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS domain_host")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS domain_tld")

	// Migration: drop category column from feeds (not used)
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS category")

	// Migration: drop site_url column from feeds (derive from feed URL instead)
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS site_url")

	// Migration: drop last_error_at column from feeds (last_checked_at + miss_count sufficient)
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS last_error_at")

	// Migration: drop item_count column from feeds (compute from items table instead)
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS item_count")

	// Migration: drop content column from items (only description is used for posts)
	pool.Exec(ctx, "ALTER TABLE items DROP COLUMN IF EXISTS content")

	// Migration: replace guid with link as primary key for items.
	// Check if the guid column still exists.
	var guidExists bool
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name = 'items' AND column_name = 'guid')").Scan(&guidExists)
	if guidExists {
		// Delete items without links (useless for publishing)
		pool.Exec(ctx, "DELETE FROM items WHERE link IS NULL OR link = ''")
		// Drop old primary key, drop guid, add new primary key
		pool.Exec(ctx, "ALTER TABLE items DROP CONSTRAINT IF EXISTS items_pkey")
		pool.Exec(ctx, "ALTER TABLE items DROP COLUMN guid")
		pool.Exec(ctx, "ALTER TABLE items ADD PRIMARY KEY (link, feed_url)")
		// Make link NOT NULL
		pool.Exec(ctx, "ALTER TABLE items ALTER COLUMN link SET NOT NULL")
	}

	// Migration: rename old 'sessions' table to 'oauth_sessions'
	var oldSessionsExists, newSessionsExists bool
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'sessions')").Scan(&oldSessionsExists)
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'oauth_sessions')").Scan(&newSessionsExists)
	if oldSessionsExists && !newSessionsExists {
		pool.Exec(ctx, "ALTER TABLE sessions RENAME TO oauth_sessions")
	}
	// Add token_expiry column if missing (used by OAuth library)
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ADD COLUMN IF NOT EXISTS token_expiry TIMESTAMP")
	// Make access_token nullable (session created before tokens obtained)
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN access_token DROP NOT NULL")
	// Add missing OAuth session columns
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ADD COLUMN IF NOT EXISTS dpop_authserver_nonce TEXT")
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ADD COLUMN IF NOT EXISTS dpop_pds_nonce TEXT")
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ADD COLUMN IF NOT EXISTS pds_url TEXT")
	pool.Exec(ctx, "ALTER TABLE oauth_sessions ADD COLUMN IF NOT EXISTS authserver_iss TEXT")
	// Drop old dpop_nonce column if it exists
	pool.Exec(ctx, "ALTER TABLE oauth_sessions DROP COLUMN IF EXISTS dpop_nonce")

	// Migration: rename feed columns for consistent terminology.
	// last_crawled_at -> last_checked_at (feed_check = checking feeds for new items).
	// Check if the old column name exists before renaming.
	var colExists bool
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='feeds' AND column_name='last_crawled_at')").Scan(&colExists)
	if colExists {
		pool.Exec(ctx, "ALTER TABLE feeds RENAME COLUMN last_crawled_at TO last_checked_at")
	}
	// Drop legacy columns if they exist (no longer used)
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS next_check_at")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS source_url")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS discovered_at")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS last_build_date")
	pool.Exec(ctx, "ALTER TABLE items DROP COLUMN IF EXISTS discovered_at")
	// Create index for feed check scheduling
	pool.Exec(ctx, "DROP INDEX IF EXISTS idx_feeds_to_check")
	pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_to_check ON feeds(last_checked_at NULLS FIRST, miss_count) WHERE status IN ('PUBLISH', 'STANDBY')")
	// Drop old index name if it exists
	pool.Exec(ctx, "DROP INDEX IF EXISTS idx_feeds_due_check")

	// Migration: convert TIMESTAMPTZ to TIMESTAMP (all times are GMT).
	// Helper to check if a column is already TIMESTAMP (skip if already migrated).
	isTimestamp := func(table, column string) bool {
		var dataType string
		pool.QueryRow(ctx, `
			SELECT data_type FROM information_schema.columns
			WHERE table_name = $1 AND column_name = $2
		`, table, column).Scan(&dataType)
		return dataType == "timestamp without time zone"
	}
	// feeds table
	if !isTimestamp("feeds", "last_checked_at") {
		pool.Exec(ctx, "ALTER TABLE feeds ALTER COLUMN last_checked_at TYPE TIMESTAMP USING last_checked_at AT TIME ZONE 'UTC'")
	}
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS oldest_item_date")
	pool.Exec(ctx, "ALTER TABLE feeds DROP COLUMN IF EXISTS newest_item_date")
	// items table
	if !isTimestamp("items", "pub_date") {
		pool.Exec(ctx, "ALTER TABLE items ALTER COLUMN pub_date TYPE TIMESTAMP USING pub_date AT TIME ZONE 'UTC'")
	}
	// Migration: drop updated_at column from items (was never populated by parsers)
	pool.Exec(ctx, "ALTER TABLE items DROP COLUMN IF EXISTS updated_at")
	// Migration: drop enclosure_length column from items (was never used)
	pool.Exec(ctx, "ALTER TABLE items DROP COLUMN IF EXISTS enclosure_length")
	if !isTimestamp("items", "published_at") {
		pool.Exec(ctx, "ALTER TABLE items ALTER COLUMN published_at TYPE TIMESTAMP USING published_at AT TIME ZONE 'UTC'")
	}
	// short_urls table
	if !isTimestamp("short_urls", "created_at") {
		pool.Exec(ctx, "ALTER TABLE short_urls ALTER COLUMN created_at TYPE TIMESTAMP USING created_at AT TIME ZONE 'UTC'")
	}
	// clicks table
	if !isTimestamp("clicks", "clicked_at") {
		pool.Exec(ctx, "ALTER TABLE clicks ALTER COLUMN clicked_at TYPE TIMESTAMP USING clicked_at AT TIME ZONE 'UTC'")
	}
	// oauth_sessions table
	if !isTimestamp("oauth_sessions", "created_at") {
		pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN created_at TYPE TIMESTAMP USING created_at AT TIME ZONE 'UTC'")
	}
	if !isTimestamp("oauth_sessions", "expires_at") {
		pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN expires_at TYPE TIMESTAMP USING expires_at AT TIME ZONE 'UTC'")
	}
	if !isTimestamp("oauth_sessions", "token_expiry") {
		pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN token_expiry TYPE TIMESTAMP USING token_expiry AT TIME ZONE 'UTC'")
	}

	// Migration: rename item_id to item_guid in short_urls table (items now use composite PK)
	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='short_urls' AND column_name='item_id')").Scan(&colExists)
	if colExists {
		// Drop the column and add item_guid instead (can't convert int64 to text meaningfully)
		pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
		pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
	}

	// Migration: add status column to items table (pass/fail for publishing)
	pool.Exec(ctx, "ALTER TABLE items ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'pass'")

	// Migration: add cdx_progress table to track current import progress
	// current_file empty = done with this crawl
	pool.Exec(ctx, `
		CREATE TABLE IF NOT EXISTS cdx_progress (
			id INTEGER PRIMARY KEY DEFAULT 1,
			crawl_id TEXT NOT NULL,
			current_file TEXT NOT NULL DEFAULT '',
			total_feeds INTEGER NOT NULL DEFAULT 0,
			updated_at TIMESTAMP NOT NULL DEFAULT NOW()
		)
	`)
	// Drop old tables if they exist (migrating to simpler approach)
	pool.Exec(ctx, "DROP TABLE IF EXISTS cdx_imports")
	pool.Exec(ctx, "DROP TABLE IF EXISTS cdx_parquet_files")

	fmt.Println(" Migrations OK")

	// Run stats and background index creation
	go func() {
		var feedCount int
		pool.QueryRow(context.Background(), "SELECT COUNT(*) FROM feeds").Scan(&feedCount)
		fmt.Printf(" Existing data: %d feeds\n", feedCount)

		fmt.Println(" Running ANALYZE...")
		if _, err := pool.Exec(context.Background(), "ANALYZE"); err != nil {
			fmt.Printf(" Warning: ANALYZE failed: %v\n", err)
		} else {
			fmt.Println(" ANALYZE complete")
		}

		// Create trigram index on items.title in background (CONCURRENTLY = no table lock).
		// Check if the index already exists first.
		var indexExists bool
		pool.QueryRow(context.Background(),
			"SELECT EXISTS(SELECT 1 FROM pg_indexes WHERE indexname = 'idx_items_title_trgm')").Scan(&indexExists)
		if !indexExists {
			fmt.Println(" Creating trigram index on items.title (background, may take a while)...")
			if _, err := pool.Exec(context.Background(),
				"CREATE INDEX CONCURRENTLY idx_items_title_trgm ON items USING gin (LOWER(title) gin_trgm_ops)"); err != nil {
				fmt.Printf(" Warning: items title trigram index failed: %v\n", err)
			} else {
				fmt.Println(" Trigram index on items.title complete")
			}
		}
	}()

	return db, nil
}
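
// Usage sketch (hypothetical caller; assumes DATABASE_URL or the DB_* env
// vars above are set):
//
//	db, err := OpenDatabase("")
//	if err != nil {
//		log.Fatalf("db: %v", err)
//	}
//	defer db.Close()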

// getEnvOrDefault returns the environment variable's value, or defaultVal if
// it is unset or empty
func getEnvOrDefault(key, defaultVal string) string {
	if val := os.Getenv(key); val != "" {
		return val
	}
	return defaultVal
}

// QueryRow wraps pool.QueryRow for compatibility
func (db *DB) QueryRow(query string, args ...interface{}) pgx.Row {
	return db.Pool.QueryRow(context.Background(), query, args...)
}

// Query wraps pool.Query for compatibility
func (db *DB) Query(query string, args ...interface{}) (pgx.Rows, error) {
	return db.Pool.Query(context.Background(), query, args...)
}

// Exec wraps pool.Exec for compatibility
func (db *DB) Exec(query string, args ...interface{}) (int64, error) {
	result, err := db.Pool.Exec(context.Background(), query, args...)
	if err != nil {
		return 0, err
	}
	return result.RowsAffected(), nil
}
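
// Example (illustrative values): the Exec wrapper returns the affected-row
// count directly instead of a pgconn.CommandTag.
//
//	n, err := db.Exec("UPDATE feeds SET status = 'IGNORE' WHERE miss_count > $1", 10)
//	// n holds the number of feeds updated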

// Begin starts a transaction
func (db *DB) Begin() (pgx.Tx, error) {
	return db.Pool.Begin(context.Background())
}

// Close closes the connection pool
func (db *DB) Close() error {
	db.Pool.Close()
	return nil
}

// NullableString returns nil for empty strings, otherwise a pointer to the string
func NullableString(s string) *string {
	if s == "" {
		return nil
	}
	return &s
}

// NullableTime returns nil for zero times, otherwise a pointer to the time
func NullableTime(t time.Time) *time.Time {
	if t.IsZero() {
		return nil
	}
	return &t
}

// StringValue returns the empty string for nil, otherwise the dereferenced value
func StringValue(s *string) string {
	if s == nil {
		return ""
	}
	return *s
}

// TimeValue returns the zero time for nil, otherwise the dereferenced value
func TimeValue(t *time.Time) time.Time {
	if t == nil {
		return time.Time{}
	}
	return *t
}
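
// These helpers map between Go zero values and SQL NULLs when binding
// parameters or scanning rows. A quick illustration (hypothetical item values):
//
//	desc := NullableString("")       // nil -> stored as NULL
//	ts := NullableTime(item.PubDate) // nil if PubDate is the zero time
//	_, err := db.Exec("UPDATE items SET description = $1, pub_date = $2 WHERE link = $3 AND feed_url = $4",
//		desc, ts, item.Link, item.FeedURL)
//	// ...and back: StringValue(nil) == "", TimeValue(nil).IsZero() == true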

// ToSearchQuery converts a user query to PostgreSQL tsquery format.
// Note: words are joined with '&' but not escaped, so tsquery operator
// characters in user input can still produce invalid queries.
func ToSearchQuery(query string) string {
	// Simple conversion: split on spaces and join with &
	words := strings.Fields(query)
	if len(words) == 0 {
		return ""
	}
	return strings.Join(words, " & ")
}
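
// For example, ToSearchQuery("solar power storage") returns
// "solar & power & storage", suitable for passing to to_tsquery($1).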

// CDXProgress represents the current CDX import progress
type CDXProgress struct {
	CrawlID     string
	CurrentFile string // empty = done
	TotalFeeds  int
}

// GetCDXProgress returns the current CDX import progress.
// If no progress row exists yet, the zero value is returned.
func (db *DB) GetCDXProgress() CDXProgress {
	var p CDXProgress
	db.QueryRow(`
		SELECT COALESCE(crawl_id, ''), COALESCE(current_file, ''), COALESCE(total_feeds, 0)
		FROM cdx_progress WHERE id = 1
	`).Scan(&p.CrawlID, &p.CurrentFile, &p.TotalFeeds)
	return p
}

// SetCDXProgress updates the current CDX import progress
func (db *DB) SetCDXProgress(crawlID, currentFile string, totalFeeds int) error {
	_, err := db.Exec(`
		INSERT INTO cdx_progress (id, crawl_id, current_file, total_feeds, updated_at)
		VALUES (1, $1, $2, $3, NOW())
		ON CONFLICT (id) DO UPDATE SET
			crawl_id = $1,
			current_file = $2,
			total_feeds = $3,
			updated_at = NOW()
	`, crawlID, currentFile, totalFeeds)
	return err
}

// CompleteCDXProgress marks the current crawl as complete (empty current_file)
func (db *DB) CompleteCDXProgress(crawlID string, totalFeeds int) error {
	return db.SetCDXProgress(crawlID, "", totalFeeds)
}
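
// A minimal sketch of how an importer might drive the progress tracker
// (hypothetical crawl ID, file names, and importFile helper):
//
//	total := 0
//	for _, f := range parquetFiles {
//		total += importFile(f) // hypothetical import step, returns feeds found
//		db.SetCDXProgress("CC-MAIN-2025-05", f, total)
//	}
//	db.CompleteCDXProgress("CC-MAIN-2025-05", total)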