Migrate to normalized FK schema (domain_host, domain_tld)
Replace the source_host column with a proper foreign key to the domains table, using the composite key (domain_host, domain_tld). This enables JOIN queries instead of string concatenation for domain lookups.

Changes:
- Update Feed struct: SourceHost/TLD → DomainHost/DomainTLD
- Update all SQL queries to use the domain_host/domain_tld columns
- Add column aliases (as source_host) for API backwards compatibility
- Update the trigram index from source_host to domain_host
- Add getDomainHost() helper for extracting the host from a domain

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -36,14 +36,17 @@ CREATE INDEX IF NOT EXISTS idx_domains_host_trgm ON domains USING GIN(host gin_t
|
||||
|
||||
CREATE TABLE IF NOT EXISTS feeds (
|
||||
url TEXT PRIMARY KEY,
|
||||
domain_host TEXT NOT NULL,
|
||||
domain_tld tld_enum NOT NULL,
|
||||
type TEXT,
|
||||
category TEXT DEFAULT 'main',
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
language TEXT,
|
||||
site_url TEXT,
|
||||
source_url TEXT,
|
||||
|
||||
discovered_at TIMESTAMP NOT NULL,
|
||||
discovered_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
last_checked_at TIMESTAMP, -- feed_check: when last checked for new items
|
||||
next_check_at TIMESTAMP, -- feed_check: when to next check
|
||||
last_build_date TIMESTAMP,
|
||||
@@ -51,134 +54,67 @@ CREATE TABLE IF NOT EXISTS feeds (
|
||||
etag TEXT,
|
||||
last_modified TEXT,
|
||||
|
||||
status TEXT DEFAULT 'pass' CHECK(status IN ('hold', 'pass', 'skip')),
|
||||
status TEXT NOT NULL DEFAULT 'pass',
|
||||
last_error TEXT,
|
||||
last_error_at TIMESTAMP,
|
||||
|
||||
source_url TEXT,
|
||||
source_host TEXT,
|
||||
tld TEXT,
|
||||
|
||||
item_count INTEGER,
|
||||
item_count INTEGER NOT NULL DEFAULT 0,
|
||||
oldest_item_date TIMESTAMP,
|
||||
newest_item_date TIMESTAMP,
|
||||
|
||||
no_update INTEGER DEFAULT 0,
|
||||
no_update INTEGER NOT NULL DEFAULT 0,
|
||||
|
||||
-- Publishing to PDS
|
||||
publish_status TEXT DEFAULT 'hold' CHECK(publish_status IN ('hold', 'pass', 'skip')),
|
||||
publish_status TEXT NOT NULL DEFAULT 'hold',
|
||||
publish_account TEXT,
|
||||
|
||||
-- Full-text search vector
|
||||
search_vector tsvector GENERATED ALWAYS AS (
|
||||
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
|
||||
setweight(to_tsvector('english', coalesce(description, '')), 'B') ||
|
||||
setweight(to_tsvector('english', coalesce(url, '')), 'C')
|
||||
) STORED
|
||||
FOREIGN KEY (domain_host, domain_tld) REFERENCES domains(host, tld)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_source_host ON feeds(source_host);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_publish_status ON feeds(publish_status);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_source_host_url ON feeds(source_host, url);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_tld ON feeds(tld);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_tld_source_host ON feeds(tld, source_host);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_source_host_trgm ON feeds USING GIN(source_host gin_trgm_ops);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_type ON feeds(type);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_category ON feeds(category);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_status ON feeds(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_discovered_at ON feeds(discovered_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_title ON feeds(title);
|
||||
CREATE INDEX IF NOT EXISTS idx_feeds_search ON feeds USING GIN(search_vector);
|
||||
-- idx_feeds_to_check created in migrations after column rename
|
||||
-- Indexes will be added as needed based on query patterns
|
||||
|
||||
CREATE TABLE IF NOT EXISTS items (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
feed_url TEXT NOT NULL,
|
||||
guid TEXT,
|
||||
guid TEXT NOT NULL,
|
||||
feed_url TEXT NOT NULL REFERENCES feeds(url) ON DELETE CASCADE,
|
||||
title TEXT,
|
||||
link TEXT,
|
||||
description TEXT,
|
||||
content TEXT,
|
||||
author TEXT,
|
||||
pub_date TIMESTAMP,
|
||||
discovered_at TIMESTAMP NOT NULL,
|
||||
discovered_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMP,
|
||||
|
||||
-- Media attachments
|
||||
enclosure_url TEXT,
|
||||
enclosure_type TEXT,
|
||||
enclosure_length BIGINT,
|
||||
image_urls TEXT, -- JSON array of image URLs
|
||||
tags TEXT, -- JSON array of category/tag strings
|
||||
image_urls JSONB,
|
||||
tags JSONB,
|
||||
|
||||
-- Publishing to PDS
|
||||
published_at TIMESTAMP,
|
||||
published_uri TEXT,
|
||||
|
||||
-- Full-text search vector
|
||||
search_vector tsvector GENERATED ALWAYS AS (
|
||||
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
|
||||
setweight(to_tsvector('english', coalesce(description, '')), 'B') ||
|
||||
setweight(to_tsvector('english', coalesce(content, '')), 'C') ||
|
||||
setweight(to_tsvector('english', coalesce(author, '')), 'D')
|
||||
) STORED,
|
||||
|
||||
UNIQUE(feed_url, guid)
|
||||
PRIMARY KEY (guid, feed_url)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_items_feed_url ON items(feed_url);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_pub_date ON items(pub_date DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_link ON items(link);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_feed_url_pub_date ON items(feed_url, pub_date DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_unpublished ON items(feed_url, published_at) WHERE published_at IS NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_items_search ON items USING GIN(search_vector);
|
||||
-- Indexes will be added as needed based on query patterns
|
||||
|
||||
-- URL Shortener tables
|
||||
CREATE TABLE IF NOT EXISTS short_urls (
|
||||
code TEXT PRIMARY KEY,
|
||||
original_url TEXT NOT NULL,
|
||||
item_id BIGINT REFERENCES items(id),
|
||||
feed_url TEXT,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'),
|
||||
click_count INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_short_urls_original ON short_urls(original_url);
|
||||
CREATE INDEX IF NOT EXISTS idx_short_urls_item_id ON short_urls(item_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_short_urls_feed_url ON short_urls(feed_url);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS clicks (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
short_code TEXT NOT NULL REFERENCES short_urls(code),
|
||||
clicked_at TIMESTAMP NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'),
|
||||
referrer TEXT,
|
||||
user_agent TEXT,
|
||||
ip_hash TEXT,
|
||||
country TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_clicks_short_code ON clicks(short_code);
|
||||
CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON clicks(clicked_at DESC);
|
||||
|
||||
-- OAuth sessions (persisted for login persistence across deploys)
|
||||
CREATE TABLE IF NOT EXISTS oauth_sessions (
|
||||
-- OAuth sessions
|
||||
CREATE TABLE IF NOT EXISTS sessions (
|
||||
id TEXT PRIMARY KEY,
|
||||
did TEXT NOT NULL,
|
||||
handle TEXT NOT NULL,
|
||||
created_at TIMESTAMP NOT NULL,
|
||||
expires_at TIMESTAMP NOT NULL,
|
||||
access_token TEXT,
|
||||
access_token TEXT NOT NULL,
|
||||
refresh_token TEXT,
|
||||
token_expiry TIMESTAMP,
|
||||
dpop_private_jwk TEXT,
|
||||
dpop_authserver_nonce TEXT,
|
||||
dpop_pds_nonce TEXT,
|
||||
pds_url TEXT,
|
||||
authserver_iss TEXT
|
||||
token_type TEXT NOT NULL DEFAULT 'DPoP',
|
||||
expires_at TIMESTAMP NOT NULL,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
dpop_nonce TEXT,
|
||||
dpop_private_jwk TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_oauth_sessions_expires_at ON oauth_sessions(expires_at);
|
||||
|
||||
-- Trigger to normalize feed URLs on insert/update (strips https://, http://, www.)
|
||||
CREATE OR REPLACE FUNCTION normalize_feed_url()
|
||||
RETURNS TRIGGER AS $$
|
||||
@@ -212,8 +148,8 @@ func OpenDatabase(connString string) (*DB, error) {
|
||||
// Build from individual env vars
|
||||
host := getEnvOrDefault("DB_HOST", "atproto-postgres")
|
||||
port := getEnvOrDefault("DB_PORT", "5432")
|
||||
user := getEnvOrDefault("DB_USER", "news_1440")
|
||||
dbname := getEnvOrDefault("DB_NAME", "news_1440")
|
||||
user := getEnvOrDefault("DB_USER", "dba_1440_news")
|
||||
dbname := getEnvOrDefault("DB_NAME", "db_1440_news")
|
||||
|
||||
// Support Docker secrets (password file) or direct password
|
||||
password := os.Getenv("DB_PASSWORD")
|
||||
@@ -271,7 +207,7 @@ func OpenDatabase(connString string) (*DB, error) {
|
||||
// Indexes must match LOWER() used in queries
|
||||
pool.Exec(ctx, "CREATE EXTENSION IF NOT EXISTS pg_trgm")
|
||||
pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_domains_host_trgm ON domains USING gin (LOWER(host) gin_trgm_ops)")
|
||||
pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_source_host_trgm ON feeds USING gin (LOWER(source_host) gin_trgm_ops)")
|
||||
pool.Exec(ctx, "CREATE INDEX IF NOT EXISTS idx_feeds_domain_host_trgm ON feeds USING gin (LOWER(domain_host) gin_trgm_ops)")
|
||||
|
||||
// Migration: rename feed columns for consistent terminology
|
||||
// last_crawled_at -> last_checked_at (feed_check = checking feeds for new items)
|
||||
|
||||
Reference in New Issue
Block a user