From 3f277ec165acc134fa2481e53107904a7f589aaa Mon Sep 17 00:00:00 2001
From: primal
Date: Sun, 1 Feb 2026 23:51:44 -0500
Subject: [PATCH] Remove item ID column references

Items are now identified by the composite primary key (guid, feed_url).

- Remove ID field from Item struct
- Remove ID field from SearchItem struct
- Update all SQL queries to no longer select the id column
- Change MarkItemPublished to use feedURL/guid instead of id
- Update shortener to use item_guid instead of item_id
- Add migration to replace item_id with item_guid in short_urls table
- Update API endpoints to use feedUrl/guid instead of itemId

Co-Authored-By: Claude Opus 4.5
---
 api_publish.go     | 65 ++++++++++++++++++++++++----------------------
 api_search.go      |  7 ++---
 crawler.go         | 10 +++----
 db.go              | 24 +++++++++++++----
 docker-compose.yml |  6 ++---
 item.go            | 15 +++++------
 shortener.go       | 39 +++++++++++++++-------------
 templates.go       |  4 +--
 8 files changed, 93 insertions(+), 77 deletions(-)

diff --git a/api_publish.go b/api_publish.go
index 176b8b4..54f9296 100644
--- a/api_publish.go
+++ b/api_publish.go
@@ -504,15 +504,16 @@ func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Reque
 }
 
 // handleAPITestPublish tests publishing a single item to PDS
-// Requires: url (feed), itemId, handle, password, pds (optional, defaults to https://1440.news)
+// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news)
 func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
-	itemIDStr := r.URL.Query().Get("itemId")
+	feedURL := r.URL.Query().Get("feedUrl")
+	guidParam := r.URL.Query().Get("guid")
 	handle := r.URL.Query().Get("handle")
 	password := r.URL.Query().Get("password")
 	pdsHost := r.URL.Query().Get("pds")
 
-	if itemIDStr == "" {
-		http.Error(w, "itemId parameter required", http.StatusBadRequest)
+	if feedURL == "" || guidParam == "" {
+		http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest)
 		return
 	}
 	if handle == "" || password == "" {
@@ -523,9 +524,6 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 		pdsHost = "https://1440.news"
 	}
 
-	var itemID int64
-	fmt.Sscanf(itemIDStr, "%d", &itemID)
-
 	// Get the item
 	var item Item
 	var guid, title, link, description, content, author *string
 	var pubDate, publishedAt, updatedAt *time.Time
 	var publishedUri *string
 
 	err := c.db.QueryRow(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
-		FROM items WHERE id = $1
-	`, itemID).Scan(
-		&item.ID, &item.FeedURL, &guid, &title, &link,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
+		FROM items WHERE feed_url = $1 AND guid = $2
+	`, feedURL, guidParam).Scan(
+		&item.FeedURL, &guid, &title, &link,
 		&description, &content, &author, &pubDate, &item.DiscoveredAt,
 		&updatedAt, &publishedAt, &publishedUri,
 	)
@@ -571,7 +569,7 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 	}
 
 	// Mark as published
-	c.MarkItemPublished(item.ID, uri)
+	c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 
 	// Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
 	rkeyTime := item.PubDate
@@ -580,11 +578,12 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 	}
 
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(map[string]interface{}{
-		"status": "published",
-		"uri":    uri,
-		"itemId": item.ID,
-		"title":  item.Title,
-		"rkey":   GenerateRkey(item.GUID, rkeyTime),
+		"status":  "published",
+		"uri":     uri,
+		"feedUrl": item.FeedURL,
+		"guid":    item.GUID,
+		"title":   item.Title,
+		"rkey":    GenerateRkey(item.GUID, rkeyTime),
 	})
 }
 
@@ -643,10 +642,11 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 	}
 
 	type PublishResult struct {
-		ItemID int64  `json:"item_id"`
-		Title  string `json:"title"`
-		URI    string `json:"uri,omitempty"`
-		Error  string `json:"error,omitempty"`
+		FeedURL string `json:"feed_url"`
+		GUID    string `json:"guid"`
+		Title   string `json:"title"`
+		URI     string `json:"uri,omitempty"`
+		Error   string `json:"error,omitempty"`
 	}
 
 	var results []PublishResult
@@ -655,8 +655,9 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 	published := 0
 	failed := 0
 
 	for i, item := range items {
 		result := PublishResult{
-			ItemID: item.ID,
-			Title:  item.Title,
+			FeedURL: item.FeedURL,
+			GUID:    item.GUID,
+			Title:   item.Title,
 		}
 
 		uri, err := publisher.PublishItem(session, item)
@@ -665,7 +666,7 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 			failed++
 		} else {
 			result.URI = uri
-			c.MarkItemPublished(item.ID, uri)
+			c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 			published++
 		}
 
@@ -867,10 +868,11 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 	}
 
 	type PublishResult struct {
-		ItemID int64  `json:"item_id"`
-		Title  string `json:"title"`
-		URI    string `json:"uri,omitempty"`
-		Error  string `json:"error,omitempty"`
+		FeedURL string `json:"feed_url"`
+		GUID    string `json:"guid"`
+		Title   string `json:"title"`
+		URI     string `json:"uri,omitempty"`
+		Error   string `json:"error,omitempty"`
 	}
 
 	var results []PublishResult
@@ -879,8 +881,9 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 	published := 0
 	failed := 0
 
 	for i, item := range items {
 		result := PublishResult{
-			ItemID: item.ID,
-			Title:  item.Title,
+			FeedURL: item.FeedURL,
+			GUID:    item.GUID,
+			Title:   item.Title,
 		}
 
 		uri, err := publisher.PublishItem(session, item)
@@ -889,7 +892,7 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 			failed++
 		} else {
 			result.URI = uri
-			c.MarkItemPublished(item.ID, uri)
+			c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 			published++
 		}
 
diff --git a/api_search.go b/api_search.go
index 4a9b003..f6945c5 100644
--- a/api_search.go
+++ b/api_search.go
@@ -41,7 +41,6 @@ type SearchFeed struct {
 }
 
 type SearchItem struct {
-	ID      int64  `json:"id"`
 	FeedURL string `json:"feed_url"`
 	GUID    string `json:"guid"`
 	Title   string `json:"title"`
@@ -187,7 +186,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 
 	// Search items via full-text search
 	itemRows, err := c.db.Query(`
-		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
+		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
 		FROM items i
 		WHERE i.search_vector @@ to_tsquery('english', $1)
 		ORDER BY i.pub_date DESC
@@ -196,16 +195,14 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 	if err == nil {
 		defer itemRows.Close()
 		for itemRows.Next() {
-			var id int64
 			var feedUrl string
 			var guid, title, link, description, content, author *string
 			var pubDate, discoveredAt, updatedAt *time.Time
 
-			if err := itemRows.Scan(&id, &feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
+			if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
 				continue
 			}
 
 			item := SearchItem{
-				ID:      id,
 				FeedURL: feedUrl,
 				GUID:    StringValue(guid),
 				Title:   StringValue(title),
 
diff --git a/crawler.go b/crawler.go
index 0bee146..bc6be12 100644
--- a/crawler.go
+++ b/crawler.go
@@ -313,7 +313,7 @@ func (c *Crawler) StartPublishLoop() {
 			// Shorten URLs before publishing
 			itemToPublish := item
 			if item.Link != "" {
-				if shortURL, err := c.GetShortURLForPost(item.Link, &item.ID, item.FeedURL); err == nil {
+				if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
 					fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
 					itemToPublish.Link = shortURL
 				} else {
@@ -324,13 +324,13 @@ func (c *Crawler) StartPublishLoop() {
 			// Publish the item
 			uri, err := publisher.PublishItem(session, &itemToPublish)
 			if err != nil {
-				fmt.Printf("Publish: failed item %d: %v\n", item.ID, err)
+				fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
 				// Clear session cache on auth errors
 				if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
 					delete(sessions, account)
 				}
 			} else {
-				c.MarkItemPublished(item.ID, uri)
+				c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 				fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
 			}
 
@@ -453,7 +453,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
 
 // GetAllUnpublishedItems returns unpublished items from all approved feeds
 func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
 	rows, err := c.db.Query(`
-		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
+		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
 		       i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
 		       i.enclosure_url, i.enclosure_type, i.enclosure_length
 		FROM items i
@@ -477,7 +477,7 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
 		var enclosureURL, enclosureType *string
 		var enclosureLength *int64
 
-		err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
+		err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
 			&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
 			&enclosureURL, &enclosureType, &enclosureLength)
 		if err != nil {
 
diff --git a/db.go b/db.go
index f8738ad..00b14f2 100644
--- a/db.go
+++ b/db.go
@@ -174,7 +174,7 @@ func OpenDatabase(connString string) (*DB, error) {
 
 	// Connection pool settings
 	config.MaxConns = 10
-	config.MinConns = 2
+	config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
 	config.MaxConnLifetime = 5 * time.Minute
 	config.MaxConnIdleTime = 1 * time.Minute
 
@@ -193,10 +193,16 @@ func OpenDatabase(connString string) (*DB, error) {
 
 	db := &DB{pool}
 
-	// Create schema
-	if _, err := pool.Exec(ctx, schema); err != nil {
-		pool.Close()
-		return nil, fmt.Errorf("failed to create schema: %v", err)
+	// Check if schema already exists (check for domains table)
+	var tableExists bool
+	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'domains')").Scan(&tableExists)
+
+	if !tableExists {
+		// Create schema only if tables don't exist
+		if _, err := pool.Exec(ctx, schema); err != nil {
+			pool.Close()
+			return nil, fmt.Errorf("failed to create schema: %v", err)
+		}
 	}
 
 	fmt.Println(" Schema OK")
 
@@ -291,6 +297,14 @@ func OpenDatabase(connString string) (*DB, error) {
 		pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN token_expiry TYPE TIMESTAMP USING token_expiry AT TIME ZONE 'UTC'")
 	}
 
+	// Migration: rename item_id to item_guid in short_urls table (items now use composite PK)
+	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='short_urls' AND column_name='item_id')").Scan(&colExists)
+	if colExists {
+		// Drop the column and add item_guid instead (can't convert int64 to text meaningfully)
+		pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
+		pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
+	}
+
 	fmt.Println(" Schema OK")
 
 	// Run stats and background index creation
 
diff --git a/docker-compose.yml b/docker-compose.yml
index e8265d6..570e192 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,9 +11,9 @@ services:
     environment:
       DB_HOST: infra-postgres
       DB_PORT: 5432
-      DB_USER: news_1440
+      DB_USER: dba_1440_news
       DB_PASSWORD_FILE: /run/secrets/db_password
-      DB_NAME: news_1440
+      DB_NAME: db_1440_news
     secrets:
       - db_password
     volumes:
@@ -55,7 +55,7 @@ services:
 
 secrets:
   db_password:
-    file: ../../../infra/postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/dba_1440_news_password.txt
 
 networks:
   proxy:
 
diff --git a/item.go b/item.go
index 7364a47..8101e69 100644
--- a/item.go
+++ b/item.go
@@ -17,7 +17,6 @@ type Enclosure struct {
 
 // Item represents an individual entry/article from a feed
 type Item struct {
-	ID      int64  `json:"id,omitempty"`
 	FeedURL string `json:"feed_url"`
 	GUID    string `json:"guid,omitempty"`
 	Title   string `json:"title,omitempty"`
@@ -176,7 +175,7 @@ func (c *Crawler) saveItems(items []*Item) error {
 
 // GetItemsByFeed returns all items for a specific feed
 func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -196,7 +195,7 @@ func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 func (c *Crawler) SearchItems(query string, limit int) ([]*Item, error) {
 	tsquery := ToSearchQuery(query)
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -225,7 +224,7 @@ func scanItems(rows pgx.Rows) ([]*Item, error) {
 		var publishedUri *string
 
 		if err := rows.Scan(
-			&item.ID, &item.FeedURL, &guid, &title, &link,
+			&item.FeedURL, &guid, &title, &link,
 			&description, &content, &author, &pubDate, &item.DiscoveredAt,
 			&updatedAt, &enclosureUrl, &enclosureType, &enclosureLength,
 			&imageUrlsJSON, &tagsJSON,
@@ -294,7 +293,7 @@ func (c *Crawler) CleanupOldItems() (int64, error) {
 
 // GetUnpublishedItems returns items for a feed that haven't been published yet
 func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error) {
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -311,10 +310,10 @@ func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error
 }
 
 // MarkItemPublished marks an item as published with the given URI
-func (c *Crawler) MarkItemPublished(itemID int64, uri string) error {
+func (c *Crawler) MarkItemPublished(feedURL, guid, uri string) error {
 	_, err := c.db.Exec(`
-		UPDATE items SET published_at = NOW(), published_uri = $1 WHERE id = $2
-	`, uri, itemID)
+		UPDATE items SET published_at = NOW(), published_uri = $1 WHERE feed_url = $2 AND guid = $3
+	`, uri, feedURL, guid)
 	return err
 }
 
diff --git a/shortener.go b/shortener.go
index de50143..ce94624 100644
--- a/shortener.go
+++ b/shortener.go
@@ -13,7 +13,7 @@ import (
 type ShortURL struct {
 	Code        string    `json:"code"`
 	OriginalURL string    `json:"original_url"`
-	ItemID      *int64    `json:"item_id,omitempty"`
+	ItemGUID    string    `json:"item_guid,omitempty"`
 	FeedURL     string    `json:"feed_url,omitempty"`
 	CreatedAt   time.Time `json:"created_at"`
 	ClickCount  int       `json:"click_count"`
@@ -46,16 +46,19 @@ func generateShortCode(url string) string {
 }
 
 // CreateShortURL creates or retrieves a short URL for the given original URL
-func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL string) (*ShortURL, error) {
+func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL string) (*ShortURL, error) {
 	// Check if we already have this URL
 	var existing ShortURL
+	var itemGUIDPtr, feedURLPtr *string
 	err := c.db.QueryRow(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls WHERE original_url = $1
-	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &existing.ItemID,
-		&existing.FeedURL, &existing.CreatedAt, &existing.ClickCount)
+	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &itemGUIDPtr,
+		&feedURLPtr, &existing.CreatedAt, &existing.ClickCount)
 	if err == nil {
+		existing.ItemGUID = StringValue(itemGUIDPtr)
+		existing.FeedURL = StringValue(feedURLPtr)
 		return &existing, nil
 	}
 
@@ -85,9 +88,9 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 	// Insert new short URL
 	now := time.Now()
 	_, err = c.db.Exec(`
-		INSERT INTO short_urls (code, original_url, item_id, feed_url, created_at, click_count)
+		INSERT INTO short_urls (code, original_url, item_guid, feed_url, created_at, click_count)
 		VALUES ($1, $2, $3, $4, $5, 0)
-	`, code, originalURL, itemID, NullableString(feedURL), now)
+	`, code, originalURL, NullableString(itemGUID), NullableString(feedURL), now)
 
 	if err != nil {
 		return nil, fmt.Errorf("failed to create short URL: %v", err)
@@ -96,7 +99,7 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 	return &ShortURL{
 		Code:        code,
 		OriginalURL: originalURL,
-		ItemID:      itemID,
+		ItemGUID:    itemGUID,
 		FeedURL:     feedURL,
 		CreatedAt:   now,
 		ClickCount:  0,
@@ -106,19 +109,19 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 
 // GetShortURL retrieves a short URL by code
 func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
 	var s ShortURL
-	var itemID *int64
+	var itemGUID *string
 	var feedURL *string
 
 	err := c.db.QueryRow(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls WHERE code = $1
-	`, code).Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+	`, code).Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
 
 	if err != nil {
 		return nil, err
 	}
 
-	s.ItemID = itemID
+	s.ItemGUID = StringValue(itemGUID)
 	s.FeedURL = StringValue(feedURL)
 	return &s, nil
 }
@@ -163,8 +166,8 @@ func (c *Crawler) RecordClick(code string, r *http.Request) error {
 
 // GetShortURLForPost returns the short URL string for use in posts
 // Format: https://url.1440.news/{code}
-func (c *Crawler) GetShortURLForPost(originalURL string, itemID *int64, feedURL string) (string, error) {
-	shortURL, err := c.CreateShortURL(originalURL, itemID, feedURL)
+func (c *Crawler) GetShortURLForPost(originalURL, itemGUID, feedURL string) (string, error) {
+	shortURL, err := c.CreateShortURL(originalURL, itemGUID, feedURL)
 	if err != nil {
 		return "", err
 	}
@@ -216,7 +219,7 @@ func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
 
 // GetTopShortURLs returns the most clicked short URLs
 func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
 	rows, err := c.db.Query(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls
 		ORDER BY click_count DESC
 		LIMIT $1
@@ -229,15 +232,15 @@ func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
 	var urls []ShortURL
 	for rows.Next() {
 		var s ShortURL
-		var itemID *int64
+		var itemGUID *string
 		var feedURL *string
 
-		err := rows.Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+		err := rows.Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
 		if err != nil {
 			continue
 		}
 
-		s.ItemID = itemID
+		s.ItemGUID = StringValue(itemGUID)
 		s.FeedURL = StringValue(feedURL)
 		urls = append(urls, s)
 	}
 
diff --git a/templates.go b/templates.go
index f7b1efe..55ed57f 100644
--- a/templates.go
+++ b/templates.go
@@ -445,8 +445,8 @@ const dashboardHTML = `
 
 	1440.news Feed Crawler
-	
-	
+	
+	
 
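
Reviewer note: after this patch an item is addressed everywhere by the composite key (feed_url, guid). Below is a minimal sketch of how a client would call the updated test-publish endpoint, assuming the handler is mounted at /api/test-publish; the route path, feed URL, GUID, and handle are assumptions for illustration, since the patch does not show the mux registration.

	package main

	import (
		"fmt"
		"net/url"
	)

	func main() {
		// The item is now identified by feedUrl+guid instead of the removed itemId.
		q := url.Values{}
		q.Set("feedUrl", "https://example.com/feed.xml") // hypothetical feed URL
		q.Set("guid", "https://example.com/post/123")    // hypothetical item GUID
		q.Set("handle", "example.1440.news")             // hypothetical account handle
		q.Set("password", "app-password")
		// pds is optional; the handler defaults it to https://1440.news.
		fmt.Println("/api/test-publish?" + q.Encode()) // assumed route
	}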