Remove item ID column references - items now use composite PK (guid, feed_url)

- Remove ID field from Item struct (composite-key schema sketched below)
- Remove ID field from SearchItem struct
- Update all SQL queries to not select id column
- Change MarkItemPublished to use feedURL/guid instead of id
- Update shortener to use item_guid instead of item_id
- Add migration to convert item_id to item_guid in short_urls table
- Update API endpoints to use feedUrl/guid instead of itemId

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
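
A rough sketch of the schema this change list implies follows. The items DDL is not part of this diff, so the column list is abridged and the pgx/v5 pool type is an assumption; only the composite key (guid, feed_url) comes from the commit itself.

    // Hypothetical sketch -- the real schema constant lives in the crawler's
    // database setup, which this commit does not touch.
    package crawler

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    func ensureItemsTable(ctx context.Context, pool *pgxpool.Pool) error {
        // Items are now identified by (guid, feed_url) rather than a serial id.
        _, err := pool.Exec(ctx, `
            CREATE TABLE IF NOT EXISTS items (
                feed_url TEXT NOT NULL,
                guid     TEXT NOT NULL,
                title    TEXT,
                link     TEXT,
                published_at  TIMESTAMP,
                published_uri TEXT,
                PRIMARY KEY (guid, feed_url)
            )`)
        return err
    }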
Author: primal
Date: 2026-02-01 23:51:44 -05:00
Commit: 3f277ec165 (parent: 7ec4207173)
8 changed files with 93 additions and 77 deletions
File 1: +34 -31
@@ -504,15 +504,16 @@ func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Reque
 }
 // handleAPITestPublish tests publishing a single item to PDS
-// Requires: url (feed), itemId, handle, password, pds (optional, defaults to https://1440.news)
+// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news)
 func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
-    itemIDStr := r.URL.Query().Get("itemId")
+    feedURL := r.URL.Query().Get("feedUrl")
+    guidParam := r.URL.Query().Get("guid")
     handle := r.URL.Query().Get("handle")
     password := r.URL.Query().Get("password")
     pdsHost := r.URL.Query().Get("pds")
-    if itemIDStr == "" {
-        http.Error(w, "itemId parameter required", http.StatusBadRequest)
+    if feedURL == "" || guidParam == "" {
+        http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest)
         return
     }
     if handle == "" || password == "" {
@@ -523,9 +524,6 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
         pdsHost = "https://1440.news"
     }
-    var itemID int64
-    fmt.Sscanf(itemIDStr, "%d", &itemID)
     // Get the item
     var item Item
     var guid, title, link, description, content, author *string
@@ -533,10 +531,10 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     var publishedUri *string
     err := c.db.QueryRow(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
-        FROM items WHERE id = $1
-    `, itemID).Scan(
-        &item.ID, &item.FeedURL, &guid, &title, &link,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
+        FROM items WHERE feed_url = $1 AND guid = $2
+    `, feedURL, guidParam).Scan(
+        &item.FeedURL, &guid, &title, &link,
         &description, &content, &author, &pubDate,
         &item.DiscoveredAt, &updatedAt, &publishedAt, &publishedUri,
     )
@@ -571,7 +569,7 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     }
     // Mark as published
-    c.MarkItemPublished(item.ID, uri)
+    c.MarkItemPublished(item.FeedURL, item.GUID, uri)
     // Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
     rkeyTime := item.PubDate
@@ -580,11 +578,12 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     }
     w.Header().Set("Content-Type", "application/json")
     json.NewEncoder(w).Encode(map[string]interface{}{
-        "status": "published",
-        "uri":    uri,
-        "itemId": item.ID,
-        "title":  item.Title,
-        "rkey":   GenerateRkey(item.GUID, rkeyTime),
+        "status":  "published",
+        "uri":     uri,
+        "feedUrl": item.FeedURL,
+        "guid":    item.GUID,
+        "title":   item.Title,
+        "rkey":    GenerateRkey(item.GUID, rkeyTime),
     })
 }
@@ -643,10 +642,11 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
     }
     type PublishResult struct {
-        ItemID int64  `json:"item_id"`
-        Title  string `json:"title"`
-        URI    string `json:"uri,omitempty"`
-        Error  string `json:"error,omitempty"`
+        FeedURL string `json:"feed_url"`
+        GUID    string `json:"guid"`
+        Title   string `json:"title"`
+        URI     string `json:"uri,omitempty"`
+        Error   string `json:"error,omitempty"`
     }
     var results []PublishResult
@@ -655,8 +655,9 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
     for i, item := range items {
         result := PublishResult{
-            ItemID: item.ID,
-            Title:  item.Title,
+            FeedURL: item.FeedURL,
+            GUID:    item.GUID,
+            Title:   item.Title,
         }
         uri, err := publisher.PublishItem(session, item)
@@ -665,7 +666,7 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
             failed++
         } else {
             result.URI = uri
-            c.MarkItemPublished(item.ID, uri)
+            c.MarkItemPublished(item.FeedURL, item.GUID, uri)
             published++
         }
@@ -867,10 +868,11 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
     }
     type PublishResult struct {
-        ItemID int64  `json:"item_id"`
-        Title  string `json:"title"`
-        URI    string `json:"uri,omitempty"`
-        Error  string `json:"error,omitempty"`
+        FeedURL string `json:"feed_url"`
+        GUID    string `json:"guid"`
+        Title   string `json:"title"`
+        URI     string `json:"uri,omitempty"`
+        Error   string `json:"error,omitempty"`
     }
     var results []PublishResult
@@ -879,8 +881,9 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
     for i, item := range items {
         result := PublishResult{
-            ItemID: item.ID,
-            Title:  item.Title,
+            FeedURL: item.FeedURL,
+            GUID:    item.GUID,
+            Title:   item.Title,
         }
         uri, err := publisher.PublishItem(session, item)
@@ -889,7 +892,7 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
             failed++
         } else {
             result.URI = uri
-            c.MarkItemPublished(item.ID, uri)
+            c.MarkItemPublished(item.FeedURL, item.GUID, uri)
            published++
         }
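
For reference, a client call against the reworked test-publish endpoint might look like the sketch below. The /api/test-publish path and local port are assumptions (the diff only shows the handler); the parameter names mirror the handler above.

    // Hypothetical client sketch; route and port are assumed.
    package main

    import (
        "fmt"
        "io"
        "net/http"
        "net/url"
    )

    func main() {
        q := url.Values{}
        q.Set("feedUrl", "https://example.com/feed.xml") // replaces the old itemId
        q.Set("guid", "example-item-guid")
        q.Set("handle", "example.1440.news")
        q.Set("password", "app-password")

        resp, err := http.Get("http://localhost:8080/api/test-publish?" + q.Encode())
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        body, _ := io.ReadAll(resp.Body)
        fmt.Println(string(body)) // JSON now carries "feedUrl" and "guid" instead of "itemId"
    }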
File 2: +2 -5
@@ -41,7 +41,6 @@ type SearchFeed struct {
 }
 type SearchItem struct {
-    ID      int64  `json:"id"`
     FeedURL string `json:"feed_url"`
     GUID    string `json:"guid"`
     Title   string `json:"title"`
@@ -187,7 +186,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     // Search items via full-text search
     itemRows, err := c.db.Query(`
-        SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
+        SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
         FROM items i
         WHERE i.search_vector @@ to_tsquery('english', $1)
         ORDER BY i.pub_date DESC
@@ -196,16 +195,14 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     if err == nil {
         defer itemRows.Close()
         for itemRows.Next() {
-            var id int64
             var feedUrl string
             var guid, title, link, description, content, author *string
             var pubDate, discoveredAt, updatedAt *time.Time
-            if err := itemRows.Scan(&id, &feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
+            if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
                continue
            }
            item := SearchItem{
-                ID:      id,
                FeedURL: feedUrl,
                GUID:    StringValue(guid),
                Title:   StringValue(title),
File 3: +5 -5
@@ -313,7 +313,7 @@ func (c *Crawler) StartPublishLoop() {
             // Shorten URLs before publishing
             itemToPublish := item
             if item.Link != "" {
-                if shortURL, err := c.GetShortURLForPost(item.Link, &item.ID, item.FeedURL); err == nil {
+                if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
                     fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
                     itemToPublish.Link = shortURL
                 } else {
@@ -324,13 +324,13 @@ func (c *Crawler) StartPublishLoop() {
             // Publish the item
             uri, err := publisher.PublishItem(session, &itemToPublish)
             if err != nil {
-                fmt.Printf("Publish: failed item %d: %v\n", item.ID, err)
+                fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
                 // Clear session cache on auth errors
                 if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
                     delete(sessions, account)
                 }
             } else {
-                c.MarkItemPublished(item.ID, uri)
+                c.MarkItemPublished(item.FeedURL, item.GUID, uri)
                 fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
             }
@@ -453,7 +453,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
 // GetAllUnpublishedItems returns unpublished items from all approved feeds
 func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
     rows, err := c.db.Query(`
-        SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
+        SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
                i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
                i.enclosure_url, i.enclosure_type, i.enclosure_length
         FROM items i
@@ -477,7 +477,7 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
         var enclosureURL, enclosureType *string
         var enclosureLength *int64
-        err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
+        err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
             &content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
             &enclosureURL, &enclosureType, &enclosureLength)
         if err != nil {
File 4: +19 -5
@@ -174,7 +174,7 @@ func OpenDatabase(connString string) (*DB, error) {
     // Connection pool settings
     config.MaxConns = 10
-    config.MinConns = 2
+    config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
     config.MaxConnLifetime = 5 * time.Minute
     config.MaxConnIdleTime = 1 * time.Minute
@@ -193,10 +193,16 @@ func OpenDatabase(connString string) (*DB, error) {
     db := &DB{pool}
-    // Create schema
-    if _, err := pool.Exec(ctx, schema); err != nil {
-        pool.Close()
-        return nil, fmt.Errorf("failed to create schema: %v", err)
+    // Check if schema already exists (check for domains table)
+    var tableExists bool
+    pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'domains')").Scan(&tableExists)
+    if !tableExists {
+        // Create schema only if tables don't exist
+        if _, err := pool.Exec(ctx, schema); err != nil {
+            pool.Close()
+            return nil, fmt.Errorf("failed to create schema: %v", err)
+        }
     }
     fmt.Println("  Schema OK")
@@ -291,6 +297,14 @@ func OpenDatabase(connString string) (*DB, error) {
         pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN token_expiry TYPE TIMESTAMP USING token_expiry AT TIME ZONE 'UTC'")
     }
+    // Migration: rename item_id to item_guid in short_urls table (items now use composite PK)
+    pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='short_urls' AND column_name='item_id')").Scan(&colExists)
+    if colExists {
+        // Drop the column and add item_guid instead (can't convert int64 to text meaningfully)
+        pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
+        pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
+    }
     fmt.Println("  Schema OK")
     // Run stats and background index creation
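
Note that the short_urls migration above is drop-and-replace: existing rows end up with a NULL item_guid, because the old integer ids cannot be mapped to GUIDs. Rows created after this commit can be joined back to items on the composite key, roughly as in this hypothetical helper (pgx/v5 pool assumed; only the column names come from the diff):

    package crawler

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    // titleForShortCode is not part of this commit; it illustrates resolving
    // the item behind a short code via the new (item_guid, feed_url) linkage.
    // Pre-migration rows have a NULL item_guid and simply match nothing.
    func titleForShortCode(ctx context.Context, pool *pgxpool.Pool, code string) (*string, error) {
        var title *string
        err := pool.QueryRow(ctx, `
            SELECT i.title
            FROM short_urls s
            JOIN items i ON i.guid = s.item_guid AND i.feed_url = s.feed_url
            WHERE s.code = $1`, code).Scan(&title)
        if err != nil {
            return nil, err
        }
        return title, nil
    }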
File 5: +3 -3
@@ -11,9 +11,9 @@ services:
     environment:
       DB_HOST: infra-postgres
       DB_PORT: 5432
-      DB_USER: news_1440
+      DB_USER: dba_1440_news
       DB_PASSWORD_FILE: /run/secrets/db_password
-      DB_NAME: news_1440
+      DB_NAME: db_1440_news
     secrets:
       - db_password
     volumes:
@@ -55,7 +55,7 @@ services:
 secrets:
   db_password:
-    file: ../../../infra/postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/dba_1440_news_password.txt
 networks:
   proxy:
File 6: +7 -8
@@ -17,7 +17,6 @@ type Enclosure struct {
 }
 // Item represents an individual entry/article from a feed
 type Item struct {
-    ID      int64  `json:"id,omitempty"`
     FeedURL string `json:"feed_url"`
     GUID    string `json:"guid,omitempty"`
     Title   string `json:"title,omitempty"`
@@ -176,7 +175,7 @@ func (c *Crawler) saveItems(items []*Item) error {
 // GetItemsByFeed returns all items for a specific feed
 func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -196,7 +195,7 @@ func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 func (c *Crawler) SearchItems(query string, limit int) ([]*Item, error) {
     tsquery := ToSearchQuery(query)
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -225,7 +224,7 @@ func scanItems(rows pgx.Rows) ([]*Item, error) {
         var publishedUri *string
         if err := rows.Scan(
-            &item.ID, &item.FeedURL, &guid, &title, &link,
+            &item.FeedURL, &guid, &title, &link,
             &description, &content, &author, &pubDate,
             &item.DiscoveredAt, &updatedAt,
             &enclosureUrl, &enclosureType, &enclosureLength, &imageUrlsJSON, &tagsJSON,
@@ -294,7 +293,7 @@ func (c *Crawler) CleanupOldItems() (int64, error) {
 // GetUnpublishedItems returns items for a feed that haven't been published yet
 func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error) {
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -311,10 +310,10 @@ func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error
 }
 // MarkItemPublished marks an item as published with the given URI
-func (c *Crawler) MarkItemPublished(itemID int64, uri string) error {
+func (c *Crawler) MarkItemPublished(feedURL, guid, uri string) error {
     _, err := c.db.Exec(`
-        UPDATE items SET published_at = NOW(), published_uri = $1 WHERE id = $2
-    `, uri, itemID)
+        UPDATE items SET published_at = NOW(), published_uri = $1 WHERE feed_url = $2 AND guid = $3
+    `, uri, feedURL, guid)
     return err
 }
File 7: +21 -18
@@ -13,7 +13,7 @@ import (
 type ShortURL struct {
     Code        string    `json:"code"`
     OriginalURL string    `json:"original_url"`
-    ItemID      *int64    `json:"item_id,omitempty"`
+    ItemGUID    string    `json:"item_guid,omitempty"`
     FeedURL     string    `json:"feed_url,omitempty"`
     CreatedAt   time.Time `json:"created_at"`
     ClickCount  int       `json:"click_count"`
@@ -46,16 +46,19 @@ func generateShortCode(url string) string {
 }
 // CreateShortURL creates or retrieves a short URL for the given original URL
-func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL string) (*ShortURL, error) {
+func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL string) (*ShortURL, error) {
     // Check if we already have this URL
     var existing ShortURL
+    var itemGUIDPtr, feedURLPtr *string
     err := c.db.QueryRow(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls WHERE original_url = $1
-    `, originalURL).Scan(&existing.Code, &existing.OriginalURL, &existing.ItemID,
-        &existing.FeedURL, &existing.CreatedAt, &existing.ClickCount)
+    `, originalURL).Scan(&existing.Code, &existing.OriginalURL, &itemGUIDPtr,
+        &feedURLPtr, &existing.CreatedAt, &existing.ClickCount)
     if err == nil {
+        existing.ItemGUID = StringValue(itemGUIDPtr)
+        existing.FeedURL = StringValue(feedURLPtr)
         return &existing, nil
     }
@@ -85,9 +88,9 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
     // Insert new short URL
     now := time.Now()
     _, err = c.db.Exec(`
-        INSERT INTO short_urls (code, original_url, item_id, feed_url, created_at, click_count)
+        INSERT INTO short_urls (code, original_url, item_guid, feed_url, created_at, click_count)
         VALUES ($1, $2, $3, $4, $5, 0)
-    `, code, originalURL, itemID, NullableString(feedURL), now)
+    `, code, originalURL, NullableString(itemGUID), NullableString(feedURL), now)
     if err != nil {
         return nil, fmt.Errorf("failed to create short URL: %v", err)
@@ -96,7 +99,7 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
     return &ShortURL{
         Code:        code,
         OriginalURL: originalURL,
-        ItemID:      itemID,
+        ItemGUID:    itemGUID,
         FeedURL:     feedURL,
         CreatedAt:   now,
         ClickCount:  0,
@@ -106,19 +109,19 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
 // GetShortURL retrieves a short URL by code
 func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
     var s ShortURL
-    var itemID *int64
+    var itemGUID *string
     var feedURL *string
     err := c.db.QueryRow(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls WHERE code = $1
-    `, code).Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+    `, code).Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
     if err != nil {
         return nil, err
     }
-    s.ItemID = itemID
+    s.ItemGUID = StringValue(itemGUID)
     s.FeedURL = StringValue(feedURL)
     return &s, nil
 }
@@ -163,8 +166,8 @@ func (c *Crawler) RecordClick(code string, r *http.Request) error {
 // GetShortURLForPost returns the short URL string for use in posts
 // Format: https://url.1440.news/{code}
-func (c *Crawler) GetShortURLForPost(originalURL string, itemID *int64, feedURL string) (string, error) {
-    shortURL, err := c.CreateShortURL(originalURL, itemID, feedURL)
+func (c *Crawler) GetShortURLForPost(originalURL, itemGUID, feedURL string) (string, error) {
+    shortURL, err := c.CreateShortURL(originalURL, itemGUID, feedURL)
     if err != nil {
         return "", err
     }
@@ -216,7 +219,7 @@ func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
 // GetTopShortURLs returns the most clicked short URLs
 func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
     rows, err := c.db.Query(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls
         ORDER BY click_count DESC
         LIMIT $1
@@ -229,15 +232,15 @@ func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
     var urls []ShortURL
     for rows.Next() {
         var s ShortURL
-        var itemID *int64
+        var itemGUID *string
         var feedURL *string
-        err := rows.Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+        err := rows.Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
         if err != nil {
            continue
         }
-        s.ItemID = itemID
+        s.ItemGUID = StringValue(itemGUID)
         s.FeedURL = StringValue(feedURL)
         urls = append(urls, s)
     }
File 8: +2 -2
@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
 <title>1440.news Feed Crawler</title>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
-<link rel="stylesheet" href="/static/dashboard.css?v=1769995130">
-<script src="/static/dashboard.js?v=1769995130"></script>
+<link rel="stylesheet" href="/static/dashboard.css?v=1770006945">
+<script src="/static/dashboard.js?v=1770006945"></script>
 </head>
 <body>
 <div id="topSection">