Remove item ID column references - items now use composite PK (guid, feed_url)

- Remove ID field from Item struct (composite-key schema sketched below)
- Remove ID field from SearchItem struct
- Update all SQL queries to not select id column
- Change MarkItemPublished to use feedURL/guid instead of id
- Update shortener to use item_guid instead of item_id
- Add migration to convert item_id to item_guid in short_urls table
- Update API endpoints to use feedUrl/guid instead of itemId

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
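
A rough sketch of the schema this change list implies follows. The items DDL is not part of this diff, so the column list is abridged and the pgx/v5 pool type is an assumption; only the composite key (guid, feed_url) comes from the commit itself.

    // Hypothetical sketch -- the real schema constant lives in the crawler's
    // database setup, which this commit does not touch.
    package crawler

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    func ensureItemsTable(ctx context.Context, pool *pgxpool.Pool) error {
        // Items are now identified by (guid, feed_url) rather than a serial id.
        _, err := pool.Exec(ctx, `
            CREATE TABLE IF NOT EXISTS items (
                feed_url TEXT NOT NULL,
                guid     TEXT NOT NULL,
                title    TEXT,
                link     TEXT,
                published_at  TIMESTAMP,
                published_uri TEXT,
                PRIMARY KEY (guid, feed_url)
            )`)
        return err
    }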
Author: primal
Date: 2026-02-01 23:51:44 -05:00
Commit: 3f277ec165 (parent: 7ec4207173)
8 changed files with 93 additions and 77 deletions
File 1: +34 -31
@@ -504,15 +504,16 @@ func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Reque
 }
 // handleAPITestPublish tests publishing a single item to PDS
-// Requires: url (feed), itemId, handle, password, pds (optional, defaults to https://1440.news)
+// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news)
 func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
-    itemIDStr := r.URL.Query().Get("itemId")
+    feedURL := r.URL.Query().Get("feedUrl")
+    guidParam := r.URL.Query().Get("guid")
     handle := r.URL.Query().Get("handle")
     password := r.URL.Query().Get("password")
     pdsHost := r.URL.Query().Get("pds")
-    if itemIDStr == "" {
-        http.Error(w, "itemId parameter required", http.StatusBadRequest)
+    if feedURL == "" || guidParam == "" {
+        http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest)
         return
     }
     if handle == "" || password == "" {
@@ -523,9 +524,6 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
         pdsHost = "https://1440.news"
     }
-    var itemID int64
-    fmt.Sscanf(itemIDStr, "%d", &itemID)
     // Get the item
     var item Item
     var guid, title, link, description, content, author *string
@@ -533,10 +531,10 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     var publishedUri *string
     err := c.db.QueryRow(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
-        FROM items WHERE id = $1
-    `, itemID).Scan(
-        &item.ID, &item.FeedURL, &guid, &title, &link,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
+        FROM items WHERE feed_url = $1 AND guid = $2
+    `, feedURL, guidParam).Scan(
+        &item.FeedURL, &guid, &title, &link,
         &description, &content, &author, &pubDate,
         &item.DiscoveredAt, &updatedAt, &publishedAt, &publishedUri,
     )
@@ -571,7 +569,7 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     }
     // Mark as published
-    c.MarkItemPublished(item.ID, uri)
+    c.MarkItemPublished(item.FeedURL, item.GUID, uri)
     // Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
     rkeyTime := item.PubDate
@@ -580,11 +578,12 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
     }
     w.Header().Set("Content-Type", "application/json")
     json.NewEncoder(w).Encode(map[string]interface{}{
-        "status": "published",
-        "uri":    uri,
-        "itemId": item.ID,
-        "title":  item.Title,
-        "rkey":   GenerateRkey(item.GUID, rkeyTime),
+        "status":  "published",
+        "uri":     uri,
+        "feedUrl": item.FeedURL,
+        "guid":    item.GUID,
+        "title":   item.Title,
+        "rkey":    GenerateRkey(item.GUID, rkeyTime),
     })
 }
@@ -643,10 +642,11 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
     }
     type PublishResult struct {
-        ItemID int64  `json:"item_id"`
-        Title  string `json:"title"`
-        URI    string `json:"uri,omitempty"`
-        Error  string `json:"error,omitempty"`
+        FeedURL string `json:"feed_url"`
+        GUID    string `json:"guid"`
+        Title   string `json:"title"`
+        URI     string `json:"uri,omitempty"`
+        Error   string `json:"error,omitempty"`
     }
     var results []PublishResult
@@ -655,8 +655,9 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
     for i, item := range items {
         result := PublishResult{
-            ItemID: item.ID,
-            Title:  item.Title,
+            FeedURL: item.FeedURL,
+            GUID:    item.GUID,
+            Title:   item.Title,
         }
         uri, err := publisher.PublishItem(session, item)
@@ -665,7 +666,7 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
             failed++
         } else {
             result.URI = uri
-            c.MarkItemPublished(item.ID, uri)
+            c.MarkItemPublished(item.FeedURL, item.GUID, uri)
             published++
         }
@@ -867,10 +868,11 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
     }
     type PublishResult struct {
-        ItemID int64  `json:"item_id"`
-        Title  string `json:"title"`
-        URI    string `json:"uri,omitempty"`
-        Error  string `json:"error,omitempty"`
+        FeedURL string `json:"feed_url"`
+        GUID    string `json:"guid"`
+        Title   string `json:"title"`
+        URI     string `json:"uri,omitempty"`
+        Error   string `json:"error,omitempty"`
     }
     var results []PublishResult
@@ -879,8 +881,9 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
     for i, item := range items {
         result := PublishResult{
-            ItemID: item.ID,
-            Title:  item.Title,
+            FeedURL: item.FeedURL,
+            GUID:    item.GUID,
+            Title:   item.Title,
         }
         uri, err := publisher.PublishItem(session, item)
@@ -889,7 +892,7 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
             failed++
         } else {
             result.URI = uri
-            c.MarkItemPublished(item.ID, uri)
+            c.MarkItemPublished(item.FeedURL, item.GUID, uri)
            published++
         }
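
For reference, a client call against the reworked test-publish endpoint might look like the sketch below. The /api/test-publish path and local port are assumptions (the diff only shows the handler); the parameter names mirror the handler above.

    // Hypothetical client sketch; route and port are assumed.
    package main

    import (
        "fmt"
        "io"
        "net/http"
        "net/url"
    )

    func main() {
        q := url.Values{}
        q.Set("feedUrl", "https://example.com/feed.xml") // replaces the old itemId
        q.Set("guid", "example-item-guid")
        q.Set("handle", "example.1440.news")
        q.Set("password", "app-password")

        resp, err := http.Get("http://localhost:8080/api/test-publish?" + q.Encode())
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        body, _ := io.ReadAll(resp.Body)
        fmt.Println(string(body)) // JSON now carries "feedUrl" and "guid" instead of "itemId"
    }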
File 2: +2 -5
@@ -41,7 +41,6 @@ type SearchFeed struct {
 }
 type SearchItem struct {
-    ID      int64  `json:"id"`
     FeedURL string `json:"feed_url"`
     GUID    string `json:"guid"`
     Title   string `json:"title"`
@@ -187,7 +186,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     // Search items via full-text search
     itemRows, err := c.db.Query(`
-        SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
+        SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
         FROM items i
         WHERE i.search_vector @@ to_tsquery('english', $1)
         ORDER BY i.pub_date DESC
@@ -196,16 +195,14 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
     if err == nil {
         defer itemRows.Close()
         for itemRows.Next() {
-            var id int64
             var feedUrl string
             var guid, title, link, description, content, author *string
             var pubDate, discoveredAt, updatedAt *time.Time
-            if err := itemRows.Scan(&id, &feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
+            if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
                continue
            }
            item := SearchItem{
-                ID:      id,
                FeedURL: feedUrl,
                GUID:    StringValue(guid),
                Title:   StringValue(title),
File 3: +5 -5
@@ -313,7 +313,7 @@ func (c *Crawler) StartPublishLoop() {
             // Shorten URLs before publishing
             itemToPublish := item
             if item.Link != "" {
-                if shortURL, err := c.GetShortURLForPost(item.Link, &item.ID, item.FeedURL); err == nil {
+                if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
                     fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
                     itemToPublish.Link = shortURL
                 } else {
@@ -324,13 +324,13 @@ func (c *Crawler) StartPublishLoop() {
             // Publish the item
             uri, err := publisher.PublishItem(session, &itemToPublish)
             if err != nil {
-                fmt.Printf("Publish: failed item %d: %v\n", item.ID, err)
+                fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
                 // Clear session cache on auth errors
                 if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
                     delete(sessions, account)
                 }
             } else {
-                c.MarkItemPublished(item.ID, uri)
+                c.MarkItemPublished(item.FeedURL, item.GUID, uri)
                 fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
             }
@@ -453,7 +453,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
 // GetAllUnpublishedItems returns unpublished items from all approved feeds
 func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
     rows, err := c.db.Query(`
-        SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
+        SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
                i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
                i.enclosure_url, i.enclosure_type, i.enclosure_length
         FROM items i
@@ -477,7 +477,7 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
         var enclosureURL, enclosureType *string
         var enclosureLength *int64
-        err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
+        err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
             &content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
             &enclosureURL, &enclosureType, &enclosureLength)
         if err != nil {
File 4: +19 -5
@@ -174,7 +174,7 @@ func OpenDatabase(connString string) (*DB, error) {
     // Connection pool settings
     config.MaxConns = 10
-    config.MinConns = 2
+    config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
     config.MaxConnLifetime = 5 * time.Minute
     config.MaxConnIdleTime = 1 * time.Minute
@@ -193,10 +193,16 @@ func OpenDatabase(connString string) (*DB, error) {
     db := &DB{pool}
-    // Create schema
-    if _, err := pool.Exec(ctx, schema); err != nil {
-        pool.Close()
-        return nil, fmt.Errorf("failed to create schema: %v", err)
+    // Check if schema already exists (check for domains table)
+    var tableExists bool
+    pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'domains')").Scan(&tableExists)
+    if !tableExists {
+        // Create schema only if tables don't exist
+        if _, err := pool.Exec(ctx, schema); err != nil {
+            pool.Close()
+            return nil, fmt.Errorf("failed to create schema: %v", err)
+        }
     }
     fmt.Println("  Schema OK")
@@ -291,6 +297,14 @@ func OpenDatabase(connString string) (*DB, error) {
         pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN token_expiry TYPE TIMESTAMP USING token_expiry AT TIME ZONE 'UTC'")
     }
+    // Migration: rename item_id to item_guid in short_urls table (items now use composite PK)
+    pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='short_urls' AND column_name='item_id')").Scan(&colExists)
+    if colExists {
+        // Drop the column and add item_guid instead (can't convert int64 to text meaningfully)
+        pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
+        pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
+    }
     fmt.Println("  Schema OK")
     // Run stats and background index creation
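
Note that the short_urls migration above is drop-and-replace: existing rows end up with a NULL item_guid, because the old integer ids cannot be mapped to GUIDs. Rows created after this commit can be joined back to items on the composite key, roughly as in this hypothetical helper (pgx/v5 pool assumed; only the column names come from the diff):

    package crawler

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    // titleForShortCode is not part of this commit; it illustrates resolving
    // the item behind a short code via the new (item_guid, feed_url) linkage.
    // Pre-migration rows have a NULL item_guid and simply match nothing.
    func titleForShortCode(ctx context.Context, pool *pgxpool.Pool, code string) (*string, error) {
        var title *string
        err := pool.QueryRow(ctx, `
            SELECT i.title
            FROM short_urls s
            JOIN items i ON i.guid = s.item_guid AND i.feed_url = s.feed_url
            WHERE s.code = $1`, code).Scan(&title)
        if err != nil {
            return nil, err
        }
        return title, nil
    }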
File 5: +3 -3
@@ -11,9 +11,9 @@ services:
     environment:
       DB_HOST: infra-postgres
       DB_PORT: 5432
-      DB_USER: news_1440
+      DB_USER: dba_1440_news
       DB_PASSWORD_FILE: /run/secrets/db_password
-      DB_NAME: news_1440
+      DB_NAME: db_1440_news
     secrets:
       - db_password
     volumes:
@@ -55,7 +55,7 @@ services:
 secrets:
   db_password:
-    file: ../../../infra/postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/dba_1440_news_password.txt
 networks:
   proxy:
File 6: +7 -8
@@ -17,7 +17,6 @@ type Enclosure struct {
 }
 // Item represents an individual entry/article from a feed
 type Item struct {
-    ID      int64  `json:"id,omitempty"`
     FeedURL string `json:"feed_url"`
     GUID    string `json:"guid,omitempty"`
     Title   string `json:"title,omitempty"`
@@ -176,7 +175,7 @@ func (c *Crawler) saveItems(items []*Item) error {
 // GetItemsByFeed returns all items for a specific feed
 func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -196,7 +195,7 @@ func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 func (c *Crawler) SearchItems(query string, limit int) ([]*Item, error) {
     tsquery := ToSearchQuery(query)
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -225,7 +224,7 @@ func scanItems(rows pgx.Rows) ([]*Item, error) {
         var publishedUri *string
         if err := rows.Scan(
-            &item.ID, &item.FeedURL, &guid, &title, &link,
+            &item.FeedURL, &guid, &title, &link,
             &description, &content, &author, &pubDate,
             &item.DiscoveredAt, &updatedAt,
             &enclosureUrl, &enclosureType, &enclosureLength, &imageUrlsJSON, &tagsJSON,
@@ -294,7 +293,7 @@ func (c *Crawler) CleanupOldItems() (int64, error) {
 // GetUnpublishedItems returns items for a feed that haven't been published yet
 func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error) {
     rows, err := c.db.Query(`
-        SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+        SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
                enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
                published_at, published_uri
         FROM items
@@ -311,10 +310,10 @@ func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error
 }
 // MarkItemPublished marks an item as published with the given URI
-func (c *Crawler) MarkItemPublished(itemID int64, uri string) error {
+func (c *Crawler) MarkItemPublished(feedURL, guid, uri string) error {
     _, err := c.db.Exec(`
-        UPDATE items SET published_at = NOW(), published_uri = $1 WHERE id = $2
-    `, uri, itemID)
+        UPDATE items SET published_at = NOW(), published_uri = $1 WHERE feed_url = $2 AND guid = $3
+    `, uri, feedURL, guid)
     return err
 }
File 7: +21 -18
@@ -13,7 +13,7 @@ import (
 type ShortURL struct {
     Code        string    `json:"code"`
     OriginalURL string    `json:"original_url"`
-    ItemID      *int64    `json:"item_id,omitempty"`
+    ItemGUID    string    `json:"item_guid,omitempty"`
     FeedURL     string    `json:"feed_url,omitempty"`
     CreatedAt   time.Time `json:"created_at"`
     ClickCount  int       `json:"click_count"`
@@ -46,16 +46,19 @@ func generateShortCode(url string) string {
 }
 // CreateShortURL creates or retrieves a short URL for the given original URL
-func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL string) (*ShortURL, error) {
+func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL string) (*ShortURL, error) {
     // Check if we already have this URL
     var existing ShortURL
+    var itemGUIDPtr, feedURLPtr *string
     err := c.db.QueryRow(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls WHERE original_url = $1
-    `, originalURL).Scan(&existing.Code, &existing.OriginalURL, &existing.ItemID,
-        &existing.FeedURL, &existing.CreatedAt, &existing.ClickCount)
+    `, originalURL).Scan(&existing.Code, &existing.OriginalURL, &itemGUIDPtr,
+        &feedURLPtr, &existing.CreatedAt, &existing.ClickCount)
     if err == nil {
+        existing.ItemGUID = StringValue(itemGUIDPtr)
+        existing.FeedURL = StringValue(feedURLPtr)
         return &existing, nil
     }
@@ -85,9 +88,9 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
     // Insert new short URL
     now := time.Now()
     _, err = c.db.Exec(`
-        INSERT INTO short_urls (code, original_url, item_id, feed_url, created_at, click_count)
+        INSERT INTO short_urls (code, original_url, item_guid, feed_url, created_at, click_count)
         VALUES ($1, $2, $3, $4, $5, 0)
-    `, code, originalURL, itemID, NullableString(feedURL), now)
+    `, code, originalURL, NullableString(itemGUID), NullableString(feedURL), now)
     if err != nil {
         return nil, fmt.Errorf("failed to create short URL: %v", err)
@@ -96,7 +99,7 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
     return &ShortURL{
         Code:        code,
         OriginalURL: originalURL,
-        ItemID:      itemID,
+        ItemGUID:    itemGUID,
         FeedURL:     feedURL,
         CreatedAt:   now,
         ClickCount:  0,
@@ -106,19 +109,19 @@ func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL stri
 // GetShortURL retrieves a short URL by code
 func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
     var s ShortURL
-    var itemID *int64
+    var itemGUID *string
     var feedURL *string
     err := c.db.QueryRow(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls WHERE code = $1
-    `, code).Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+    `, code).Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
     if err != nil {
         return nil, err
     }
-    s.ItemID = itemID
+    s.ItemGUID = StringValue(itemGUID)
     s.FeedURL = StringValue(feedURL)
     return &s, nil
 }
@@ -163,8 +166,8 @@ func (c *Crawler) RecordClick(code string, r *http.Request) error {
 // GetShortURLForPost returns the short URL string for use in posts
 // Format: https://url.1440.news/{code}
-func (c *Crawler) GetShortURLForPost(originalURL string, itemID *int64, feedURL string) (string, error) {
-    shortURL, err := c.CreateShortURL(originalURL, itemID, feedURL)
+func (c *Crawler) GetShortURLForPost(originalURL, itemGUID, feedURL string) (string, error) {
+    shortURL, err := c.CreateShortURL(originalURL, itemGUID, feedURL)
     if err != nil {
         return "", err
     }
@@ -216,7 +219,7 @@ func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
 // GetTopShortURLs returns the most clicked short URLs
 func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
     rows, err := c.db.Query(`
-        SELECT code, original_url, item_id, feed_url, created_at, click_count
+        SELECT code, original_url, item_guid, feed_url, created_at, click_count
         FROM short_urls
         ORDER BY click_count DESC
         LIMIT $1
@@ -229,15 +232,15 @@ func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
     var urls []ShortURL
     for rows.Next() {
         var s ShortURL
-        var itemID *int64
+        var itemGUID *string
         var feedURL *string
-        err := rows.Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+        err := rows.Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
         if err != nil {
            continue
         }
-        s.ItemID = itemID
+        s.ItemGUID = StringValue(itemGUID)
         s.FeedURL = StringValue(feedURL)
         urls = append(urls, s)
     }
File 8: +2 -2
@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
 <title>1440.news Feed Crawler</title>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
-<link rel="stylesheet" href="/static/dashboard.css?v=1769995130">
-<script src="/static/dashboard.js?v=1769995130"></script>
+<link rel="stylesheet" href="/static/dashboard.css?v=1770006945">
+<script src="/static/dashboard.js?v=1770006945"></script>
 </head>
 <body>
 <div id="topSection">