Remove item ID column references - items now use composite PK (guid, feed_url)

- Remove ID field from Item struct
- Remove ID field from SearchItem struct
- Update all SQL queries to not select id column
- Change MarkItemPublished to use feedURL/guid instead of id
- Update shortener to use item_guid instead of item_id
- Add migration to convert item_id to item_guid in short_urls table
- Update API endpoints to use feedUrl/guid instead of itemId

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
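For context, a minimal sketch of the two affected table shapes after this change. The column names are taken from the queries in the diff below; the types and constraint syntax are assumptions, not the project's actual schema:

	// Hypothetical schema sketch: items keyed by (guid, feed_url), and
	// short_urls referencing items by GUID instead of a numeric id.
	const schemaSketch = `
	CREATE TABLE items (
	    feed_url      TEXT NOT NULL,
	    guid          TEXT NOT NULL,
	    title         TEXT,
	    link          TEXT,
	    published_at  TIMESTAMP,
	    published_uri TEXT,
	    PRIMARY KEY (guid, feed_url)  -- composite PK replaces the old serial id
	);

	CREATE TABLE short_urls (
	    code         TEXT PRIMARY KEY,
	    original_url TEXT NOT NULL,
	    item_guid    TEXT,            -- was item_id BIGINT before the migration
	    feed_url     TEXT,
	    created_at   TIMESTAMP NOT NULL,
	    click_count  INT NOT NULL DEFAULT 0
	);`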
+22 -19

@@ -504,15 +504,16 @@ func (c *Crawler) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Reque
 }
 
 // handleAPITestPublish tests publishing a single item to PDS
-// Requires: url (feed), itemId, handle, password, pds (optional, defaults to https://1440.news)
+// Requires: feedUrl, guid, handle, password, pds (optional, defaults to https://1440.news)
 func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
-	itemIDStr := r.URL.Query().Get("itemId")
+	feedURL := r.URL.Query().Get("feedUrl")
+	guidParam := r.URL.Query().Get("guid")
 	handle := r.URL.Query().Get("handle")
 	password := r.URL.Query().Get("password")
 	pdsHost := r.URL.Query().Get("pds")
 
-	if itemIDStr == "" {
-		http.Error(w, "itemId parameter required", http.StatusBadRequest)
+	if feedURL == "" || guidParam == "" {
+		http.Error(w, "feedUrl and guid parameters required", http.StatusBadRequest)
 		return
 	}
 	if handle == "" || password == "" {
@@ -523,9 +524,6 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 		pdsHost = "https://1440.news"
 	}
 
-	var itemID int64
-	fmt.Sscanf(itemIDStr, "%d", &itemID)
-
 	// Get the item
 	var item Item
 	var guid, title, link, description, content, author *string
@@ -533,10 +531,10 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 	var publishedUri *string
 
 	err := c.db.QueryRow(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
-		FROM items WHERE id = $1
-	`, itemID).Scan(
-		&item.ID, &item.FeedURL, &guid, &title, &link,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at, published_at, published_uri
+		FROM items WHERE feed_url = $1 AND guid = $2
+	`, feedURL, guidParam).Scan(
+		&item.FeedURL, &guid, &title, &link,
 		&description, &content, &author, &pubDate,
 		&item.DiscoveredAt, &updatedAt, &publishedAt, &publishedUri,
 	)
@@ -571,7 +569,7 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 	}
 
 	// Mark as published
-	c.MarkItemPublished(item.ID, uri)
+	c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 
 	// Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
 	rkeyTime := item.PubDate
@@ -582,7 +580,8 @@ func (c *Crawler) handleAPITestPublish(w http.ResponseWriter, r *http.Request) {
 	json.NewEncoder(w).Encode(map[string]interface{}{
 		"status":  "published",
 		"uri":     uri,
-		"itemId":  item.ID,
+		"feedUrl": item.FeedURL,
+		"guid":    item.GUID,
 		"title":   item.Title,
 		"rkey":    GenerateRkey(item.GUID, rkeyTime),
 	})
@@ -643,7 +642,8 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 	}
 
 	type PublishResult struct {
-		ItemID int64  `json:"item_id"`
+		FeedURL string `json:"feed_url"`
+		GUID    string `json:"guid"`
 		Title  string `json:"title"`
 		URI    string `json:"uri,omitempty"`
 		Error  string `json:"error,omitempty"`
@@ -655,7 +655,8 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 
 	for i, item := range items {
 		result := PublishResult{
-			ItemID: item.ID,
+			FeedURL: item.FeedURL,
+			GUID:    item.GUID,
 			Title:  item.Title,
 		}
 
@@ -665,7 +666,7 @@ func (c *Crawler) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) {
 			failed++
 		} else {
 			result.URI = uri
-			c.MarkItemPublished(item.ID, uri)
+			c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 			published++
 		}
 
@@ -867,7 +868,8 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 	}
 
 	type PublishResult struct {
-		ItemID int64  `json:"item_id"`
+		FeedURL string `json:"feed_url"`
+		GUID    string `json:"guid"`
 		Title  string `json:"title"`
 		URI    string `json:"uri,omitempty"`
 		Error  string `json:"error,omitempty"`
@@ -879,7 +881,8 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 
 	for i, item := range items {
 		result := PublishResult{
-			ItemID: item.ID,
+			FeedURL: item.FeedURL,
+			GUID:    item.GUID,
 			Title:  item.Title,
 		}
 
@@ -889,7 +892,7 @@ func (c *Crawler) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Reques
 			failed++
 		} else {
 			result.URI = uri
-			c.MarkItemPublished(item.ID, uri)
+			c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 			published++
 		}
 
+2 -5

@@ -41,7 +41,6 @@ type SearchFeed struct {
 }
 
 type SearchItem struct {
-	ID      int64  `json:"id"`
 	FeedURL string `json:"feed_url"`
 	GUID    string `json:"guid"`
 	Title   string `json:"title"`
@@ -187,7 +186,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 
 	// Search items via full-text search
 	itemRows, err := c.db.Query(`
-		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
+		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
 		FROM items i
 		WHERE i.search_vector @@ to_tsquery('english', $1)
 		ORDER BY i.pub_date DESC
@@ -196,16 +195,14 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 	if err == nil {
 		defer itemRows.Close()
 		for itemRows.Next() {
-			var id int64
 			var feedUrl string
 			var guid, title, link, description, content, author *string
 			var pubDate, discoveredAt, updatedAt *time.Time
-			if err := itemRows.Scan(&id, &feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
+			if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
 				continue
 			}
 
 			item := SearchItem{
-				ID:      id,
 				FeedURL: feedUrl,
 				GUID:    StringValue(guid),
 				Title:   StringValue(title),
+5 -5

@@ -313,7 +313,7 @@ func (c *Crawler) StartPublishLoop() {
 			// Shorten URLs before publishing
 			itemToPublish := item
 			if item.Link != "" {
-				if shortURL, err := c.GetShortURLForPost(item.Link, &item.ID, item.FeedURL); err == nil {
+				if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
 					fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
 					itemToPublish.Link = shortURL
 				} else {
@@ -324,13 +324,13 @@ func (c *Crawler) StartPublishLoop() {
 			// Publish the item
 			uri, err := publisher.PublishItem(session, &itemToPublish)
 			if err != nil {
-				fmt.Printf("Publish: failed item %d: %v\n", item.ID, err)
+				fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
 				// Clear session cache on auth errors
 				if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
 					delete(sessions, account)
 				}
 			} else {
-				c.MarkItemPublished(item.ID, uri)
+				c.MarkItemPublished(item.FeedURL, item.GUID, uri)
 				fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
 			}
 
@@ -453,7 +453,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
 // GetAllUnpublishedItems returns unpublished items from all approved feeds
 func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
 	rows, err := c.db.Query(`
-		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
+		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
 		       i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
 		       i.enclosure_url, i.enclosure_type, i.enclosure_length
 		FROM items i
@@ -477,7 +477,7 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
 		var enclosureURL, enclosureType *string
 		var enclosureLength *int64
 
-		err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
+		err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
 			&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
 			&enclosureURL, &enclosureType, &enclosureLength)
 		if err != nil {
@@ -174,7 +174,7 @@ func OpenDatabase(connString string) (*DB, error) {
 
 	// Connection pool settings
 	config.MaxConns = 10
-	config.MinConns = 2
+	config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
 	config.MaxConnLifetime = 5 * time.Minute
 	config.MaxConnIdleTime = 1 * time.Minute
 
@@ -193,11 +193,17 @@ func OpenDatabase(connString string) (*DB, error) {
 
 	db := &DB{pool}
 
-	// Create schema
+	// Check if schema already exists (check for domains table)
+	var tableExists bool
+	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'domains')").Scan(&tableExists)
+
+	if !tableExists {
+		// Create schema only if tables don't exist
 	if _, err := pool.Exec(ctx, schema); err != nil {
 		pool.Close()
 		return nil, fmt.Errorf("failed to create schema: %v", err)
 	}
+	}
 	fmt.Println(" Schema OK")
 
 	// Migration: add miss_count column if not exists
@@ -291,6 +297,14 @@ func OpenDatabase(connString string) (*DB, error) {
 		pool.Exec(ctx, "ALTER TABLE oauth_sessions ALTER COLUMN token_expiry TYPE TIMESTAMP USING token_expiry AT TIME ZONE 'UTC'")
 	}
 
+	// Migration: rename item_id to item_guid in short_urls table (items now use composite PK)
+	pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name='short_urls' AND column_name='item_id')").Scan(&colExists)
+	if colExists {
+		// Drop the column and add item_guid instead (can't convert int64 to text meaningfully)
+		pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
+		pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
+	}
+
 	fmt.Println(" Schema OK")
 
 	// Run stats and background index creation
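The migration above uses an idempotent check-then-alter pattern: probe information_schema, then run ALTER TABLE only when needed, so startup can re-run it safely. A standalone sketch of that pattern; the helper names and the pgx v5 import path are assumptions, and errors are ignored to match the original's style:

	package main

	import (
		"context"

		"github.com/jackc/pgx/v5/pgxpool"
	)

	// columnExists probes information_schema so a migration can run on every
	// startup without failing when the column is already gone.
	func columnExists(ctx context.Context, pool *pgxpool.Pool, table, column string) bool {
		var exists bool
		pool.QueryRow(ctx,
			"SELECT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_name=$1 AND column_name=$2)",
			table, column).Scan(&exists)
		return exists
	}

	// migrateShortURLs drops the old integer column and adds the new text one.
	// An int64 id cannot be rewritten into a GUID, so no data is converted.
	func migrateShortURLs(ctx context.Context, pool *pgxpool.Pool) {
		if columnExists(ctx, pool, "short_urls", "item_id") {
			pool.Exec(ctx, "ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id")
			pool.Exec(ctx, "ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT")
		}
	}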
+3 -3

@@ -11,9 +11,9 @@ services:
     environment:
       DB_HOST: infra-postgres
       DB_PORT: 5432
-      DB_USER: news_1440
+      DB_USER: dba_1440_news
       DB_PASSWORD_FILE: /run/secrets/db_password
-      DB_NAME: news_1440
+      DB_NAME: db_1440_news
     secrets:
       - db_password
     volumes:
@@ -55,7 +55,7 @@ services:
 
 secrets:
   db_password:
-    file: ../../../infra/postgres/secrets/news_1440_password.txt
+    file: ../../../infra/postgres/secrets/dba_1440_news_password.txt
 
 networks:
   proxy:
@@ -17,7 +17,6 @@ type Enclosure struct {
 
 // Item represents an individual entry/article from a feed
 type Item struct {
-	ID      int64  `json:"id,omitempty"`
 	FeedURL string `json:"feed_url"`
 	GUID    string `json:"guid,omitempty"`
 	Title   string `json:"title,omitempty"`
@@ -176,7 +175,7 @@ func (c *Crawler) saveItems(items []*Item) error {
 // GetItemsByFeed returns all items for a specific feed
 func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -196,7 +195,7 @@ func (c *Crawler) GetItemsByFeed(feedURL string, limit int) ([]*Item, error) {
 func (c *Crawler) SearchItems(query string, limit int) ([]*Item, error) {
 	tsquery := ToSearchQuery(query)
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -225,7 +224,7 @@ func scanItems(rows pgx.Rows) ([]*Item, error) {
 		var publishedUri *string
 
 		if err := rows.Scan(
-			&item.ID, &item.FeedURL, &guid, &title, &link,
+			&item.FeedURL, &guid, &title, &link,
 			&description, &content, &author, &pubDate,
 			&item.DiscoveredAt, &updatedAt,
 			&enclosureUrl, &enclosureType, &enclosureLength, &imageUrlsJSON, &tagsJSON,
@@ -294,7 +293,7 @@ func (c *Crawler) CleanupOldItems() (int64, error) {
 // GetUnpublishedItems returns items for a feed that haven't been published yet
 func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error) {
 	rows, err := c.db.Query(`
-		SELECT id, feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
+		SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at,
 		       enclosure_url, enclosure_type, enclosure_length, image_urls, tags,
 		       published_at, published_uri
 		FROM items
@@ -311,10 +310,10 @@ func (c *Crawler) GetUnpublishedItems(feedURL string, limit int) ([]*Item, error
 }
 
 // MarkItemPublished marks an item as published with the given URI
-func (c *Crawler) MarkItemPublished(itemID int64, uri string) error {
+func (c *Crawler) MarkItemPublished(feedURL, guid, uri string) error {
 	_, err := c.db.Exec(`
-		UPDATE items SET published_at = NOW(), published_uri = $1 WHERE id = $2
-	`, uri, itemID)
+		UPDATE items SET published_at = NOW(), published_uri = $1 WHERE feed_url = $2 AND guid = $3
+	`, uri, feedURL, guid)
 	return err
 }
 
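Every call site now identifies an item by the (feedURL, guid) pair rather than a numeric id. A minimal usage sketch of the new MarkItemPublished signature; all values are illustrative, not real data:

	// Sketch only: mark one item as published under the composite key.
	func markPublishedExample(c *Crawler) error {
		return c.MarkItemPublished(
			"https://example.org/feed.xml",                 // feed_url half of the key
			"tag:example.org,2024:item-1",                  // guid half of the key
			"at://did:plc:example/app.bsky.feed.post/3abc", // published_uri to record
		)
	}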
+21 -18

@@ -13,7 +13,7 @@ import (
 type ShortURL struct {
 	Code        string    `json:"code"`
 	OriginalURL string    `json:"original_url"`
-	ItemID      *int64    `json:"item_id,omitempty"`
+	ItemGUID    string    `json:"item_guid,omitempty"`
 	FeedURL     string    `json:"feed_url,omitempty"`
 	CreatedAt   time.Time `json:"created_at"`
 	ClickCount  int       `json:"click_count"`
@@ -46,16 +46,19 @@ func generateShortCode(url string) string {
 }
 
 // CreateShortURL creates or retrieves a short URL for the given original URL
-func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL string) (*ShortURL, error) {
+func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL string) (*ShortURL, error) {
 	// Check if we already have this URL
 	var existing ShortURL
+	var itemGUIDPtr, feedURLPtr *string
 	err := c.db.QueryRow(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls WHERE original_url = $1
-	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &existing.ItemID,
-		&existing.FeedURL, &existing.CreatedAt, &existing.ClickCount)
+	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &itemGUIDPtr,
+		&feedURLPtr, &existing.CreatedAt, &existing.ClickCount)
 
 	if err == nil {
+		existing.ItemGUID = StringValue(itemGUIDPtr)
+		existing.FeedURL = StringValue(feedURLPtr)
 		return &existing, nil
 	}
 
@@ -85,9 +88,9 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 	// Insert new short URL
 	now := time.Now()
 	_, err = c.db.Exec(`
-		INSERT INTO short_urls (code, original_url, item_id, feed_url, created_at, click_count)
+		INSERT INTO short_urls (code, original_url, item_guid, feed_url, created_at, click_count)
 		VALUES ($1, $2, $3, $4, $5, 0)
-	`, code, originalURL, itemID, NullableString(feedURL), now)
+	`, code, originalURL, NullableString(itemGUID), NullableString(feedURL), now)
 
 	if err != nil {
 		return nil, fmt.Errorf("failed to create short URL: %v", err)
@@ -96,7 +99,7 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 	return &ShortURL{
 		Code:        code,
 		OriginalURL: originalURL,
-		ItemID:      itemID,
+		ItemGUID:    itemGUID,
 		FeedURL:     feedURL,
 		CreatedAt:   now,
 		ClickCount:  0,
@@ -106,19 +109,19 @@ func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL stri
 // GetShortURL retrieves a short URL by code
 func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
 	var s ShortURL
-	var itemID *int64
+	var itemGUID *string
 	var feedURL *string
 
 	err := c.db.QueryRow(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls WHERE code = $1
-	`, code).Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+	`, code).Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
 
 	if err != nil {
 		return nil, err
 	}
 
-	s.ItemID = itemID
+	s.ItemGUID = StringValue(itemGUID)
 	s.FeedURL = StringValue(feedURL)
 	return &s, nil
 }
@@ -163,8 +166,8 @@ func (c *Crawler) RecordClick(code string, r *http.Request) error {
 
 // GetShortURLForPost returns the short URL string for use in posts
 // Format: https://url.1440.news/{code}
-func (c *Crawler) GetShortURLForPost(originalURL string, itemID *int64, feedURL string) (string, error) {
-	shortURL, err := c.CreateShortURL(originalURL, itemID, feedURL)
+func (c *Crawler) GetShortURLForPost(originalURL, itemGUID, feedURL string) (string, error) {
+	shortURL, err := c.CreateShortURL(originalURL, itemGUID, feedURL)
 	if err != nil {
 		return "", err
 	}
@@ -216,7 +219,7 @@ func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
 // GetTopShortURLs returns the most clicked short URLs
 func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
 	rows, err := c.db.Query(`
-		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		SELECT code, original_url, item_guid, feed_url, created_at, click_count
 		FROM short_urls
 		ORDER BY click_count DESC
 		LIMIT $1
@@ -229,15 +232,15 @@ func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
 	var urls []ShortURL
 	for rows.Next() {
 		var s ShortURL
-		var itemID *int64
+		var itemGUID *string
 		var feedURL *string
 
-		err := rows.Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+		err := rows.Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
 		if err != nil {
 			continue
 		}
 
-		s.ItemID = itemID
+		s.ItemGUID = StringValue(itemGUID)
 		s.FeedURL = StringValue(feedURL)
 		urls = append(urls, s)
 	}
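After this change the shortener takes the GUID as a plain string, with "" standing in for "no associated item" (stored as NULL via NullableString) instead of a nil *int64. A usage sketch of the new GetShortURLForPost signature with illustrative values:

	// Sketch only: shorten an item link under the new string-GUID signature.
	func shortenExample(c *Crawler) (string, error) {
		return c.GetShortURLForPost(
			"https://example.org/articles/some-long-headline", // original link
			"tag:example.org,2024:item-1",                     // item_guid ("" if none)
			"https://example.org/feed.xml",                    // feed_url
		)
	}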
+2 -2

@@ -445,8 +445,8 @@ const dashboardHTML = `<!DOCTYPE html>
 <title>1440.news Feed Crawler</title>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
-<link rel="stylesheet" href="/static/dashboard.css?v=1769995130">
-<script src="/static/dashboard.js?v=1769995130"></script>
+<link rel="stylesheet" href="/static/dashboard.css?v=1770006945">
+<script src="/static/dashboard.js?v=1770006945"></script>
 </head>
 <body>
 <div id="topSection">