Remove publishing code - now handled by publish service
Publishing functionality has been moved to the standalone publish service.

Removed:
- publisher.go, pds_auth.go, pds_records.go, image.go, handle.go
- StartPublishLoop and related functions from crawler.go
- Publish loop invocation from main.go

Updated CLAUDE.md to reflect the new architecture.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
-361
@@ -3,12 +3,10 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -147,365 +145,6 @@ func (c *Crawler) StartMaintenanceLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
// StartPublishLoop automatically publishes unpublished items for approved feeds
|
||||
// Grabs up to 50 items sorted by discovered_at, publishes one per second, then reloops
|
||||
func (c *Crawler) StartPublishLoop() {
|
||||
// Load PDS credentials from environment or pds.env file
|
||||
pdsHost := os.Getenv("PDS_HOST")
|
||||
pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD")
|
||||
|
||||
if pdsHost == "" || pdsAdminPassword == "" {
|
||||
if data, err := os.ReadFile("pds.env"); err == nil {
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "#") || line == "" {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, "=", 2)
|
||||
if len(parts) == 2 {
|
||||
key := strings.TrimSpace(parts[0])
|
||||
value := strings.TrimSpace(parts[1])
|
||||
switch key {
|
||||
case "PDS_HOST":
|
||||
pdsHost = value
|
||||
case "PDS_ADMIN_PASSWORD":
|
||||
pdsAdminPassword = value
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if pdsHost == "" || pdsAdminPassword == "" {
|
||||
fmt.Println("Publish loop: PDS credentials not configured, skipping")
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Publish loop: starting with PDS %s\n", pdsHost)
|
||||
feedPassword := "feed1440!"
|
||||
|
||||
// Cache sessions per account
|
||||
sessions := make(map[string]*PDSSession)
|
||||
publisher := NewPublisher(pdsHost)
|
||||
|
||||
// Refresh existing account profiles on startup
|
||||
c.RefreshAllProfiles(publisher, feedPassword)
|
||||
|
||||
for {
|
||||
if c.IsShuttingDown() {
|
||||
return
|
||||
}
|
||||
|
||||
// Get up to 50 unpublished items from approved feeds, sorted by discovered_at ASC
|
||||
items, err := c.GetAllUnpublishedItems(50)
|
||||
if err != nil {
|
||||
fmt.Printf("Publish loop error: %v\n", err)
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(items) == 0 {
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
// Publish one item per second
|
||||
for _, item := range items {
|
||||
if c.IsShuttingDown() {
|
||||
return
|
||||
}
|
||||
// Get or create session for this feed's account
|
||||
account := c.getAccountForFeed(item.FeedURL)
|
||||
if account == "" {
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
session, ok := sessions[account]
|
||||
if !ok {
|
||||
// Try to log in
|
||||
session, err = publisher.CreateSession(account, feedPassword)
|
||||
if err != nil {
|
||||
// Account might not exist - try to create it
|
||||
inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1)
|
||||
if err != nil {
|
||||
fmt.Printf("Publish: failed to create invite for %s: %v\n", account, err)
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
email := account + "@1440.news"
|
||||
session, err = publisher.CreateAccount(account, email, feedPassword, inviteCode)
|
||||
if err != nil {
|
||||
fmt.Printf("Publish: failed to create account %s: %v\n", account, err)
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Publish: created account %s\n", account)
|
||||
c.db.Exec("UPDATE feeds SET publish_account = $1 WHERE url = $2", account, item.FeedURL)
|
||||
|
||||
// Set up profile for new account
|
||||
feedInfo := c.getFeedInfo(item.FeedURL)
|
||||
if feedInfo != nil {
|
||||
displayName := feedInfo.Title
|
||||
if displayName == "" {
|
||||
displayName = account
|
||||
}
|
||||
// Build description with feed URL (strip HTML tags)
|
||||
description := stripHTML(feedInfo.Description)
|
||||
if description == "" {
|
||||
description = "News feed via 1440.news"
|
||||
}
|
||||
// Add feed URL as first line of description
|
||||
feedURLFull := "https://" + item.FeedURL
|
||||
description = feedURLFull + "\n\n" + description
|
||||
// Truncate if needed
|
||||
if len(displayName) > 64 {
|
||||
displayName = displayName[:61] + "..."
|
||||
}
|
||||
if len(description) > 256 {
|
||||
description = description[:253] + "..."
|
||||
}
|
||||
// Fetch and upload favicon as avatar
|
||||
var avatar *BlobRef
|
||||
faviconSource := feedInfo.SiteURL
|
||||
if faviconSource == "" {
|
||||
// Fallback to deriving from feed URL
|
||||
faviconSource = feedInfo.SourceHost
|
||||
}
|
||||
if faviconSource != "" {
|
||||
faviconURL := publisher.FetchFavicon(faviconSource)
|
||||
if faviconURL != "" {
|
||||
avatar = publisher.fetchAndUploadImage(session, faviconURL)
|
||||
}
|
||||
}
|
||||
if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil {
|
||||
fmt.Printf("Publish: failed to set profile for %s: %v\n", account, err)
|
||||
} else {
|
||||
fmt.Printf("Publish: set profile for %s\n", account)
|
||||
}
|
||||
|
||||
// Have directory account follow this new account
|
||||
if err := publisher.FollowAsDirectory(session.DID); err != nil {
|
||||
fmt.Printf("Publish: directory follow failed for %s: %v\n", account, err)
|
||||
} else {
|
||||
fmt.Printf("Publish: directory now following %s\n", account)
|
||||
}
|
||||
}
|
||||
}
|
||||
sessions[account] = session
|
||||
}
|
||||
|
||||
// Shorten URLs before publishing
|
||||
itemToPublish := item
|
||||
if item.Link != "" {
|
||||
if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
|
||||
fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
|
||||
itemToPublish.Link = shortURL
|
||||
} else {
|
||||
fmt.Printf("Publish: short URL failed for %s: %v\n", item.Link[:min(40, len(item.Link))], err)
|
||||
}
|
||||
}
|
||||
|
||||
// Publish the item
|
||||
uri, err := publisher.PublishItem(session, &itemToPublish)
|
||||
if err != nil {
|
||||
fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
|
||||
// Clear session cache on auth errors
|
||||
if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
|
||||
delete(sessions, account)
|
||||
}
|
||||
} else {
|
||||
c.MarkItemPublished(item.FeedURL, item.GUID, uri)
|
||||
fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// getAccountForFeed returns the publish account for a feed URL
|
||||
func (c *Crawler) getAccountForFeed(feedURL string) string {
|
||||
var account *string
|
||||
err := c.db.QueryRow(`
|
||||
SELECT publish_account FROM feeds
|
||||
WHERE url = $1 AND publish_status = 'pass' AND status = 'pass'
|
||||
`, feedURL).Scan(&account)
|
||||
if err != nil || account == nil || *account == "" {
|
||||
// Derive handle from feed URL
|
||||
return DeriveHandleFromFeed(feedURL)
|
||||
}
|
||||
return *account
|
||||
}
|
||||
|
||||
// FeedInfo holds basic feed metadata for profile setup.
type FeedInfo struct {
	// Title is the feed title; used as the profile display name.
	Title string
	// Description is the feed description; used as the profile description.
	Description string
	// SiteURL is the feed's site URL; the first choice for favicon lookup.
	SiteURL string
	// SourceHost is the feed's host; the fallback for favicon lookup when SiteURL is empty.
	SourceHost string
}
|
||||
|
||||
// getFeedInfo returns feed metadata for profile setup
|
||||
func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
|
||||
var title, description, siteURL, sourceHost *string
|
||||
err := c.db.QueryRow(`
|
||||
SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1
|
||||
`, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return &FeedInfo{
|
||||
Title: StringValue(title),
|
||||
Description: StringValue(description),
|
||||
SiteURL: StringValue(siteURL),
|
||||
SourceHost: StringValue(sourceHost),
|
||||
}
|
||||
}
|
||||
|
||||
// RefreshAllProfiles updates profiles for all existing accounts with feed URLs
|
||||
func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT url, title, description, site_url, domain_host as source_host, publish_account
|
||||
FROM feeds
|
||||
WHERE publish_account IS NOT NULL AND publish_account <> ''
|
||||
`)
|
||||
if err != nil {
|
||||
fmt.Printf("RefreshProfiles: query error: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var feedURL, account string
|
||||
var title, description, siteURL, sourceHost *string
|
||||
if err := rows.Scan(&feedURL, &title, &description, &siteURL, &sourceHost, &account); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Login to account
|
||||
session, err := publisher.CreateSession(account, feedPassword)
|
||||
if err != nil {
|
||||
fmt.Printf("RefreshProfiles: login failed for %s: %v\n", account, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Build profile
|
||||
displayName := StringValue(title)
|
||||
if displayName == "" {
|
||||
displayName = account
|
||||
}
|
||||
desc := stripHTML(StringValue(description))
|
||||
if desc == "" {
|
||||
desc = "News feed via 1440.news"
|
||||
}
|
||||
// Add feed URL as first line
|
||||
feedURLFull := "https://" + feedURL
|
||||
desc = feedURLFull + "\n\n" + desc
|
||||
|
||||
// Truncate if needed
|
||||
if len(displayName) > 64 {
|
||||
displayName = displayName[:61] + "..."
|
||||
}
|
||||
if len(desc) > 256 {
|
||||
desc = desc[:253] + "..."
|
||||
}
|
||||
|
||||
// Fetch and upload favicon as avatar
|
||||
var avatar *BlobRef
|
||||
faviconSource := StringValue(siteURL)
|
||||
if faviconSource == "" {
|
||||
// Fallback to source host
|
||||
faviconSource = StringValue(sourceHost)
|
||||
}
|
||||
if faviconSource != "" {
|
||||
faviconURL := publisher.FetchFavicon(faviconSource)
|
||||
if faviconURL != "" {
|
||||
avatar = publisher.fetchAndUploadImage(session, faviconURL)
|
||||
}
|
||||
}
|
||||
|
||||
if err := publisher.UpdateProfile(session, displayName, desc, avatar); err != nil {
|
||||
fmt.Printf("RefreshProfiles: update failed for %s: %v\n", account, err)
|
||||
} else {
|
||||
fmt.Printf("RefreshProfiles: updated %s\n", account)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetAllUnpublishedItems returns unpublished items from all approved feeds
|
||||
func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
|
||||
i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
|
||||
i.enclosure_url, i.enclosure_type, i.enclosure_length
|
||||
FROM items i
|
||||
JOIN feeds f ON i.feed_url = f.url
|
||||
WHERE f.publish_status = 'pass'
|
||||
AND f.status = 'pass'
|
||||
AND i.published_at IS NULL
|
||||
ORDER BY i.discovered_at ASC
|
||||
LIMIT $1
|
||||
`, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var items []Item
|
||||
for rows.Next() {
|
||||
var item Item
|
||||
var guid, title, link, description, content, author, imageURLsJSON, tagsJSON *string
|
||||
var pubDate, discoveredAt *time.Time
|
||||
var enclosureURL, enclosureType *string
|
||||
var enclosureLength *int64
|
||||
|
||||
err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
|
||||
&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
|
||||
&enclosureURL, &enclosureType, &enclosureLength)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
item.GUID = StringValue(guid)
|
||||
item.Title = StringValue(title)
|
||||
item.Link = StringValue(link)
|
||||
item.Description = StringValue(description)
|
||||
item.Content = StringValue(content)
|
||||
item.Author = StringValue(author)
|
||||
item.PubDate = TimeValue(pubDate)
|
||||
item.DiscoveredAt = TimeValue(discoveredAt)
|
||||
|
||||
// Parse image URLs from JSON array
|
||||
if imageURLsJSON != nil && *imageURLsJSON != "" {
|
||||
json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
|
||||
}
|
||||
|
||||
// Parse tags from JSON array
|
||||
if tagsJSON != nil && *tagsJSON != "" {
|
||||
json.Unmarshal([]byte(*tagsJSON), &item.Tags)
|
||||
}
|
||||
|
||||
// Parse enclosure
|
||||
if enclosureURL != nil && *enclosureURL != "" {
|
||||
item.Enclosure = &Enclosure{
|
||||
URL: *enclosureURL,
|
||||
Type: StringValue(enclosureType),
|
||||
}
|
||||
if enclosureLength != nil {
|
||||
item.Enclosure.Length = *enclosureLength
|
||||
}
|
||||
}
|
||||
|
||||
items = append(items, item)
|
||||
}
|
||||
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// dnsResolver uses local caching DNS (infra-dns) with fallback to system
|
||||
var dnsResolver = &net.Resolver{
|
||||
PreferGo: true,
|
||||
|
||||
Reference in New Issue
Block a user