Publishing:
- Add publisher.go for posting feed items to AT Protocol PDS
- Support deterministic rkeys from SHA256(guid + discoveredAt) (sketched below)
- Handle multiple URLs in posts with facets for each link (sketched below)
- Image embed support (app.bsky.embed.images) for up to 4 images
- External embed with thumbnail fallback
- Podcast/audio enclosure URLs included in post text

Media extraction:
- Parse RSS enclosures (audio, video, images)
- Extract Media RSS content and thumbnails
- Extract images from HTML content in descriptions
- Store enclosure and imageUrls in items table

SQLite stability improvements:
- Add synchronous=NORMAL and wal_autocheckpoint pragmas (sketched below)
- Connection pool tuning (idle conns, max lifetime)
- Periodic WAL checkpoint every 5 minutes (maintenance loop sketched after the code)
- Hourly integrity checks with PRAGMA quick_check
- Daily hot backup via VACUUM INTO
- Docker stop_grace_period: 30s for graceful shutdown

Dashboard:
- Feed publishing UI and API endpoints
- Account creation with invite codes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
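A minimal sketch of the deterministic rkey derivation. The commit only specifies SHA256(guid + discoveredAt); the RFC 3339 timestamp encoding, the hex output, and the 16-character truncation in rkeyFor are illustrative assumptions:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"time"
)

// rkeyFor derives a stable record key from an item's GUID and discovery time,
// so republishing the same item targets the same record (e.g. via
// com.atproto.repo.putRecord) instead of creating a duplicate.
func rkeyFor(guid string, discoveredAt time.Time) string {
	sum := sha256.Sum256([]byte(guid + discoveredAt.UTC().Format(time.RFC3339)))
	// Hex output uses only [0-9a-f], which is valid rkey syntax.
	return hex.EncodeToString(sum[:])[:16]
}

func main() {
	fmt.Println(rkeyFor("https://example.com/post/1", time.Unix(1700000000, 0)))
}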
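How publisher.go actually detects URLs is not shown here; the sketch below assumes a simple regex scan. The facet shape follows the app.bsky.richtext.facet lexicon, which marks link features over byte ranges of the post text:

package main

import (
	"fmt"
	"regexp"
)

// Facet marks a byte range of the post text as a rich-text feature.
// Field names follow the app.bsky.richtext.facet lexicon.
type Facet struct {
	Index    ByteSlice `json:"index"`
	Features []Link    `json:"features"`
}

// ByteSlice is a half-open [byteStart, byteEnd) range of UTF-8 bytes.
type ByteSlice struct {
	ByteStart int `json:"byteStart"`
	ByteEnd   int `json:"byteEnd"`
}

// Link is the app.bsky.richtext.facet#link feature.
type Link struct {
	Type string `json:"$type"`
	URI  string `json:"uri"`
}

// linkFacets returns one link facet per URL found in text. The lexicon
// requires byte offsets (not rune offsets), which FindAllStringIndex
// already provides, so non-ASCII text is handled correctly.
func linkFacets(text string) []Facet {
	re := regexp.MustCompile(`https?://[^\s]+`)
	var facets []Facet
	for _, loc := range re.FindAllStringIndex(text, -1) {
		facets = append(facets, Facet{
			Index: ByteSlice{ByteStart: loc[0], ByteEnd: loc[1]},
			Features: []Link{{
				Type: "app.bsky.richtext.facet#link",
				URI:  text[loc[0]:loc[1]],
			}},
		})
	}
	return facets
}

func main() {
	fmt.Printf("%+v\n", linkFacets("New post https://example.com and https://example.org"))
}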
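A sketch of the SQLite setup, assuming the mattn/go-sqlite3 driver. The openDB name, pool numbers, busy timeout, and autocheckpoint value are illustrative; WAL mode, synchronous=NORMAL, and wal_autocheckpoint come from the list above:

package main

import (
	"database/sql"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

// openDB applies the stability pragmas and tunes the connection pool.
func openDB(path string) (*sql.DB, error) {
	// DSN parameters apply to every pooled connection, unlike Exec'd pragmas,
	// which only run on whichever connection the pool happens to hand out.
	db, err := sql.Open("sqlite3", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=5000")
	if err != nil {
		return nil, err
	}
	// Checkpoint automatically once the WAL reaches ~1000 pages (assumed value).
	if _, err := db.Exec("PRAGMA wal_autocheckpoint=1000"); err != nil {
		db.Close()
		return nil, err
	}
	db.SetMaxIdleConns(4)            // keep a few connections warm
	db.SetConnMaxLifetime(time.Hour) // recycle long-lived connections
	return db, nil
}

func main() {
	db, err := openDB("feeds/feeds.db")
	if err != nil {
		panic(err)
	}
	defer db.Close()
}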
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// Ensure feeds directory exists
	if err := os.MkdirAll("feeds", 0755); err != nil {
		fmt.Fprintf(os.Stderr, "Error creating feeds directory: %v\n", err)
		os.Exit(1)
	}

	crawler, err := NewCrawler("feeds/feeds.db")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error initializing crawler: %v\n", err)
		os.Exit(1)
	}

	// Setup graceful shutdown
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	// Start dashboard in background
	go func() {
		if err := crawler.StartDashboard("0.0.0.0:4321"); err != nil {
			fmt.Fprintf(os.Stderr, "Dashboard error: %v\n", err)
		}
	}()

	// Initialize stats in background (can be slow with large DBs)
	go crawler.UpdateStats()

	// Start all loops independently
	fmt.Println("Starting import, crawl, check, and stats loops...")

	// Import loop (background)
	go crawler.ImportDomainsInBackground("vertices.txt.gz")

	// Check loop (background)
	go crawler.StartCheckLoop()

	// Stats loop (background) - updates once per minute
	go crawler.StartStatsLoop()

	// Cleanup loop (background) - removes old items once per week
	go crawler.StartCleanupLoop()

	// Maintenance loop (background) - WAL checkpoints and integrity checks
	go crawler.StartMaintenanceLoop()

	// Crawl loop (background)
	go crawler.StartCrawlLoop()

	// Wait for shutdown signal
	sig := <-sigChan
	fmt.Printf("\nReceived %v, shutting down gracefully...\n", sig)

	// Close crawler (checkpoints WAL and closes database)
	if err := crawler.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "Error closing crawler: %v\n", err)
		os.Exit(1)
	}

	fmt.Println("Shutdown complete")
}
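StartMaintenanceLoop is defined outside main.go, so the following is only a sketch of the schedule the commit message describes (5-minute WAL checkpoints, hourly quick_check, daily VACUUM INTO), written against a bare *sql.DB with hypothetical names:

package main

import (
	"database/sql"
	"fmt"
	"os"
	"time"
)

// maintenanceLoop runs the periodic database maintenance tasks.
func maintenanceLoop(db *sql.DB) {
	checkpoint := time.NewTicker(5 * time.Minute)
	integrity := time.NewTicker(time.Hour)
	backup := time.NewTicker(24 * time.Hour)
	defer checkpoint.Stop()
	defer integrity.Stop()
	defer backup.Stop()

	for {
		select {
		case <-checkpoint.C:
			// Fold the WAL back into the main database file.
			if _, err := db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
				fmt.Fprintf(os.Stderr, "wal_checkpoint: %v\n", err)
			}
		case <-integrity.C:
			// quick_check reports "ok" as its only row on a healthy database.
			var result string
			if err := db.QueryRow("PRAGMA quick_check").Scan(&result); err != nil || result != "ok" {
				fmt.Fprintf(os.Stderr, "quick_check: %q (err: %v)\n", result, err)
			}
		case <-backup.C:
			// VACUUM INTO writes a consistent hot backup to a new file.
			// The date-stamped name contains no quotes, so inlining it is safe here.
			name := fmt.Sprintf("feeds/backup-%s.db", time.Now().UTC().Format("2006-01-02"))
			if _, err := db.Exec(fmt.Sprintf("VACUUM INTO '%s'", name)); err != nil {
				fmt.Fprintf(os.Stderr, "backup: %v\n", err)
			}
		}
	}
}

func main() {
	// Wiring is hypothetical: open the database and run the loop in the
	// background, the way main.go starts StartMaintenanceLoop.
}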