Add AT Protocol publishing, media support, and SQLite stability

Publishing:
- Add publisher.go for posting feed items to AT Protocol PDS
- Support deterministic rkeys from SHA256(guid + discoveredAt)
- Handle multiple URLs in posts with facets for each link
- Image embed support (app.bsky.embed.images) for up to 4 images
- External embed with thumbnail fallback
- Podcast/audio enclosure URLs included in post text

Media extraction:
- Parse RSS enclosures (audio, video, images)
- Extract Media RSS content and thumbnails
- Extract images from HTML content in descriptions
- Store enclosure and imageUrls in items table

SQLite stability improvements:
- Add synchronous=NORMAL and wal_autocheckpoint pragmas
- Connection pool tuning (idle conns, max lifetime)
- Periodic WAL checkpoint every 5 minutes
- Hourly integrity checks with PRAGMA quick_check
- Daily hot backup via VACUUM INTO
- Docker stop_grace_period: 30s for graceful shutdown

Dashboard:
- Feed publishing UI and API endpoints
- Account creation with invite codes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
primal
2026-01-28 15:30:02 -05:00
parent aa6f571215
commit 75835d771d
11 changed files with 3723 additions and 635 deletions
+60 -3
View File
@@ -61,6 +61,13 @@ func NewCrawler(dbPath string) (*Crawler, error) {
func (c *Crawler) Close() error {
if c.db != nil {
// Checkpoint WAL to merge it back into main database before closing
// This prevents corruption if the container is stopped mid-write
fmt.Println("Checkpointing WAL...")
if _, err := c.db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
fmt.Printf("WAL checkpoint warning: %v\n", err)
}
fmt.Println("Closing database...")
return c.db.Close()
}
return nil
@@ -87,6 +94,56 @@ func (c *Crawler) StartCleanupLoop() {
}
}
// StartMaintenanceLoop runs periodic database upkeep on three independent
// schedules. It loops forever and never returns, so callers are expected to
// run it on its own goroutine.
//
//   - Every 5 minutes: PRAGMA wal_checkpoint(PASSIVE) to keep the WAL small.
//   - Every hour: PRAGMA quick_check; anything other than "ok" is logged.
//   - Every 24 hours: createBackup.
//
// Failures are only printed; they never stop the loop.
func (c *Crawler) StartMaintenanceLoop() {
	const (
		checkpointEvery = 5 * time.Minute
		integrityEvery  = 1 * time.Hour
		backupEvery     = 24 * time.Hour
	)

	walTick := time.NewTicker(checkpointEvery)
	checkTick := time.NewTicker(integrityEvery)
	backupTick := time.NewTicker(backupEvery)
	defer func() {
		backupTick.Stop()
		checkTick.Stop()
		walTick.Stop()
	}()

	for {
		select {
		case <-walTick.C:
			// PASSIVE mode checkpoints what it can without waiting on
			// readers or writers.
			_, err := c.db.Exec("PRAGMA wal_checkpoint(PASSIVE)")
			if err != nil {
				fmt.Printf("WAL checkpoint error: %v\n", err)
			}

		case <-checkTick.C:
			// quick_check is the cheaper variant of integrity_check; only
			// the first result row is inspected here.
			var status string
			err := c.db.QueryRow("PRAGMA quick_check").Scan(&status)
			switch {
			case err != nil:
				fmt.Printf("Integrity check error: %v\n", err)
			case status != "ok":
				fmt.Printf("WARNING: Database integrity issue detected: %s\n", status)
			}

		case <-backupTick.C:
			c.createBackup()
		}
	}
}
// createBackup writes a snapshot of the live database to
// "feeds/feeds.db.backup" (relative to the process working directory) using
// SQLite's VACUUM INTO statement (SQLite 3.27+). Progress and errors are
// printed; nothing is returned to the caller.
//
// NOTE(review): per the SQLite documentation, VACUUM INTO fails if the target
// file already exists and is non-empty. Because the path is fixed, every run
// after the first successful one will presumably log "Backup error" unless
// something else removes or rotates the old backup - confirm, and consider
// writing to a temporary name and renaming it over the previous backup.
func (c *Crawler) createBackup() {
	const dest = "feeds/feeds.db.backup"

	fmt.Println("Creating database backup...")

	// The destination is passed as a bound parameter; VACUUM INTO accepts an
	// SQL expression for the filename.
	_, err := c.db.Exec("VACUUM INTO ?", dest)
	if err == nil {
		fmt.Printf("Backup created: %s\n", dest)
		return
	}
	fmt.Printf("Backup error: %v\n", err)
}
// StartCrawlLoop runs the domain crawling loop independently
func (c *Crawler) StartCrawlLoop() {
numWorkers := runtime.NumCPU()
@@ -113,9 +170,9 @@ func (c *Crawler) StartCrawlLoop() {
}()
}
const fetchSize = 100
const fetchSize = 1000
for {
domains, err := c.GetUncheckedDomainsRandom(fetchSize)
domains, err := c.GetUncheckedDomains(fetchSize)
if err != nil {
fmt.Printf("Error fetching domains: %v\n", err)
}
@@ -155,7 +212,7 @@ func (c *Crawler) StartCheckLoop() {
}()
}
const fetchSize = 100
const fetchSize = 1000
for {
feeds, err := c.GetFeedsDueForCheck(fetchSize)
if err != nil {