Add AT Protocol publishing, media support, and SQLite stability
Publishing:
- Add publisher.go for posting feed items to AT Protocol PDS
- Support deterministic rkeys from SHA256(guid + discoveredAt)
- Handle multiple URLs in posts with facets for each link
- Image embed support (app.bsky.embed.images) for up to 4 images
- External embed with thumbnail fallback
- Podcast/audio enclosure URLs included in post text

Media extraction:
- Parse RSS enclosures (audio, video, images)
- Extract Media RSS content and thumbnails
- Extract images from HTML content in descriptions
- Store enclosure and imageUrls in items table

SQLite stability improvements:
- Add synchronous=NORMAL and wal_autocheckpoint pragmas
- Connection pool tuning (idle conns, max lifetime)
- Periodic WAL checkpoint every 5 minutes
- Hourly integrity checks with PRAGMA quick_check
- Daily hot backup via VACUUM INTO
- Docker stop_grace_period: 30s for graceful shutdown

Dashboard:
- Feed publishing UI and API endpoints
- Account creation with invite codes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+60
-3
@@ -61,6 +61,13 @@ func NewCrawler(dbPath string) (*Crawler, error) {
|
||||
|
||||
func (c *Crawler) Close() error {
|
||||
if c.db != nil {
|
||||
// Checkpoint WAL to merge it back into main database before closing
|
||||
// This prevents corruption if the container is stopped mid-write
|
||||
fmt.Println("Checkpointing WAL...")
|
||||
if _, err := c.db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
|
||||
fmt.Printf("WAL checkpoint warning: %v\n", err)
|
||||
}
|
||||
fmt.Println("Closing database...")
|
||||
return c.db.Close()
|
||||
}
|
||||
return nil
|
||||
@@ -87,6 +94,56 @@ func (c *Crawler) StartCleanupLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
// StartMaintenanceLoop performs periodic database maintenance
|
||||
// - WAL checkpoint every 5 minutes to prevent WAL bloat and reduce corruption risk
|
||||
// - Quick integrity check every hour to detect issues early
|
||||
// - Hot backup every 24 hours for recovery
|
||||
func (c *Crawler) StartMaintenanceLoop() {
|
||||
checkpointTicker := time.NewTicker(5 * time.Minute)
|
||||
integrityTicker := time.NewTicker(1 * time.Hour)
|
||||
backupTicker := time.NewTicker(24 * time.Hour)
|
||||
defer checkpointTicker.Stop()
|
||||
defer integrityTicker.Stop()
|
||||
defer backupTicker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-checkpointTicker.C:
|
||||
// Passive checkpoint - doesn't block writers
|
||||
if _, err := c.db.Exec("PRAGMA wal_checkpoint(PASSIVE)"); err != nil {
|
||||
fmt.Printf("WAL checkpoint error: %v\n", err)
|
||||
}
|
||||
|
||||
case <-integrityTicker.C:
|
||||
// Quick check is faster than full integrity_check
|
||||
var result string
|
||||
if err := c.db.QueryRow("PRAGMA quick_check").Scan(&result); err != nil {
|
||||
fmt.Printf("Integrity check error: %v\n", err)
|
||||
} else if result != "ok" {
|
||||
fmt.Printf("WARNING: Database integrity issue detected: %s\n", result)
|
||||
}
|
||||
|
||||
case <-backupTicker.C:
|
||||
c.createBackup()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createBackup creates a hot backup of the database using SQLite's backup API
|
||||
func (c *Crawler) createBackup() {
|
||||
backupPath := "feeds/feeds.db.backup"
|
||||
fmt.Println("Creating database backup...")
|
||||
|
||||
// Use SQLite's online backup via VACUUM INTO (available in SQLite 3.27+)
|
||||
// This creates a consistent snapshot without blocking writers
|
||||
if _, err := c.db.Exec("VACUUM INTO ?", backupPath); err != nil {
|
||||
fmt.Printf("Backup error: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Backup created: %s\n", backupPath)
|
||||
}
|
||||
|
||||
// StartCrawlLoop runs the domain crawling loop independently
|
||||
func (c *Crawler) StartCrawlLoop() {
|
||||
numWorkers := runtime.NumCPU()
|
||||
@@ -113,9 +170,9 @@ func (c *Crawler) StartCrawlLoop() {
|
||||
}()
|
||||
}
|
||||
|
||||
const fetchSize = 100
|
||||
const fetchSize = 1000
|
||||
for {
|
||||
domains, err := c.GetUncheckedDomainsRandom(fetchSize)
|
||||
domains, err := c.GetUncheckedDomains(fetchSize)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching domains: %v\n", err)
|
||||
}
|
||||
@@ -155,7 +212,7 @@ func (c *Crawler) StartCheckLoop() {
|
||||
}()
|
||||
}
|
||||
|
||||
const fetchSize = 100
|
||||
const fetchSize = 1000
|
||||
for {
|
||||
feeds, err := c.GetFeedsDueForCheck(fetchSize)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user