Add AT Protocol publishing, media support, and SQLite stability

Publishing:
- Add publisher.go for posting feed items to AT Protocol PDS
- Derive deterministic record keys (rkeys) from SHA256(guid + discoveredAt)
- Handle multiple URLs in posts with facets for each link
- Support image embeds (app.bsky.embed.images) for up to 4 images
- External embed with thumbnail fallback
- Include podcast/audio enclosure URLs in post text

Media extraction:
- Parse RSS enclosures (audio, video, images)
- Extract Media RSS content and thumbnails
- Extract images from HTML content in descriptions
- Store enclosure and imageUrls in items table

SQLite stability improvements:
- Add synchronous=NORMAL and wal_autocheckpoint pragmas
- Connection pool tuning (idle conns, max lifetime)
- Periodic WAL checkpoint every 5 minutes
- Hourly integrity checks with PRAGMA quick_check
- Daily hot backup via VACUUM INTO
- Docker stop_grace_period: 30s for graceful shutdown

Dashboard:
- Feed publishing UI and API endpoints
- Account creation with invite codes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
primal
2026-01-28 15:30:02 -05:00
parent aa6f571215
commit 75835d771d
11 changed files with 3723 additions and 635 deletions
+4 -18
View File
@@ -88,26 +88,12 @@ func (c *Crawler) getDomain(host string) (*Domain, error) {
return domain, nil
}
// GetUncheckedDomains returns all domains with status "unchecked"
func (c *Crawler) GetUncheckedDomains() ([]*Domain, error) {
// GetUncheckedDomains returns up to limit unchecked domains ordered by discoveredAt (FIFO)
func (c *Crawler) GetUncheckedDomains(limit int) ([]*Domain, error) {
rows, err := c.db.Query(`
SELECT host, status, discoveredAt, lastCrawledAt, feedsFound, lastError, tld
FROM domains WHERE status = 'unchecked'
`)
if err != nil {
return nil, err
}
defer rows.Close()
return c.scanDomains(rows)
}
// GetUncheckedDomainsRandom returns up to limit unchecked domains in random order
func (c *Crawler) GetUncheckedDomainsRandom(limit int) ([]*Domain, error) {
rows, err := c.db.Query(`
SELECT host, status, discoveredAt, lastCrawledAt, feedsFound, lastError, tld
FROM domains WHERE status = 'unchecked'
ORDER BY RANDOM()
ORDER BY discoveredAt ASC
LIMIT ?
`, limit)
if err != nil {
@@ -224,7 +210,7 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 1024*1024)
const batchSize = 10000
const batchSize = 1000
now := time.Now()
nowStr := now.Format("2006-01-02 15:04:05")
totalImported := 0