Add AT Protocol publishing, media support, and SQLite stability
Publishing:
- Add publisher.go for posting feed items to AT Protocol PDS
- Support deterministic rkeys from SHA256(guid + discoveredAt) (sketched below)
- Handle multiple URLs in posts with facets for each link (sketched below)
- Image embed support (app.bsky.embed.images) for up to 4 images
- External embed with thumbnail fallback
- Podcast/audio enclosure URLs included in post text

Media extraction:
- Parse RSS enclosures (audio, video, images)
- Extract Media RSS content and thumbnails
- Extract images from HTML content in descriptions
- Store enclosure and imageUrls in items table

SQLite stability improvements (sketched below):
- Add synchronous=NORMAL and wal_autocheckpoint pragmas
- Connection pool tuning (idle conns, max lifetime)
- Periodic WAL checkpoint every 5 minutes
- Hourly integrity checks with PRAGMA quick_check
- Daily hot backup via VACUUM INTO
- Docker stop_grace_period: 30s for graceful shutdown

Dashboard:
- Feed publishing UI and API endpoints
- Account creation with invite codes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
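A minimal sketch of the deterministic rkey derivation described above, assuming the item's GUID and discoveredAt timestamp are available as strings; the hex encoding and truncation length are illustrative and not taken from publisher.go:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// deterministicRkey derives a stable AT Protocol record key from a feed item's
// GUID and discovery time, so re-publishing the same item maps to the same
// record instead of creating a duplicate. Hex output stays within the
// restricted rkey character set.
func deterministicRkey(guid, discoveredAt string) string {
	sum := sha256.Sum256([]byte(guid + discoveredAt))
	return hex.EncodeToString(sum[:])[:16] // truncation length is an assumption
}

func main() {
	fmt.Println(deterministicRkey("https://example.com/post/1", "2025-01-02 15:04:05"))
}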
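For the "facets for each link" item, a sketch of how link facets might be built. The struct and helper names here are hypothetical; only the app.bsky.richtext.facet#link feature shape and the byte-offset indexing follow the published lexicon:

package main

import (
	"fmt"
	"regexp"
)

// Facet marks a byte range of the post text and attaches a link feature to it.
// Offsets are byte offsets into the UTF-8 text, which is exactly what
// FindAllStringIndex returns for a Go string.
type Facet struct {
	Index    ByteSlice     `json:"index"`
	Features []LinkFeature `json:"features"`
}

type ByteSlice struct {
	ByteStart int `json:"byteStart"`
	ByteEnd   int `json:"byteEnd"`
}

type LinkFeature struct {
	Type string `json:"$type"` // app.bsky.richtext.facet#link
	URI  string `json:"uri"`
}

var urlRe = regexp.MustCompile(`https?://\S+`) // simplistic matcher, for the sketch only

// buildLinkFacets creates one link facet per URL found in the post text.
func buildLinkFacets(text string) []Facet {
	var facets []Facet
	for _, loc := range urlRe.FindAllStringIndex(text, -1) {
		facets = append(facets, Facet{
			Index: ByteSlice{ByteStart: loc[0], ByteEnd: loc[1]},
			Features: []LinkFeature{{
				Type: "app.bsky.richtext.facet#link",
				URI:  text[loc[0]:loc[1]],
			}},
		})
	}
	return facets
}

func main() {
	fmt.Printf("%+v\n", buildLinkFacets("New episode: https://example.com/ep1 audio: https://example.com/ep1.mp3"))
}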
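And a sketch of the SQLite stability settings, assuming the mattn/go-sqlite3 driver and illustrative pool values; only the pragma names and the 5-minute checkpoint interval come from the commit message:

package crawler

import (
	"database/sql"
	"log"
	"time"

	_ "github.com/mattn/go-sqlite3" // driver choice is an assumption, not from this commit
)

// openDB applies the durability pragmas and pool tuning, then starts a periodic
// WAL checkpoint. In practice pragmas are often passed in the DSN so every pooled
// connection gets them; Exec on the pool is shown here for brevity.
func openDB(path string) (*sql.DB, error) {
	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, err
	}
	for _, pragma := range []string{
		"PRAGMA journal_mode=WAL",
		"PRAGMA synchronous=NORMAL",
		"PRAGMA wal_autocheckpoint=1000", // pages; exact value is an assumption
	} {
		if _, err := db.Exec(pragma); err != nil {
			return nil, err
		}
	}

	// Connection pool tuning (idle conns, max lifetime); values are illustrative.
	db.SetMaxIdleConns(2)
	db.SetConnMaxLifetime(time.Hour)

	// Periodic WAL checkpoint every 5 minutes, as described above.
	go func() {
		for range time.Tick(5 * time.Minute) {
			if _, err := db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
				log.Printf("wal checkpoint failed: %v", err)
			}
		}
	}()
	return db, nil
}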
@@ -88,26 +88,12 @@ func (c *Crawler) getDomain(host string) (*Domain, error) {
 	return domain, nil
 }
 
-// GetUncheckedDomains returns all domains with status "unchecked"
-func (c *Crawler) GetUncheckedDomains() ([]*Domain, error) {
+// GetUncheckedDomains returns up to limit unchecked domains ordered by discoveredAt (FIFO)
+func (c *Crawler) GetUncheckedDomains(limit int) ([]*Domain, error) {
-	rows, err := c.db.Query(`
-		SELECT host, status, discoveredAt, lastCrawledAt, feedsFound, lastError, tld
-		FROM domains WHERE status = 'unchecked'
-	`)
-	if err != nil {
-		return nil, err
-	}
-	defer rows.Close()
-
-	return c.scanDomains(rows)
-}
-
-// GetUncheckedDomainsRandom returns up to limit unchecked domains in random order
-func (c *Crawler) GetUncheckedDomainsRandom(limit int) ([]*Domain, error) {
 	rows, err := c.db.Query(`
 		SELECT host, status, discoveredAt, lastCrawledAt, feedsFound, lastError, tld
 		FROM domains WHERE status = 'unchecked'
-		ORDER BY RANDOM()
+		ORDER BY discoveredAt ASC
 		LIMIT ?
 	`, limit)
 	if err != nil {
@@ -224,7 +210,7 @@ func (c *Crawler) ImportDomainsInBackground(filename string) {
 	buf := make([]byte, 0, 64*1024)
 	scanner.Buffer(buf, 1024*1024)
 
-	const batchSize = 10000
+	const batchSize = 1000
 	now := time.Now()
 	nowStr := now.Format("2006-01-02 15:04:05")
 	totalImported := 0