Restore working codebase with all methods

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
primal
2026-02-01 19:08:53 -05:00
parent 211812363a
commit 8a9001c02c
18 changed files with 2357 additions and 331 deletions
+30 -27
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
+	"strings"
 	"time"

 	"github.com/jackc/pgx/v5"
@@ -16,16 +17,16 @@ type SearchResult struct {
 }

 type SearchFeed struct {
-	URL           string `json:"url"`
-	Type          string `json:"type"`
-	Category      string `json:"category"`
-	Title         string `json:"title"`
-	Description   string `json:"description"`
-	Language      string `json:"language"`
-	SiteURL       string `json:"site_url"`
-	DiscoveredAt  string `json:"discovered_at"`
-	LastCrawledAt string `json:"last_crawled_at"`
-	NextCrawlAt   string `json:"next_crawl_at"`
+	URL           string `json:"url"`
+	Type          string `json:"type"`
+	Category      string `json:"category"`
+	Title         string `json:"title"`
+	Description   string `json:"description"`
+	Language      string `json:"language"`
+	SiteURL       string `json:"site_url"`
+	DiscoveredAt  string `json:"discovered_at"`
+	LastCheckedAt string `json:"last_checked_at"`
+	NextCheckAt   string `json:"next_check_at"`
 	LastBuildDate string `json:"last_build_date"`
 	Status        string `json:"status"`
 	LastError     string `json:"last_error"`
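The renamed struct tags assume matching column renames in the feeds table; the migration itself is not part of this commit. A minimal sketch of what it could look like, with the table and column names inferred from the queries below and the pgxpool handle an assumption (the file only imports pgx/v5):

// Hypothetical migration, not part of this commit: rename the crawl
// timestamp columns so the queries below can read last_checked_at and
// next_check_at.
package migrations

import (
	"context"

	"github.com/jackc/pgx/v5/pgxpool"
)

func RenameFeedTimestampColumns(ctx context.Context, db *pgxpool.Pool) error {
	for _, stmt := range []string{
		`ALTER TABLE feeds RENAME COLUMN last_crawled_at TO last_checked_at`,
		`ALTER TABLE feeds RENAME COLUMN next_crawl_at TO next_check_at`,
	} {
		if _, err := db.Exec(ctx, stmt); err != nil {
			return err
		}
	}
	return nil
}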
@@ -76,7 +77,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		var url string
 		var feedType, category, title, description, language, siteUrl *string
 		var discoveredAt time.Time
-		var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
+		var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time
 		var itemCount *int
 		var status, lastError *string
 		var lastErrorAt *time.Time
@@ -85,7 +86,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		var noUpdate *bool
 		if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl,
-			&discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
+			&discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
 			&status, &lastError, &lastErrorAt,
 			&sourceUrl, &sourceHost, &tld,
 			&itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil {
@@ -110,11 +111,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 			SourceHost: StringValue(sourceHost),
 			TLD:        StringValue(tld),
 		}
-		if lastCrawledAt != nil {
-			sf.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
+		if lastCheckedAt != nil {
+			sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339)
 		}
-		if nextCrawlAt != nil {
-			sf.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
+		if nextCheckAt != nil {
+			sf.NextCheckAt = nextCheckAt.Format(time.RFC3339)
 		}
 		if lastBuildDate != nil {
 			sf.LastBuildDate = lastBuildDate.Format(time.RFC3339)
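The nil-check-then-Format pattern above repeats for every nullable timestamp; a small helper along these lines (hypothetical, not part of this commit) would collapse each pair to one line:

// formatTimePtr is a hypothetical helper: it renders a nullable
// timestamp as RFC 3339 and returns "" for NULL, matching the
// behavior of the nil checks above.
func formatTimePtr(t *time.Time) string {
	if t == nil {
		return ""
	}
	return t.Format(time.RFC3339)
}

With it, each assignment becomes e.g. sf.LastCheckedAt = formatTimePtr(lastCheckedAt).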
@@ -138,16 +139,18 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 	}

 	// Search feeds by source_host (LIKE search for domain matching)
+	// Use LOWER() to leverage trigram index
+	lowerPattern := "%" + strings.ToLower(query) + "%"
 	hostRows, err := c.db.Query(`
 		SELECT url, type, category, title, description, language, site_url,
-			discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error, last_error_at,
 			source_url, source_host, tld,
 			item_count, oldest_item_date, newest_item_date, no_update
 		FROM feeds
-		WHERE source_host ILIKE $1 OR url ILIKE $1
+		WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1
 		LIMIT $2
-	`, "%"+query+"%", limit)
+	`, lowerPattern, limit)
 	if err == nil {
 		defer hostRows.Close()
 		for hostRows.Next() {
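A trigram GIN index on the bare column would serve ILIKE directly, so the switch to LOWER(...) LIKE suggests the indexes here are expression indexes built over LOWER(source_host) and LOWER(url), which ILIKE on the raw column cannot use. A sketch of that setup (index names hypothetical, DDL not shown in this commit; imports as in the migration sketch above):

// Hypothetical DDL inferred from the new comment: expression trigram
// indexes that the rewritten predicate LOWER(source_host) LIKE $1 can use.
func ensureTrigramIndexes(ctx context.Context, db *pgxpool.Pool) error {
	for _, stmt := range []string{
		`CREATE EXTENSION IF NOT EXISTS pg_trgm`,
		`CREATE INDEX IF NOT EXISTS feeds_source_host_lower_trgm
			ON feeds USING gin (LOWER(source_host) gin_trgm_ops)`,
		`CREATE INDEX IF NOT EXISTS feeds_url_lower_trgm
			ON feeds USING gin (LOWER(url) gin_trgm_ops)`,
	} {
		if _, err := db.Exec(ctx, stmt); err != nil {
			return err
		}
	}
	return nil
}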
@@ -163,7 +166,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 	tsQuery := ToSearchQuery(query)
 	feedRows, err := c.db.Query(`
 		SELECT url, type, category, title, description, language, site_url,
-			discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+			discovered_at, last_checked_at, next_check_at, last_build_date,
 			status, last_error, last_error_at,
 			source_url, source_host, tld,
 			item_count, oldest_item_date, newest_item_date, no_update
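ToSearchQuery is referenced here but not shown in this diff; one plausible reading, purely a sketch and possibly different from the real implementation, is that it turns free-form user input into a to_tsquery-compatible string:

// Hypothetical sketch of ToSearchQuery, which this diff references but
// does not show: join whitespace-separated terms with " & " so the
// result can be fed to to_tsquery.
func ToSearchQuery(q string) string {
	return strings.Join(strings.Fields(q), " & ")
}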
@@ -228,7 +231,7 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		// Fetch feed info for this item's feed
 		var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string
 		var fDiscoveredAt time.Time
-		var fLastCrawledAt, fNextCrawlAt, fLastBuildDate *time.Time
+		var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time
 		var fItemCount *int
 		var fStatus, fLastError *string
 		var fLastErrorAt *time.Time
@@ -238,13 +241,13 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		c.db.QueryRow(`
 			SELECT type, category, title, description, language, site_url,
-				discovered_at, last_crawled_at, next_crawl_at, last_build_date,
+				discovered_at, last_checked_at, next_check_at, last_build_date,
 				status, last_error, last_error_at,
 				source_url, source_host, tld,
 				item_count, oldest_item_date, newest_item_date, no_update
 			FROM feeds WHERE url = $1
 		`, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl,
-			&fDiscoveredAt, &fLastCrawledAt, &fNextCrawlAt, &fLastBuildDate,
+			&fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate,
 			&fStatus, &fLastError, &fLastErrorAt,
 			&fSourceUrl, &fSourceHost, &fTLD,
 			&fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate)
@@ -268,11 +271,11 @@ func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
 			SourceHost: StringValue(fSourceHost),
 			TLD:        StringValue(fTLD),
 		}
-		if fLastCrawledAt != nil {
-			sf.LastCrawledAt = fLastCrawledAt.Format(time.RFC3339)
+		if fLastCheckedAt != nil {
+			sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339)
 		}
-		if fNextCrawlAt != nil {
-			sf.NextCrawlAt = fNextCrawlAt.Format(time.RFC3339)
+		if fNextCheckAt != nil {
+			sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339)
 		}
 		if fLastBuildDate != nil {
 			sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339)