Files
crawler/api_search.go
primal 8a9001c02c Restore working codebase with all methods
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 19:08:53 -05:00

315 lines
9.6 KiB
Go

package main
import (
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"github.com/jackc/pgx/v5"
)
// SearchResult represents a search result with feed and matching items:
// one feed's metadata plus the items from that feed that matched the
// query. Items is empty when only the feed itself matched.
type SearchResult struct {
	Feed  SearchFeed   `json:"feed"`
	Items []SearchItem `json:"items"`
}
// SearchFeed is the feed-metadata half of a SearchResult, flattened for
// JSON output. All *At / *Date fields are RFC 3339 timestamp strings and
// are "" when the underlying database value is NULL. Field names mirror
// the columns of the feeds table.
type SearchFeed struct {
	URL           string `json:"url"`
	Type          string `json:"type"`
	Category      string `json:"category"` // "main" when no category is recorded
	Title         string `json:"title"`
	Description   string `json:"description"`
	Language      string `json:"language"`
	SiteURL       string `json:"site_url"`
	DiscoveredAt  string `json:"discovered_at"`
	LastCheckedAt string `json:"last_checked_at"`
	NextCheckAt   string `json:"next_check_at"`
	LastBuildDate string `json:"last_build_date"`
	Status        string `json:"status"`
	LastError     string `json:"last_error"`
	LastErrorAt   string `json:"last_error_at"`
	SourceURL     string `json:"source_url"`
	SourceHost    string `json:"source_host"`
	TLD           string `json:"tld"`
	// Aggregates over the feed's items; zero values when NULL in the DB.
	ItemCount      int    `json:"item_count"`
	OldestItemDate string `json:"oldest_item_date"`
	NewestItemDate string `json:"newest_item_date"`
	NoUpdate       bool   `json:"no_update"`
}
// SearchItem is a single feed item matched by the search query.
// PubDate, DiscoveredAt and UpdatedAt are RFC 3339 timestamp strings and
// are "" when the underlying database value is NULL.
type SearchItem struct {
	ID           int64  `json:"id"`
	FeedURL      string `json:"feed_url"` // key used to group the item under its feed
	GUID         string `json:"guid"`
	Title        string `json:"title"`
	Link         string `json:"link"`
	Description  string `json:"description"`
	Content      string `json:"content"`
	Author       string `json:"author"`
	PubDate      string `json:"pub_date"`
	DiscoveredAt string `json:"discovered_at"`
	UpdatedAt    string `json:"updated_at"`
}
// handleAPISearch serves GET /api/search?q=<query>[&limit=<n>].
//
// It runs three best-effort searches and merges the hits, keyed by feed URL:
//  1. feeds whose source_host or url contains the query (case-insensitive
//     LIKE; LOWER() is applied so a trigram index on the lowered column
//     can be used),
//  2. feeds matching the query via Postgres full-text search,
//  3. items matching the query via full-text search; each item is grouped
//     under its feed, fetching the feed row when it was not already matched.
//
// A failure in any single search is tolerated: the handler responds with
// whatever the other searches produced. The response is a JSON array of
// SearchResult; an empty result set encodes as [] rather than null.
func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query().Get("q")
	if query == "" {
		http.Error(w, "q parameter required", http.StatusBadRequest)
		return
	}

	// Optional result limit: default 100, capped at 500. Parse into a
	// temporary so an unparsable or non-positive value is ignored instead
	// of reaching the SQL LIMIT clause (negative LIMIT is a Postgres error,
	// zero would silently return nothing).
	limit := 100
	if l := r.URL.Query().Get("limit"); l != "" {
		var n int
		if _, err := fmt.Sscanf(l, "%d", &n); err == nil && n > 0 {
			limit = n
		}
		if limit > 500 {
			limit = 500
		}
	}

	// results accumulates one SearchResult per feed URL.
	results := make(map[string]*SearchResult)

	// feedColumns is the exact column list (and order) scanFeed expects;
	// every feed query below selects it.
	const feedColumns = `url, type, category, title, description, language, site_url,
		discovered_at, last_checked_at, next_check_at, last_build_date,
		status, last_error, last_error_at,
		source_url, source_host, tld,
		item_count, oldest_item_date, newest_item_date, no_update`

	// fmtTime renders a nullable timestamp as RFC 3339, or "" for NULL.
	fmtTime := func(t *time.Time) string {
		if t == nil {
			return ""
		}
		return t.Format(time.RFC3339)
	}

	// scanFeed scans the current row (selected with feedColumns) into a
	// SearchFeed; ok is false when the scan fails.
	scanFeed := func(rows pgx.Rows) (string, SearchFeed, bool) {
		var url string
		var feedType, category, title, description, language, siteURL *string
		var discoveredAt time.Time
		var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt *time.Time
		var status, lastError *string
		var sourceURL, sourceHost, tld *string
		var itemCount *int
		var oldestItemDate, newestItemDate *time.Time
		var noUpdate *bool
		if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteURL,
			&discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate,
			&status, &lastError, &lastErrorAt,
			&sourceURL, &sourceHost, &tld,
			&itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil {
			return "", SearchFeed{}, false
		}
		cat := StringValue(category)
		if cat == "" {
			cat = "main" // default category when none is recorded
		}
		sf := SearchFeed{
			URL:            url,
			Type:           StringValue(feedType),
			Category:       cat,
			Title:          StringValue(title),
			Description:    StringValue(description),
			Language:       StringValue(language),
			SiteURL:        StringValue(siteURL),
			DiscoveredAt:   discoveredAt.Format(time.RFC3339),
			LastCheckedAt:  fmtTime(lastCheckedAt),
			NextCheckAt:    fmtTime(nextCheckAt),
			LastBuildDate:  fmtTime(lastBuildDate),
			Status:         StringValue(status),
			LastError:      StringValue(lastError),
			LastErrorAt:    fmtTime(lastErrorAt),
			SourceURL:      StringValue(sourceURL),
			SourceHost:     StringValue(sourceHost),
			TLD:            StringValue(tld),
			OldestItemDate: fmtTime(oldestItemDate),
			NewestItemDate: fmtTime(newestItemDate),
		}
		if itemCount != nil {
			sf.ItemCount = *itemCount
		}
		if noUpdate != nil {
			sf.NoUpdate = *noUpdate
		}
		return url, sf, true
	}

	// collectFeeds drains a feed query into results, closing the rows as
	// soon as the loop finishes (rather than deferring to function return,
	// which would hold them open across the later queries).
	collectFeeds := func(rows pgx.Rows) {
		defer rows.Close()
		for rows.Next() {
			url, feed, ok := scanFeed(rows)
			if !ok {
				continue // skip unscannable rows, keep the rest
			}
			if _, exists := results[url]; !exists {
				results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}}
			}
		}
	}

	// 1. Substring match on source host / feed URL.
	lowerPattern := "%" + strings.ToLower(query) + "%"
	if rows, err := c.db.Query(
		"SELECT "+feedColumns+" FROM feeds WHERE LOWER(source_host) LIKE $1 OR LOWER(url) LIKE $1 LIMIT $2",
		lowerPattern, limit); err == nil {
		collectFeeds(rows)
	}

	// 2. Full-text match on feeds.
	tsQuery := ToSearchQuery(query)
	if rows, err := c.db.Query(
		"SELECT "+feedColumns+" FROM feeds WHERE search_vector @@ to_tsquery('english', $1) LIMIT $2",
		tsQuery, limit); err == nil {
		collectFeeds(rows)
	}

	// 3. Full-text match on items, newest first.
	if itemRows, err := c.db.Query(`
		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
		FROM items i
		WHERE i.search_vector @@ to_tsquery('english', $1)
		ORDER BY i.pub_date DESC
		LIMIT $2
	`, tsQuery, limit); err == nil {
		for itemRows.Next() {
			var id int64
			var feedURL string
			var guid, title, link, description, content, author *string
			var pubDate, discoveredAt, updatedAt *time.Time
			if err := itemRows.Scan(&id, &feedURL, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
				continue // skip unscannable rows, keep the rest
			}
			item := SearchItem{
				ID:           id,
				FeedURL:      feedURL,
				GUID:         StringValue(guid),
				Title:        StringValue(title),
				Link:         StringValue(link),
				Description:  StringValue(description),
				Content:      StringValue(content),
				Author:       StringValue(author),
				PubDate:      fmtTime(pubDate),
				DiscoveredAt: fmtTime(discoveredAt),
				UpdatedAt:    fmtTime(updatedAt),
			}
			if result, exists := results[feedURL]; exists {
				result.Items = append(result.Items, item)
				continue
			}
			// The item's feed was not matched by the feed searches: fetch
			// its row, selecting feedColumns so scanFeed can be reused
			// instead of a second hand-rolled 21-column scan.
			attached := false
			if frows, ferr := c.db.Query(
				"SELECT "+feedColumns+" FROM feeds WHERE url = $1", feedURL); ferr == nil {
				if frows.Next() {
					if u, sf, ok := scanFeed(frows); ok {
						results[u] = &SearchResult{Feed: sf, Items: []SearchItem{item}}
						attached = true
					}
				}
				frows.Close()
			}
			if !attached {
				// Feed row missing or unreadable: keep the matching item
				// under a stub feed rather than dropping the hit or
				// emitting a zero-value timestamp.
				results[feedURL] = &SearchResult{
					Feed:  SearchFeed{URL: feedURL, Category: "main"},
					Items: []SearchItem{item},
				}
			}
		}
		itemRows.Close()
	}

	// Flatten into a pre-sized, non-nil slice so an empty result set
	// encodes as [] rather than null.
	resultList := make([]SearchResult, 0, len(results))
	for _, res := range results {
		resultList = append(resultList, *res)
	}
	w.Header().Set("Content-Type", "application/json")
	// Encode error (e.g. client disconnect) is deliberately ignored: the
	// status line and headers have already been sent.
	_ = json.NewEncoder(w).Encode(resultList)
}