package main

import (
	"encoding/json"
	"net/http"
	"strconv"
	"time"

	"github.com/jackc/pgx/v5"
)

// SearchResult represents a search result with a feed and its matching items.
type SearchResult struct {
	Feed  SearchFeed   `json:"feed"`
	Items []SearchItem `json:"items"`
}

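// SearchFeed is the JSON shape of a feeds row. Timestamps are RFC 3339
// strings; fields backed by nullable columns keep their zero value when
// the column is NULL.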
type SearchFeed struct {
	URL            string `json:"url"`
	Type           string `json:"type"`
	Category       string `json:"category"`
	Title          string `json:"title"`
	Description    string `json:"description"`
	Language       string `json:"language"`
	SiteURL        string `json:"site_url"`
	DiscoveredAt   string `json:"discovered_at"`
	LastCrawledAt  string `json:"last_crawled_at"`
	NextCrawlAt    string `json:"next_crawl_at"`
	LastBuildDate  string `json:"last_build_date"`
	Status         string `json:"status"`
	LastError      string `json:"last_error"`
	LastErrorAt    string `json:"last_error_at"`
	SourceURL      string `json:"source_url"`
	SourceHost     string `json:"source_host"`
	TLD            string `json:"tld"`
	ItemCount      int    `json:"item_count"`
	OldestItemDate string `json:"oldest_item_date"`
	NewestItemDate string `json:"newest_item_date"`
	NoUpdate       bool   `json:"no_update"`
}

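// SearchItem is the JSON shape of an items row, keyed to its parent feed
// by FeedURL. Timestamps are RFC 3339 strings, empty when NULL.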
type SearchItem struct {
	ID           int64  `json:"id"`
	FeedURL      string `json:"feed_url"`
	GUID         string `json:"guid"`
	Title        string `json:"title"`
	Link         string `json:"link"`
	Description  string `json:"description"`
	Content      string `json:"content"`
	Author       string `json:"author"`
	PubDate      string `json:"pub_date"`
	DiscoveredAt string `json:"discovered_at"`
	UpdatedAt    string `json:"updated_at"`
}

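// handleAPISearch serves the search endpoint. It requires a q parameter
// and accepts an optional limit (default 100, capped at 500), then runs
// three best-effort passes: a substring match on feed host/URL, a
// full-text search over feeds, and a full-text search over items. The
// response is a JSON array of SearchResult values grouped by feed.
//
// A minimal usage sketch (the route path, address, and crawler variable
// are illustrative assumptions, not taken from this file):
//
//	http.HandleFunc("/api/search", crawler.handleAPISearch)
//	// curl 'http://localhost:8080/api/search?q=example&limit=50'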
func (c *Crawler) handleAPISearch(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query().Get("q")
	if query == "" {
		http.Error(w, "q parameter required", http.StatusBadRequest)
		return
	}

	// Parse the optional limit, falling back to the default on malformed
	// or non-positive values and capping it at 500.
	limit := 100
	if l := r.URL.Query().Get("limit"); l != "" {
		if n, err := strconv.Atoi(l); err == nil && n > 0 {
			limit = n
		}
		if limit > 500 {
			limit = 500
		}
	}

	// results groups everything by feed URL so host matches, feed
	// full-text matches, and item matches merge into one entry per feed.
	results := make(map[string]*SearchResult)

	// scanFeed converts one feeds row into a SearchFeed. It takes pgx.Row
	// (only Scan is needed) so both Query rows and QueryRow results can use
	// it; nullable columns go through pointers and are set only when present.
	scanFeed := func(row pgx.Row) (string, SearchFeed, bool) {
		var url string
		var feedType, category, title, description, language, siteUrl *string
		var discoveredAt time.Time
		var lastCrawledAt, nextCrawlAt, lastBuildDate *time.Time
		var itemCount *int
		var status, lastError *string
		var lastErrorAt *time.Time
		var sourceUrl, sourceHost, tld *string
		var oldestItemDate, newestItemDate *time.Time
		var noUpdate *bool

		if err := row.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl,
			&discoveredAt, &lastCrawledAt, &nextCrawlAt, &lastBuildDate,
			&status, &lastError, &lastErrorAt,
			&sourceUrl, &sourceHost, &tld,
			&itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil {
			return "", SearchFeed{}, false
		}
		cat := StringValue(category)
		if cat == "" {
			cat = "main"
		}
		sf := SearchFeed{
			URL:          url,
			Type:         StringValue(feedType),
			Category:     cat,
			Title:        StringValue(title),
			Description:  StringValue(description),
			Language:     StringValue(language),
			SiteURL:      StringValue(siteUrl),
			DiscoveredAt: discoveredAt.Format(time.RFC3339),
			Status:       StringValue(status),
			LastError:    StringValue(lastError),
			SourceURL:    StringValue(sourceUrl),
			SourceHost:   StringValue(sourceHost),
			TLD:          StringValue(tld),
		}
		if lastCrawledAt != nil {
			sf.LastCrawledAt = lastCrawledAt.Format(time.RFC3339)
		}
		if nextCrawlAt != nil {
			sf.NextCrawlAt = nextCrawlAt.Format(time.RFC3339)
		}
		if lastBuildDate != nil {
			sf.LastBuildDate = lastBuildDate.Format(time.RFC3339)
		}
		if lastErrorAt != nil {
			sf.LastErrorAt = lastErrorAt.Format(time.RFC3339)
		}
		if itemCount != nil {
			sf.ItemCount = *itemCount
		}
		if oldestItemDate != nil {
			sf.OldestItemDate = oldestItemDate.Format(time.RFC3339)
		}
		if newestItemDate != nil {
			sf.NewestItemDate = newestItemDate.Format(time.RFC3339)
		}
		if noUpdate != nil {
			sf.NoUpdate = *noUpdate
		}
		return url, sf, true
	}

	// Search feeds by source_host or URL (case-insensitive ILIKE substring
	// match, so bare domains find their feeds). Each search pass below is
	// best-effort: on query error it is simply skipped.
	hostRows, err := c.db.Query(`
		SELECT url, type, category, title, description, language, site_url,
		       discovered_at, last_crawled_at, next_crawl_at, last_build_date,
		       status, last_error, last_error_at,
		       source_url, source_host, tld,
		       item_count, oldest_item_date, newest_item_date, no_update
		FROM feeds
		WHERE source_host ILIKE $1 OR url ILIKE $1
		LIMIT $2
	`, "%"+query+"%", limit)
	if err == nil {
		defer hostRows.Close()
		for hostRows.Next() {
			if url, feed, ok := scanFeed(hostRows); ok {
				if _, exists := results[url]; !exists {
					results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}}
				}
			}
		}
	}

	// Search feeds via full-text search over their metadata.
	tsQuery := ToSearchQuery(query)
	feedRows, err := c.db.Query(`
		SELECT url, type, category, title, description, language, site_url,
		       discovered_at, last_crawled_at, next_crawl_at, last_build_date,
		       status, last_error, last_error_at,
		       source_url, source_host, tld,
		       item_count, oldest_item_date, newest_item_date, no_update
		FROM feeds
		WHERE search_vector @@ to_tsquery('english', $1)
		LIMIT $2
	`, tsQuery, limit)
	if err == nil {
		defer feedRows.Close()
		for feedRows.Next() {
			if url, feed, ok := scanFeed(feedRows); ok {
				if _, exists := results[url]; !exists {
					results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}}
				}
			}
		}
	}

	// Search items via full-text search, newest first.
	itemRows, err := c.db.Query(`
		SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at
		FROM items i
		WHERE i.search_vector @@ to_tsquery('english', $1)
		ORDER BY i.pub_date DESC
		LIMIT $2
	`, tsQuery, limit)
	if err == nil {
		defer itemRows.Close()
		for itemRows.Next() {
			var id int64
			var feedUrl string
			var guid, title, link, description, content, author *string
			var pubDate, discoveredAt, updatedAt *time.Time
			if err := itemRows.Scan(&id, &feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil {
				continue
			}

			item := SearchItem{
				ID:          id,
				FeedURL:     feedUrl,
				GUID:        StringValue(guid),
				Title:       StringValue(title),
				Link:        StringValue(link),
				Description: StringValue(description),
				Content:     StringValue(content),
				Author:      StringValue(author),
			}
			if pubDate != nil {
				item.PubDate = pubDate.Format(time.RFC3339)
			}
			if discoveredAt != nil {
				item.DiscoveredAt = discoveredAt.Format(time.RFC3339)
			}
			if updatedAt != nil {
				item.UpdatedAt = updatedAt.Format(time.RFC3339)
			}

			// Add to the existing result, or look up this item's feed and
			// create one. The lookup reuses scanFeed, which assumes
			// c.db.QueryRow returns a pgx.Row; if the feed row is missing
			// or fails to scan, a stub feed carrying only the URL is used.
			if result, exists := results[feedUrl]; exists {
				result.Items = append(result.Items, item)
			} else {
				row := c.db.QueryRow(`
					SELECT url, type, category, title, description, language, site_url,
					       discovered_at, last_crawled_at, next_crawl_at, last_build_date,
					       status, last_error, last_error_at,
					       source_url, source_host, tld,
					       item_count, oldest_item_date, newest_item_date, no_update
					FROM feeds WHERE url = $1
				`, feedUrl)
				sf := SearchFeed{URL: feedUrl}
				if _, scanned, ok := scanFeed(row); ok {
					sf = scanned
				}
				results[feedUrl] = &SearchResult{
					Feed:  sf,
					Items: []SearchItem{item},
				}
			}
		}
	}

	// Convert the map to a slice. Start from a non-nil empty slice so an
	// empty result set encodes as [] rather than null.
	resultList := make([]SearchResult, 0, len(results))
	for _, res := range results {
		resultList = append(resultList, *res)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(resultList)
}