diff --git a/Dockerfile b/Dockerfile index 8f29e36..924fc1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,13 +5,13 @@ WORKDIR /build # Copy shared module first COPY shared/ ./shared/ -# Copy dashboard module -COPY dashboard/ ./dashboard/ +# Copy watcher module +COPY watcher/ ./watcher/ # Build the binary -WORKDIR /build/dashboard +WORKDIR /build/watcher RUN go mod download -RUN CGO_ENABLED=0 go build -o dashboard . +RUN CGO_ENABLED=0 go build -o watcher . # Runtime stage FROM ubuntu:latest @@ -22,11 +22,11 @@ WORKDIR /app RUN apt-get update && apt-get install -y ca-certificates tzdata curl wget && rm -rf /var/lib/apt/lists/* # Copy binary from builder -COPY --from=builder /build/dashboard/dashboard . +COPY --from=builder /build/watcher/watcher . # Copy static files -COPY --from=builder /build/dashboard/static ./static +COPY --from=builder /build/watcher/static ./static EXPOSE 4321 -CMD ["./dashboard"] +CMD ["./watcher"] diff --git a/api_feeds.go b/api_feeds.go new file mode 100644 index 0000000..2984eeb --- /dev/null +++ b/api_feeds.go @@ -0,0 +1,452 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "strings" + "time" + + "github.com/1440news/shared" + "github.com/jackc/pgx/v5" +) + +// GetItemsByFeed retrieves items for a feed +func (d *Dashboard) GetItemsByFeed(feedURL string, limit int) ([]*shared.Item, error) { + rows, err := d.db.Query(` + SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at + FROM items + WHERE feed_url = $1 + ORDER BY pub_date DESC + LIMIT $2 + `, feedURL, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var items []*shared.Item + for rows.Next() { + var item shared.Item + var guid, title, link, description, content, author *string + var pubDate, discoveredAt, updatedAt *time.Time + if err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil { + continue + } + item.GUID = 
shared.StringValue(guid) + item.Title = shared.StringValue(title) + item.Link = shared.StringValue(link) + item.Description = shared.StringValue(description) + item.Content = shared.StringValue(content) + item.Author = shared.StringValue(author) + if pubDate != nil { + item.PubDate = *pubDate + } + if discoveredAt != nil { + item.DiscoveredAt = *discoveredAt + } + if updatedAt != nil { + item.UpdatedAt = *updatedAt + } + items = append(items, &item) + } + return items, nil +} + +func (d *Dashboard) handleAPIFeedInfo(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + + type FeedDetails struct { + URL string `json:"url"` + Type string `json:"type,omitempty"` + Category string `json:"category,omitempty"` + Title string `json:"title,omitempty"` + Description string `json:"description,omitempty"` + Language string `json:"language,omitempty"` + SiteURL string `json:"siteUrl,omitempty"` + DiscoveredAt string `json:"discoveredAt,omitempty"` + LastCheckedAt string `json:"lastCheckedAt,omitempty"` + NextCheckAt string `json:"nextCheckAt,omitempty"` + LastBuildDate string `json:"lastBuildDate,omitempty"` + Status string `json:"status,omitempty"` + LastError string `json:"lastError,omitempty"` + ItemCount int `json:"itemCount,omitempty"` + OldestItemDate string `json:"oldestItemDate,omitempty"` + NewestItemDate string `json:"newestItemDate,omitempty"` + PublishStatus string `json:"publishStatus,omitempty"` + PublishAccount string `json:"publishAccount,omitempty"` + } + + var f FeedDetails + var category, title, description, language, siteUrl *string + var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time + var status, lastError *string + var oldestItemDate, newestItemDate *time.Time + var itemCount *int + var discoveredAt time.Time + var publishStatus, publishAccount *string + + err := d.db.QueryRow(` + SELECT url, type, category, title, 
description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + status, last_error, + (SELECT COUNT(*) FROM items WHERE feed_url = feeds.url) as item_count, + oldest_item_date, newest_item_date, + publish_status, publish_account + FROM feeds WHERE url = $1 + `, feedURL).Scan( + &f.URL, &f.Type, &category, &title, &description, &language, &siteUrl, + &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, + &status, &lastError, + &itemCount, &oldestItemDate, &newestItemDate, + &publishStatus, &publishAccount, + ) + + if err == pgx.ErrNoRows { + http.Error(w, "feed not found", http.StatusNotFound) + return + } + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + f.Category = shared.StringValue(category) + f.Title = shared.StringValue(title) + f.Description = shared.StringValue(description) + f.Language = shared.StringValue(language) + f.SiteURL = shared.StringValue(siteUrl) + f.DiscoveredAt = discoveredAt.Format(time.RFC3339) + if lastCheckedAt != nil { + f.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) + } + if nextCheckAt != nil { + f.NextCheckAt = nextCheckAt.Format(time.RFC3339) + } + if lastBuildDate != nil { + f.LastBuildDate = lastBuildDate.Format(time.RFC3339) + } + f.Status = shared.StringValue(status) + f.LastError = shared.StringValue(lastError) + if itemCount != nil { + f.ItemCount = *itemCount + } + if oldestItemDate != nil { + f.OldestItemDate = oldestItemDate.Format(time.RFC3339) + } + if newestItemDate != nil { + f.NewestItemDate = newestItemDate.Format(time.RFC3339) + } + f.PublishStatus = shared.StringValue(publishStatus) + f.PublishAccount = shared.StringValue(publishAccount) + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(f) +} + +func (d *Dashboard) handleAPIFeedItems(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + if feedURL == "" { + http.Error(w, "url parameter required", 
http.StatusBadRequest) + return + } + + limit := 50 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 100 { + limit = 100 + } + } + + items, err := d.GetItemsByFeed(feedURL, limit) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if items == nil { + items = []*shared.Item{} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(items) +} + +func (d *Dashboard) handleAPIFeedsByStatus(w http.ResponseWriter, r *http.Request) { + status := r.URL.Query().Get("status") + if status == "" { + http.Error(w, "status parameter required", http.StatusBadRequest) + return + } + + limit := 100 + offset := 0 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 500 { + limit = 500 + } + } + if o := r.URL.Query().Get("offset"); o != "" { + fmt.Sscanf(o, "%d", &offset) + } + + rows, err := d.db.Query(` + SELECT url, title, type, domain_host, domain_tld, status, last_error, item_count + FROM feeds + WHERE status = $1 + ORDER BY url ASC + LIMIT $2 OFFSET $3 + `, status, limit, offset) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer rows.Close() + + type FeedInfo struct { + URL string `json:"url"` + Title string `json:"title,omitempty"` + Type string `json:"type"` + SourceHost string `json:"source_host"` + TLD string `json:"tld"` + Status string `json:"status"` + LastError string `json:"last_error,omitempty"` + ItemCount int `json:"item_count,omitempty"` + } + + var feeds []FeedInfo + for rows.Next() { + var f FeedInfo + var title, sourceHost, tld, lastError *string + var itemCount *int + if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount); err != nil { + continue + } + f.Title = shared.StringValue(title) + f.SourceHost = shared.StringValue(sourceHost) + f.TLD = shared.StringValue(tld) + f.LastError = shared.StringValue(lastError) + 
if itemCount != nil { + f.ItemCount = *itemCount + } + feeds = append(feeds, f) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(feeds) +} + +// handleAPIFeeds lists feeds with optional publish_status filter +func (d *Dashboard) handleAPIFeeds(w http.ResponseWriter, r *http.Request) { + publishStatus := r.URL.Query().Get("publish_status") + limit := 100 + offset := 0 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 500 { + limit = 500 + } + } + if o := r.URL.Query().Get("offset"); o != "" { + fmt.Sscanf(o, "%d", &offset) + } + + var rows pgx.Rows + var err error + if publishStatus != "" { + rows, err = d.db.Query(` + SELECT url, title, type, domain_host, domain_tld, status, last_error, item_count, publish_status, language + FROM feeds + WHERE publish_status = $1 + ORDER BY url ASC + LIMIT $2 OFFSET $3 + `, publishStatus, limit, offset) + } else { + rows, err = d.db.Query(` + SELECT url, title, type, domain_host, domain_tld, status, last_error, item_count, publish_status, language + FROM feeds + ORDER BY url ASC + LIMIT $1 OFFSET $2 + `, limit, offset) + } + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer rows.Close() + + type FeedInfo struct { + URL string `json:"url"` + Title string `json:"title,omitempty"` + Type string `json:"type"` + SourceHost string `json:"source_host"` + TLD string `json:"tld"` + Status string `json:"status"` + LastError string `json:"last_error,omitempty"` + ItemCount int `json:"item_count,omitempty"` + PublishStatus string `json:"publish_status,omitempty"` + Language string `json:"language,omitempty"` + } + + var feeds []FeedInfo + for rows.Next() { + var f FeedInfo + var title, sourceHost, tld, lastError, publishStatus, language *string + var itemCount *int + if err := rows.Scan(&f.URL, &title, &f.Type, &sourceHost, &tld, &f.Status, &lastError, &itemCount, &publishStatus, &language); err != nil { + continue + } 
+ f.Title = shared.StringValue(title) + f.SourceHost = shared.StringValue(sourceHost) + f.TLD = shared.StringValue(tld) + f.LastError = shared.StringValue(lastError) + f.PublishStatus = shared.StringValue(publishStatus) + f.Language = shared.StringValue(language) + if itemCount != nil { + f.ItemCount = *itemCount + } + feeds = append(feeds, f) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(feeds) +} + +func (d *Dashboard) filterFeeds(w http.ResponseWriter, tld, domain, status string, languages []string, limit, offset int) { + var args []interface{} + argNum := 1 + query := ` + SELECT url, title, type, category, domain_host, domain_tld, status, last_error, item_count, language + FROM feeds + WHERE 1=1` + + if tld != "" { + query += fmt.Sprintf(" AND domain_tld = $%d", argNum) + args = append(args, tld) + argNum++ + } + if domain != "" { + // Parse domain into host and tld parts + domainHost := shared.StripTLD(domain) + domainTLD := shared.GetTLD(domain) + query += fmt.Sprintf(" AND domain_host = $%d AND domain_tld = $%d", argNum, argNum+1) + args = append(args, domainHost, domainTLD) + argNum += 2 + } + if status != "" { + query += fmt.Sprintf(" AND status = $%d", argNum) + args = append(args, status) + argNum++ + } + if len(languages) > 0 { + // Build IN clause for languages, handling 'unknown' as empty string + placeholders := make([]string, len(languages)) + for i, lang := range languages { + placeholders[i] = fmt.Sprintf("$%d", argNum) + if lang == "unknown" { + args = append(args, "") + } else { + args = append(args, lang) + } + argNum++ + } + query += fmt.Sprintf(" AND COALESCE(language, '') IN (%s)", strings.Join(placeholders, ",")) + } + + query += fmt.Sprintf(" ORDER BY url ASC LIMIT $%d OFFSET $%d", argNum, argNum+1) + args = append(args, limit, offset) + + rows, err := d.db.Query(query, args...) 
+ if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer rows.Close() + + type FeedInfo struct { + URL string `json:"url"` + Title string `json:"title,omitempty"` + Type string `json:"type"` + Category string `json:"category"` + SourceHost string `json:"source_host"` + TLD string `json:"tld"` + Status string `json:"status"` + LastError string `json:"last_error,omitempty"` + ItemCount int `json:"item_count,omitempty"` + Language string `json:"language,omitempty"` + } + + var feeds []FeedInfo + for rows.Next() { + var f FeedInfo + var title, category, sourceHost, tldVal, lastError, language *string + var itemCount *int + if err := rows.Scan(&f.URL, &title, &f.Type, &category, &sourceHost, &tldVal, &f.Status, &lastError, &itemCount, &language); err != nil { + continue + } + f.Title = shared.StringValue(title) + if category != nil && *category != "" { + f.Category = *category + } else { + f.Category = "main" + } + f.SourceHost = shared.StringValue(sourceHost) + f.TLD = shared.StringValue(tldVal) + f.LastError = shared.StringValue(lastError) + if itemCount != nil { + f.ItemCount = *itemCount + } + f.Language = shared.StringValue(language) + feeds = append(feeds, f) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "type": "feeds", + "data": feeds, + }) +} + +// handleAPICheckFeed - NOTE: This requires crawler functionality (feed checking) +// For now, returns an error. In the future, could call crawler API. +func (d *Dashboard) handleAPICheckFeed(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Feed checking not available in standalone dashboard. 
Use crawler service.", http.StatusNotImplemented) +} + +// handleAPILanguages returns distinct languages with counts +func (d *Dashboard) handleAPILanguages(w http.ResponseWriter, r *http.Request) { + rows, err := d.db.Query(` + SELECT COALESCE(NULLIF(language, ''), 'unknown') as lang, COUNT(*) as cnt + FROM feeds + GROUP BY lang + ORDER BY cnt DESC + `) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer rows.Close() + + type LangInfo struct { + Language string `json:"language"` + Count int `json:"count"` + } + + var languages []LangInfo + for rows.Next() { + var l LangInfo + if err := rows.Scan(&l.Language, &l.Count); err != nil { + continue + } + languages = append(languages, l) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(languages) +} diff --git a/api_publish.go b/api_publish.go new file mode 100644 index 0000000..f961c5d --- /dev/null +++ b/api_publish.go @@ -0,0 +1,575 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/1440news/shared" +) + +// Database helper methods for publish-related queries + +// SetPublishStatus updates a feed's publish status and account +func (d *Dashboard) SetPublishStatus(feedURL, status, account string) error { + _, err := d.db.Exec(` + UPDATE feeds SET publish_status = $1, publish_account = $2 WHERE url = $3 + `, status, shared.NullableString(account), feedURL) + return err +} + +// GetFeedsByPublishStatus returns feeds with a specific publish status +func (d *Dashboard) GetFeedsByPublishStatus(status string) ([]*shared.Feed, error) { + rows, err := d.db.Query(` + SELECT url, type, category, title, description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + status, last_error, last_error_at, + source_url, domain_host, domain_tld, + item_count, oldest_item_date, newest_item_date, no_update, + publish_status, publish_account + FROM feeds + WHERE publish_status = 
$1 + ORDER BY url ASC + `, status) + if err != nil { + return nil, err + } + defer rows.Close() + + var feeds []*shared.Feed + for rows.Next() { + f := &shared.Feed{} + var category, title, description, language, siteUrl *string + var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt *time.Time + var feedStatus, lastError *string + var sourceUrl, domainHost, domainTLD *string + var itemCount, noUpdate *int + var oldestItemDate, newestItemDate *time.Time + var publishStatus, publishAccount *string + + if err := rows.Scan(&f.URL, &f.Type, &category, &title, &description, &language, &siteUrl, + &f.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, + &feedStatus, &lastError, &lastErrorAt, + &sourceUrl, &domainHost, &domainTLD, + &itemCount, &oldestItemDate, &newestItemDate, &noUpdate, + &publishStatus, &publishAccount); err != nil { + continue + } + f.Category = shared.StringValue(category) + f.Title = shared.StringValue(title) + f.Description = shared.StringValue(description) + f.Language = shared.StringValue(language) + f.SiteURL = shared.StringValue(siteUrl) + if lastCheckedAt != nil { + f.LastCheckedAt = *lastCheckedAt + } + if nextCheckAt != nil { + f.NextCheckAt = *nextCheckAt + } + if lastBuildDate != nil { + f.LastBuildDate = *lastBuildDate + } + f.Status = shared.StringValue(feedStatus) + f.LastError = shared.StringValue(lastError) + if lastErrorAt != nil { + f.LastErrorAt = *lastErrorAt + } + f.SourceURL = shared.StringValue(sourceUrl) + f.DomainHost = shared.StringValue(domainHost) + f.DomainTLD = shared.StringValue(domainTLD) + if itemCount != nil { + f.ItemCount = *itemCount + } + if oldestItemDate != nil { + f.OldestItemDate = *oldestItemDate + } + if newestItemDate != nil { + f.NewestItemDate = *newestItemDate + } + if noUpdate != nil { + f.NoUpdate = *noUpdate + } + f.PublishStatus = shared.StringValue(publishStatus) + f.PublishAccount = shared.StringValue(publishAccount) + + feeds = append(feeds, f) + } + return feeds, nil +} + +// 
GetUnpublishedItemCount returns count of unpublished items for a feed +func (d *Dashboard) GetUnpublishedItemCount(feedURL string) (int, error) { + var count int + err := d.db.QueryRow(` + SELECT COUNT(*) FROM items WHERE feed_url = $1 AND published_at IS NULL + `, feedURL).Scan(&count) + return count, err +} + +// GetPublishCandidates returns feeds pending review that have items +func (d *Dashboard) GetPublishCandidates(limit int) ([]*shared.Feed, error) { + rows, err := d.db.Query(` + SELECT url, type, category, title, description, domain_host, domain_tld, item_count + FROM feeds + WHERE publish_status = 'hold' + AND status = 'pass' + AND item_count > 0 + AND language = 'en' + ORDER BY item_count DESC + LIMIT $1 + `, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var feeds []*shared.Feed + for rows.Next() { + f := &shared.Feed{} + var category, title, description, domainHost, domainTLD *string + var itemCount *int + if err := rows.Scan(&f.URL, &f.Type, &category, &title, &description, &domainHost, &domainTLD, &itemCount); err != nil { + continue + } + f.Category = shared.StringValue(category) + if f.Category == "" { + f.Category = "main" + } + f.Title = shared.StringValue(title) + f.Description = shared.StringValue(description) + f.DomainHost = shared.StringValue(domainHost) + f.DomainTLD = shared.StringValue(domainTLD) + if itemCount != nil { + f.ItemCount = *itemCount + } + feeds = append(feeds, f) + } + return feeds, nil +} + +// GetUnpublishedItems returns unpublished items for a feed +func (d *Dashboard) GetUnpublishedItems(feedURL string, limit int) ([]*shared.Item, error) { + rows, err := d.db.Query(` + SELECT feed_url, guid, title, link, description, content, author, pub_date, discovered_at, updated_at + FROM items + WHERE feed_url = $1 AND published_at IS NULL + ORDER BY pub_date ASC + LIMIT $2 + `, feedURL, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var items []*shared.Item + for rows.Next() { + var 
item shared.Item + var guid, title, link, description, content, author *string + var pubDate, discoveredAt, updatedAt *time.Time + if err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil { + continue + } + item.GUID = shared.StringValue(guid) + item.Title = shared.StringValue(title) + item.Link = shared.StringValue(link) + item.Description = shared.StringValue(description) + item.Content = shared.StringValue(content) + item.Author = shared.StringValue(author) + if pubDate != nil { + item.PubDate = *pubDate + } + if discoveredAt != nil { + item.DiscoveredAt = *discoveredAt + } + if updatedAt != nil { + item.UpdatedAt = *updatedAt + } + items = append(items, &item) + } + return items, nil +} + +// getFeed retrieves a single feed by URL +func (d *Dashboard) getFeed(feedURL string) (*shared.Feed, error) { + f := &shared.Feed{} + var category, title, description, language, siteUrl *string + var lastCheckedAt, nextCheckAt, lastBuildDate, lastErrorAt *time.Time + var status, lastError *string + var sourceUrl, domainHost, domainTLD *string + var itemCount, noUpdate *int + var oldestItemDate, newestItemDate *time.Time + var publishStatus, publishAccount *string + var etag, lastModified *string + + err := d.db.QueryRow(` + SELECT url, type, category, title, description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + etag, last_modified, + status, last_error, last_error_at, + source_url, domain_host, domain_tld, + item_count, oldest_item_date, newest_item_date, no_update, + publish_status, publish_account + FROM feeds WHERE url = $1 + `, feedURL).Scan(&f.URL, &f.Type, &category, &title, &description, &language, &siteUrl, + &f.DiscoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, + &etag, &lastModified, + &status, &lastError, &lastErrorAt, + &sourceUrl, &domainHost, &domainTLD, + &itemCount, &oldestItemDate, &newestItemDate, &noUpdate, + 
&publishStatus, &publishAccount) + if err != nil { + return nil, err + } + + f.Category = shared.StringValue(category) + f.Title = shared.StringValue(title) + f.Description = shared.StringValue(description) + f.Language = shared.StringValue(language) + f.SiteURL = shared.StringValue(siteUrl) + if lastCheckedAt != nil { + f.LastCheckedAt = *lastCheckedAt + } + if nextCheckAt != nil { + f.NextCheckAt = *nextCheckAt + } + if lastBuildDate != nil { + f.LastBuildDate = *lastBuildDate + } + f.ETag = shared.StringValue(etag) + f.LastModified = shared.StringValue(lastModified) + f.Status = shared.StringValue(status) + f.LastError = shared.StringValue(lastError) + if lastErrorAt != nil { + f.LastErrorAt = *lastErrorAt + } + f.SourceURL = shared.StringValue(sourceUrl) + f.DomainHost = shared.StringValue(domainHost) + f.DomainTLD = shared.StringValue(domainTLD) + if itemCount != nil { + f.ItemCount = *itemCount + } + if oldestItemDate != nil { + f.OldestItemDate = *oldestItemDate + } + if newestItemDate != nil { + f.NewestItemDate = *newestItemDate + } + if noUpdate != nil { + f.NoUpdate = *noUpdate + } + f.PublishStatus = shared.StringValue(publishStatus) + f.PublishAccount = shared.StringValue(publishAccount) + + return f, nil +} + +// API Handlers + +// handleAPIEnablePublish sets a feed's publish status to 'pass' (database only, no PDS account creation) +func (d *Dashboard) handleAPIEnablePublish(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + account := r.URL.Query().Get("account") + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + + feedURL = shared.NormalizeURL(feedURL) + + // Auto-derive account handle if not provided + if account == "" { + account = shared.DeriveHandleFromFeed(feedURL) + if account == "" { + http.Error(w, "could not derive account handle from URL", http.StatusBadRequest) + return + } + } + + // Check feed exists + feed, err := d.getFeed(feedURL) + if err != nil { + 
http.Error(w, "feed not found", http.StatusNotFound) + return + } + if feed == nil { + http.Error(w, "feed not found", http.StatusNotFound) + return + } + + if err := d.SetPublishStatus(feedURL, "pass", account); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + // Get unpublished count + count, _ := d.GetUnpublishedItemCount(feedURL) + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "status": "pass", + "url": feedURL, + "account": account, + "unpublished_items": count, + "note": "PDS account must be created via publisher service", + }) +} + +// handleAPIDeriveHandle shows what handle would be derived from a feed URL +func (d *Dashboard) handleAPIDeriveHandle(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + + handle := shared.DeriveHandleFromFeed(feedURL) + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "url": feedURL, + "handle": handle, + }) +} + +// handleAPIDisablePublish sets a feed's publish status to 'skip' +func (d *Dashboard) handleAPIDisablePublish(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + + feedURL = shared.NormalizeURL(feedURL) + + if err := d.SetPublishStatus(feedURL, "skip", ""); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "status": "skip", + "url": feedURL, + }) +} + +// handleAPIPublishEnabled returns all feeds with publish status 'pass' +func (d *Dashboard) handleAPIPublishEnabled(w http.ResponseWriter, r *http.Request) { + feeds, err := d.GetFeedsByPublishStatus("pass") + if 
err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + type FeedPublishInfo struct { + URL string `json:"url"` + Title string `json:"title"` + Account string `json:"account"` + UnpublishedCount int `json:"unpublished_count"` + } + + var result []FeedPublishInfo + for _, f := range feeds { + count, _ := d.GetUnpublishedItemCount(f.URL) + result = append(result, FeedPublishInfo{ + URL: f.URL, + Title: f.Title, + Account: f.PublishAccount, + UnpublishedCount: count, + }) + } + + if result == nil { + result = []FeedPublishInfo{} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(result) +} + +// handleAPIPublishDenied returns all feeds with publish status 'skip' +func (d *Dashboard) handleAPIPublishDenied(w http.ResponseWriter, r *http.Request) { + feeds, err := d.GetFeedsByPublishStatus("skip") + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + type FeedDeniedInfo struct { + URL string `json:"url"` + Title string `json:"title"` + SourceHost string `json:"source_host"` + } + + var result []FeedDeniedInfo + for _, f := range feeds { + result = append(result, FeedDeniedInfo{ + URL: f.URL, + Title: f.Title, + SourceHost: f.DomainHost, + }) + } + + if result == nil { + result = []FeedDeniedInfo{} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(result) +} + +// handleAPIPublishCandidates returns feeds pending review that have items +func (d *Dashboard) handleAPIPublishCandidates(w http.ResponseWriter, r *http.Request) { + limit := 50 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 200 { + limit = 200 + } + } + + feeds, err := d.GetPublishCandidates(limit) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + type CandidateInfo struct { + URL string `json:"url"` + Title string `json:"title"` + Category string `json:"category"` + 
SourceHost string `json:"source_host"` + ItemCount int `json:"item_count"` + DerivedHandle string `json:"derived_handle"` + } + + var result []CandidateInfo + for _, f := range feeds { + result = append(result, CandidateInfo{ + URL: f.URL, + Title: f.Title, + Category: f.Category, + SourceHost: f.DomainHost, + ItemCount: f.ItemCount, + DerivedHandle: shared.DeriveHandleFromFeed(f.URL), + }) + } + + if result == nil { + result = []CandidateInfo{} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(result) +} + +// handleAPISetPublishStatus sets the publish status for a feed (database only) +func (d *Dashboard) handleAPISetPublishStatus(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + status := r.URL.Query().Get("status") + account := r.URL.Query().Get("account") + + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + if status != "pass" && status != "skip" && status != "hold" { + http.Error(w, "status must be 'pass', 'hold', or 'skip' (use publisher service for 'drop')", http.StatusBadRequest) + return + } + + feedURL = shared.NormalizeURL(feedURL) + + result := map[string]interface{}{ + "url": feedURL, + "status": status, + } + + // Handle 'pass' - set account + if status == "pass" { + if account == "" { + account = shared.DeriveHandleFromFeed(feedURL) + } + result["account"] = account + result["note"] = "PDS account must be created via publisher service" + } + + // Handle 'hold' and 'skip' - preserve current account + if status == "hold" || status == "skip" { + feed, _ := d.getFeed(feedURL) + if feed != nil { + account = feed.PublishAccount + } + } + + if err := d.SetPublishStatus(feedURL, status, account); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + result["account"] = account + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(result) +} + +// handleAPIUnpublishedItems 
returns unpublished items for a feed +func (d *Dashboard) handleAPIUnpublishedItems(w http.ResponseWriter, r *http.Request) { + feedURL := r.URL.Query().Get("url") + if feedURL == "" { + http.Error(w, "url parameter required", http.StatusBadRequest) + return + } + + limit := 50 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 200 { + limit = 200 + } + } + + items, err := d.GetUnpublishedItems(feedURL, limit) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if items == nil { + items = []*shared.Item{} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(items) +} + +// The following handlers require PDS interaction and should use the publisher service + +func (d *Dashboard) handleAPITestPublish(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Publishing requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPIPublishFeed(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Publishing requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPICreateAccount(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Account creation requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPIPublishFeedFull(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Publishing requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPIUpdateProfile(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Profile updates require the publisher service. 
This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPIResetAllPublishing(w http.ResponseWriter, r *http.Request) { + http.Error(w, "This destructive operation requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} + +func (d *Dashboard) handleAPIRefreshProfiles(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Profile refresh requires the publisher service. This endpoint is not available in standalone dashboard.", http.StatusNotImplemented) +} diff --git a/api_search.go b/api_search.go new file mode 100644 index 0000000..1249c0d --- /dev/null +++ b/api_search.go @@ -0,0 +1,312 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "strings" + "time" + + "github.com/1440news/shared" + "github.com/jackc/pgx/v5" +) + +// SearchResult represents a search result with feed and matching items +type SearchResult struct { + Feed SearchFeed `json:"feed"` + Items []SearchItem `json:"items"` +} + +type SearchFeed struct { + URL string `json:"url"` + Type string `json:"type"` + Category string `json:"category"` + Title string `json:"title"` + Description string `json:"description"` + Language string `json:"language"` + SiteURL string `json:"site_url"` + DiscoveredAt string `json:"discovered_at"` + LastCheckedAt string `json:"last_checked_at"` + NextCheckAt string `json:"next_check_at"` + LastBuildDate string `json:"last_build_date"` + Status string `json:"status"` + LastError string `json:"last_error"` + LastErrorAt string `json:"last_error_at"` + SourceURL string `json:"source_url"` + SourceHost string `json:"source_host"` + TLD string `json:"tld"` + ItemCount int `json:"item_count"` + OldestItemDate string `json:"oldest_item_date"` + NewestItemDate string `json:"newest_item_date"` + NoUpdate bool `json:"no_update"` +} + +type SearchItem struct { + FeedURL string `json:"feed_url"` + GUID string `json:"guid"` + Title string 
`json:"title"` + Link string `json:"link"` + Description string `json:"description"` + Content string `json:"content"` + Author string `json:"author"` + PubDate string `json:"pub_date"` + DiscoveredAt string `json:"discovered_at"` + UpdatedAt string `json:"updated_at"` +} + +func (d *Dashboard) handleAPISearch(w http.ResponseWriter, r *http.Request) { + query := r.URL.Query().Get("q") + if query == "" { + http.Error(w, "q parameter required", http.StatusBadRequest) + return + } + + limit := 100 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + if limit > 500 { + limit = 500 + } + } + + // Results map: feedURL -> SearchResult + results := make(map[string]*SearchResult) + + // Helper to scan feed row into SearchFeed + scanFeed := func(rows pgx.Rows) (string, SearchFeed, bool) { + var url string + var feedType, category, title, description, language, siteUrl *string + var discoveredAt time.Time + var lastCheckedAt, nextCheckAt, lastBuildDate *time.Time + var itemCount *int + var status, lastError *string + var lastErrorAt *time.Time + var sourceUrl, sourceHost, tld *string + var oldestItemDate, newestItemDate *time.Time + var noUpdate *bool + + if err := rows.Scan(&url, &feedType, &category, &title, &description, &language, &siteUrl, + &discoveredAt, &lastCheckedAt, &nextCheckAt, &lastBuildDate, + &status, &lastError, &lastErrorAt, + &sourceUrl, &sourceHost, &tld, + &itemCount, &oldestItemDate, &newestItemDate, &noUpdate); err != nil { + return "", SearchFeed{}, false + } + cat := shared.StringValue(category) + if cat == "" { + cat = "main" + } + sf := SearchFeed{ + URL: url, + Type: shared.StringValue(feedType), + Category: cat, + Title: shared.StringValue(title), + Description: shared.StringValue(description), + Language: shared.StringValue(language), + SiteURL: shared.StringValue(siteUrl), + DiscoveredAt: discoveredAt.Format(time.RFC3339), + Status: shared.StringValue(status), + LastError: shared.StringValue(lastError), + SourceURL: 
shared.StringValue(sourceUrl), + SourceHost: shared.StringValue(sourceHost), + TLD: shared.StringValue(tld), + } + if lastCheckedAt != nil { + sf.LastCheckedAt = lastCheckedAt.Format(time.RFC3339) + } + if nextCheckAt != nil { + sf.NextCheckAt = nextCheckAt.Format(time.RFC3339) + } + if lastBuildDate != nil { + sf.LastBuildDate = lastBuildDate.Format(time.RFC3339) + } + if lastErrorAt != nil { + sf.LastErrorAt = lastErrorAt.Format(time.RFC3339) + } + if itemCount != nil { + sf.ItemCount = *itemCount + } + if oldestItemDate != nil { + sf.OldestItemDate = oldestItemDate.Format(time.RFC3339) + } + if newestItemDate != nil { + sf.NewestItemDate = newestItemDate.Format(time.RFC3339) + } + if noUpdate != nil { + sf.NoUpdate = *noUpdate + } + return url, sf, true + } + + // Search feeds by source_host (LIKE search for domain matching) + // Use LOWER() to leverage trigram index + lowerPattern := "%" + strings.ToLower(query) + "%" + hostRows, err := d.db.Query(` + SELECT url, type, category, title, description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + status, last_error, last_error_at, + source_url, domain_host as source_host, domain_tld as tld, + item_count, oldest_item_date, newest_item_date, no_update + FROM feeds + WHERE LOWER(domain_host) LIKE $1 OR LOWER(url) LIKE $1 + LIMIT $2 + `, lowerPattern, limit) + if err == nil { + defer hostRows.Close() + for hostRows.Next() { + if url, feed, ok := scanFeed(hostRows); ok { + if _, exists := results[url]; !exists { + results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} + } + } + } + } + + // Search feeds via full-text search + tsQuery := shared.ToSearchQuery(query) + feedRows, err := d.db.Query(` + SELECT url, type, category, title, description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + status, last_error, last_error_at, + source_url, domain_host as source_host, domain_tld as tld, + item_count, oldest_item_date, 
newest_item_date, no_update + FROM feeds + WHERE search_vector @@ to_tsquery('english', $1) + LIMIT $2 + `, tsQuery, limit) + if err == nil { + defer feedRows.Close() + for feedRows.Next() { + if url, feed, ok := scanFeed(feedRows); ok { + if _, exists := results[url]; !exists { + results[url] = &SearchResult{Feed: feed, Items: []SearchItem{}} + } + } + } + } + + // Search items via full-text search + itemRows, err := d.db.Query(` + SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content, i.author, i.pub_date, i.discovered_at, i.updated_at + FROM items i + WHERE i.search_vector @@ to_tsquery('english', $1) + ORDER BY i.pub_date DESC + LIMIT $2 + `, tsQuery, limit) + if err == nil { + defer itemRows.Close() + for itemRows.Next() { + var feedUrl string + var guid, title, link, description, content, author *string + var pubDate, discoveredAt, updatedAt *time.Time + if err := itemRows.Scan(&feedUrl, &guid, &title, &link, &description, &content, &author, &pubDate, &discoveredAt, &updatedAt); err != nil { + continue + } + + item := SearchItem{ + FeedURL: feedUrl, + GUID: shared.StringValue(guid), + Title: shared.StringValue(title), + Link: shared.StringValue(link), + Description: shared.StringValue(description), + Content: shared.StringValue(content), + Author: shared.StringValue(author), + } + if pubDate != nil { + item.PubDate = pubDate.Format(time.RFC3339) + } + if discoveredAt != nil { + item.DiscoveredAt = discoveredAt.Format(time.RFC3339) + } + if updatedAt != nil { + item.UpdatedAt = updatedAt.Format(time.RFC3339) + } + + // Add to existing result or create new one + if result, exists := results[feedUrl]; exists { + result.Items = append(result.Items, item) + } else { + // Fetch feed info for this item's feed + var fType, fCategory, fTitle, fDesc, fLang, fSiteUrl *string + var fDiscoveredAt time.Time + var fLastCheckedAt, fNextCheckAt, fLastBuildDate *time.Time + var fItemCount *int + var fStatus, fLastError *string + var fLastErrorAt *time.Time + var 
fSourceUrl, fSourceHost, fTLD *string + var fOldestItemDate, fNewestItemDate *time.Time + var fNoUpdate *bool + + d.db.QueryRow(` + SELECT type, category, title, description, language, site_url, + discovered_at, last_checked_at, next_check_at, last_build_date, + status, last_error, last_error_at, + source_url, domain_host as source_host, domain_tld as tld, + item_count, oldest_item_date, newest_item_date, no_update + FROM feeds WHERE url = $1 + `, feedUrl).Scan(&fType, &fCategory, &fTitle, &fDesc, &fLang, &fSiteUrl, + &fDiscoveredAt, &fLastCheckedAt, &fNextCheckAt, &fLastBuildDate, + &fStatus, &fLastError, &fLastErrorAt, + &fSourceUrl, &fSourceHost, &fTLD, + &fItemCount, &fOldestItemDate, &fNewestItemDate, &fNoUpdate) + + fCat := shared.StringValue(fCategory) + if fCat == "" { + fCat = "main" + } + sf := SearchFeed{ + URL: feedUrl, + Type: shared.StringValue(fType), + Category: fCat, + Title: shared.StringValue(fTitle), + Description: shared.StringValue(fDesc), + Language: shared.StringValue(fLang), + SiteURL: shared.StringValue(fSiteUrl), + DiscoveredAt: fDiscoveredAt.Format(time.RFC3339), + Status: shared.StringValue(fStatus), + LastError: shared.StringValue(fLastError), + SourceURL: shared.StringValue(fSourceUrl), + SourceHost: shared.StringValue(fSourceHost), + TLD: shared.StringValue(fTLD), + } + if fLastCheckedAt != nil { + sf.LastCheckedAt = fLastCheckedAt.Format(time.RFC3339) + } + if fNextCheckAt != nil { + sf.NextCheckAt = fNextCheckAt.Format(time.RFC3339) + } + if fLastBuildDate != nil { + sf.LastBuildDate = fLastBuildDate.Format(time.RFC3339) + } + if fLastErrorAt != nil { + sf.LastErrorAt = fLastErrorAt.Format(time.RFC3339) + } + if fItemCount != nil { + sf.ItemCount = *fItemCount + } + if fOldestItemDate != nil { + sf.OldestItemDate = fOldestItemDate.Format(time.RFC3339) + } + if fNewestItemDate != nil { + sf.NewestItemDate = fNewestItemDate.Format(time.RFC3339) + } + if fNoUpdate != nil { + sf.NoUpdate = *fNoUpdate + } + results[feedUrl] = 
&SearchResult{ + Feed: sf, + Items: []SearchItem{item}, + } + } + } + } + + // Convert map to slice + var resultList []SearchResult + for _, r := range results { + resultList = append(resultList, *r) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resultList) +} diff --git a/docker-compose.yml b/docker-compose.yml index b72136f..34aac5a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,10 @@ services: - dashboard: + watcher: build: context: .. - dockerfile: dashboard/Dockerfile - image: atproto-1440news-dashboard - container_name: atproto-1440news-dashboard + dockerfile: watcher/Dockerfile + image: atproto-1440news-watcher + container_name: atproto-1440news-watcher restart: unless-stopped stop_grace_period: 30s env_file: @@ -23,21 +23,21 @@ services: - atproto labels: - "traefik.enable=true" - # Production: HTTPS with Let's Encrypt for dashboard.1440.news - - "traefik.http.routers.dashboard-1440.rule=Host(`dashboard.1440.news`)" - - "traefik.http.routers.dashboard-1440.entrypoints=https" - - "traefik.http.routers.dashboard-1440.tls.certresolver=letsencrypt-dns" + # Production: HTTPS for dashboard.1440.news and watcher.1440.news + - "traefik.http.routers.watcher-1440.rule=Host(`dashboard.1440.news`) || Host(`watcher.1440.news`)" + - "traefik.http.routers.watcher-1440.entrypoints=https" + - "traefik.http.routers.watcher-1440.tls.certresolver=letsencrypt-dns" # Production: HTTP to HTTPS redirect - - "traefik.http.routers.dashboard-1440-redirect.rule=Host(`dashboard.1440.news`)" - - "traefik.http.routers.dashboard-1440-redirect.entrypoints=http" - - "traefik.http.routers.dashboard-1440-redirect.middlewares=https-redirect" + - "traefik.http.routers.watcher-1440-redirect.rule=Host(`dashboard.1440.news`) || Host(`watcher.1440.news`)" + - "traefik.http.routers.watcher-1440-redirect.entrypoints=http" + - "traefik.http.routers.watcher-1440-redirect.middlewares=https-redirect" - 
"traefik.http.middlewares.https-redirect.redirectscheme.scheme=https" - "traefik.http.middlewares.https-redirect.redirectscheme.permanent=true" # Local development - - "traefik.http.routers.dashboard-1440-local.rule=Host(`dashboard.1440.localhost`)" - - "traefik.http.routers.dashboard-1440-local.entrypoints=http" + - "traefik.http.routers.watcher-1440-local.rule=Host(`dashboard.1440.localhost`) || Host(`watcher.1440.localhost`)" + - "traefik.http.routers.watcher-1440-local.entrypoints=http" # Shared service - - "traefik.http.services.dashboard-1440.loadbalancer.server.port=4321" + - "traefik.http.services.watcher-1440.loadbalancer.server.port=4321" secrets: db_password: diff --git a/go.mod b/go.mod index 906afbc..2042738 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/1440news/dashboard +module github.com/1440news/watcher go 1.24.0 diff --git a/oauth.env b/oauth.env new file mode 100644 index 0000000..d1fe1ca --- /dev/null +++ b/oauth.env @@ -0,0 +1,3 @@ +# OAuth configuration for the 1440.news dashboard. SECURITY NOTE(review): this file commits a live cookie secret and the OAuth EC *private* key (the "d" component of the JWK below) to version control — these values must be rotated and moved to an untracked secrets store / deployment-time env injection before release. +OAUTH_COOKIE_SECRET=3d020d6988f954463e659d727e878374eee29062fbc3279377b8af2fa3c9e2bf +OAUTH_PRIVATE_JWK={"crv":"P-256","d":"WjBTrsQ7fmpw1eX0WoC0hSXX0JlXQU1t7Lly0ozRslU","kid":"1769802331","kty":"EC","x":"2EWqW7sIIS_7qffTRcKx7LoSg2wJZYRDu7jWwyFB11k","y":"M1XR_SjyxqKJ0TlMUWnjJhYympUiIsLQVuN5iQg2spg"}