diff --git a/CLAUDE.md b/CLAUDE.md
index 1c2ec06..61745f5 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -47,13 +47,14 @@ Multi-file Go application that crawls websites for RSS/Atom feeds, stores them i
 
 ### Concurrent Loops (main.go)
 
-The application runs six independent goroutine loops:
+The application runs five independent goroutine loops:
 
 - **Import loop** - Reads `vertices.txt.gz` and inserts domains into DB in batches of 100 (status='pass')
 - **Crawl loop** - Worker pool crawls approved domains for feed discovery
 - **Feed check loop** - Worker pool re-checks known feeds for updates (conditional HTTP)
 - **Stats loop** - Updates cached dashboard statistics every minute
 - **Cleanup loop** - Removes items older than 12 months (weekly)
-- **Publish loop** - Autopublishes items from approved feeds to AT Protocol PDS
+
+Note: Publishing is handled by the separate `publish` service.
 
 ### File Structure
@@ -67,7 +68,6 @@ The application runs five independent goroutine loops:
 | `util.go` | URL normalization, host utilities, TLD extraction |
 | `db.go` | PostgreSQL schema (domains, feeds, items tables with tsvector FTS) |
 | `dashboard.go` | HTTP server, JSON APIs, HTML template |
-| `publisher.go` | AT Protocol PDS integration for posting items |
 | `oauth.go` | OAuth 2.0 client wrapper for AT Protocol authentication |
 | `oauth_session.go` | Session management with AES-256-GCM encrypted cookies |
 | `oauth_middleware.go` | RequireAuth middleware for protecting routes |
diff --git a/crawler.go b/crawler.go
index 0be1c46..fe0dad6 100644
--- a/crawler.go
+++ b/crawler.go
@@ -3,12 +3,10 @@ package main
 import (
 	"context"
 	"crypto/tls"
-	"encoding/json"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
-	"os"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -147,365 +145,6 @@ func (c *Crawler) StartMaintenanceLoop() {
 	}
 }
 
-// StartPublishLoop automatically publishes unpublished items for approved feeds
-// Grabs up to 50 items sorted by discovered_at, publishes one per second, then reloops
-func (c *Crawler) StartPublishLoop() {
-	// Load PDS credentials from environment or pds.env file
-	pdsHost := os.Getenv("PDS_HOST")
-	pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD")
-
-	if pdsHost == "" || pdsAdminPassword == "" {
-		if data, err := os.ReadFile("pds.env"); err == nil {
-			for _, line := range strings.Split(string(data), "\n") {
-				line = strings.TrimSpace(line)
-				if strings.HasPrefix(line, "#") || line == "" {
-					continue
-				}
-				parts := strings.SplitN(line, "=", 2)
-				if len(parts) == 2 {
-					key := strings.TrimSpace(parts[0])
-					value := strings.TrimSpace(parts[1])
-					switch key {
-					case "PDS_HOST":
-						pdsHost = value
-					case "PDS_ADMIN_PASSWORD":
-						pdsAdminPassword = value
-					}
-				}
-			}
-		}
-	}
-
-	if pdsHost == "" || pdsAdminPassword == "" {
-		fmt.Println("Publish loop: PDS credentials not configured, skipping")
-		return
-	}
-
-	fmt.Printf("Publish loop: starting with PDS %s\n", pdsHost)
-	feedPassword := "feed1440!"
-
-	// Cache sessions per account
-	sessions := make(map[string]*PDSSession)
-	publisher := NewPublisher(pdsHost)
-
-	// Refresh existing account profiles on startup
-	c.RefreshAllProfiles(publisher, feedPassword)
-
-	for {
-		if c.IsShuttingDown() {
-			return
-		}
-
-		// Get up to 50 unpublished items from approved feeds, sorted by discovered_at ASC
-		items, err := c.GetAllUnpublishedItems(50)
-		if err != nil {
-			fmt.Printf("Publish loop error: %v\n", err)
-			time.Sleep(1 * time.Second)
-			continue
-		}
-
-		if len(items) == 0 {
-			time.Sleep(1 * time.Second)
-			continue
-		}
-
-		// Publish one item per second
-		for _, item := range items {
-			if c.IsShuttingDown() {
-				return
-			}
-			// Get or create session for this feed's account
-			account := c.getAccountForFeed(item.FeedURL)
-			if account == "" {
-				time.Sleep(1 * time.Second)
-				continue
-			}
-
-			session, ok := sessions[account]
-			if !ok {
-				// Try to log in
-				session, err = publisher.CreateSession(account, feedPassword)
-				if err != nil {
-					// Account might not exist - try to create it
-					inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1)
-					if err != nil {
-						fmt.Printf("Publish: failed to create invite for %s: %v\n", account, err)
-						time.Sleep(1 * time.Second)
-						continue
-					}
-
-					email := account + "@1440.news"
-					session, err = publisher.CreateAccount(account, email, feedPassword, inviteCode)
-					if err != nil {
-						fmt.Printf("Publish: failed to create account %s: %v\n", account, err)
-						time.Sleep(1 * time.Second)
-						continue
-					}
-					fmt.Printf("Publish: created account %s\n", account)
-					c.db.Exec("UPDATE feeds SET publish_account = $1 WHERE url = $2", account, item.FeedURL)
-
-					// Set up profile for new account
-					feedInfo := c.getFeedInfo(item.FeedURL)
-					if feedInfo != nil {
-						displayName := feedInfo.Title
-						if displayName == "" {
-							displayName = account
-						}
-						// Build description with feed URL (strip HTML tags)
-						description := stripHTML(feedInfo.Description)
-						if description == "" {
-							description = "News feed via 1440.news"
-						}
-						// Add feed URL as first line of description
-						feedURLFull := "https://" + item.FeedURL
-						description = feedURLFull + "\n\n" + description
-						// Truncate if needed
-						if len(displayName) > 64 {
-							displayName = displayName[:61] + "..."
-						}
-						if len(description) > 256 {
-							description = description[:253] + "..."
-						}
-						// Fetch and upload favicon as avatar
-						var avatar *BlobRef
-						faviconSource := feedInfo.SiteURL
-						if faviconSource == "" {
-							// Fallback to deriving from feed URL
-							faviconSource = feedInfo.SourceHost
-						}
-						if faviconSource != "" {
-							faviconURL := publisher.FetchFavicon(faviconSource)
-							if faviconURL != "" {
-								avatar = publisher.fetchAndUploadImage(session, faviconURL)
-							}
-						}
-						if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil {
-							fmt.Printf("Publish: failed to set profile for %s: %v\n", account, err)
-						} else {
-							fmt.Printf("Publish: set profile for %s\n", account)
-						}
-
-						// Have directory account follow this new account
-						if err := publisher.FollowAsDirectory(session.DID); err != nil {
-							fmt.Printf("Publish: directory follow failed for %s: %v\n", account, err)
-						} else {
-							fmt.Printf("Publish: directory now following %s\n", account)
-						}
-					}
-				}
-				sessions[account] = session
-			}
-
-			// Shorten URLs before publishing
-			itemToPublish := item
-			if item.Link != "" {
-				if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
-					fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
-					itemToPublish.Link = shortURL
-				} else {
-					fmt.Printf("Publish: short URL failed for %s: %v\n", item.Link[:min(40, len(item.Link))], err)
-				}
-			}
-
-			// Publish the item
-			uri, err := publisher.PublishItem(session, &itemToPublish)
-			if err != nil {
-				fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
-				// Clear session cache on auth errors
-				if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
-					delete(sessions, account)
-				}
-			} else {
-				c.MarkItemPublished(item.FeedURL, item.GUID, uri)
-				fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
-			}
-
-			time.Sleep(1 * time.Second)
-		}
-
-		time.Sleep(1 * time.Second)
-	}
-}
-
-// getAccountForFeed returns the publish account for a feed URL
-func (c *Crawler) getAccountForFeed(feedURL string) string {
-	var account *string
-	err := c.db.QueryRow(`
-		SELECT publish_account FROM feeds
-		WHERE url = $1 AND publish_status = 'pass' AND status = 'pass'
-	`, feedURL).Scan(&account)
-	if err != nil || account == nil || *account == "" {
-		// Derive handle from feed URL
-		return DeriveHandleFromFeed(feedURL)
-	}
-	return *account
-}
-
-// FeedInfo holds basic feed metadata for profile setup
-type FeedInfo struct {
-	Title       string
-	Description string
-	SiteURL     string
-	SourceHost  string
-}
-
-// getFeedInfo returns feed metadata for profile setup
-func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
-	var title, description, siteURL, sourceHost *string
-	err := c.db.QueryRow(`
-		SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1
-	`, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
-	if err != nil {
-		return nil
-	}
-	return &FeedInfo{
-		Title:       StringValue(title),
-		Description: StringValue(description),
-		SiteURL:     StringValue(siteURL),
-		SourceHost:  StringValue(sourceHost),
-	}
-}
-
-// RefreshAllProfiles updates profiles for all existing accounts with feed URLs
-func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
-	rows, err := c.db.Query(`
-		SELECT url, title, description, site_url, domain_host as source_host, publish_account
-		FROM feeds
-		WHERE publish_account IS NOT NULL AND publish_account <> ''
-	`)
-	if err != nil {
-		fmt.Printf("RefreshProfiles: query error: %v\n", err)
-		return
-	}
-	defer rows.Close()
-
-	for rows.Next() {
-		var feedURL, account string
-		var title, description, siteURL, sourceHost *string
-		if err := rows.Scan(&feedURL, &title, &description, &siteURL, &sourceHost, &account); err != nil {
-			continue
-		}
-
-		// Login to account
-		session, err := publisher.CreateSession(account, feedPassword)
-		if err != nil {
-			fmt.Printf("RefreshProfiles: login failed for %s: %v\n", account, err)
-			continue
-		}
-
-		// Build profile
-		displayName := StringValue(title)
-		if displayName == "" {
-			displayName = account
-		}
-		desc := stripHTML(StringValue(description))
-		if desc == "" {
-			desc = "News feed via 1440.news"
-		}
-		// Add feed URL as first line
-		feedURLFull := "https://" + feedURL
-		desc = feedURLFull + "\n\n" + desc
-
-		// Truncate if needed
-		if len(displayName) > 64 {
-			displayName = displayName[:61] + "..."
-		}
-		if len(desc) > 256 {
-			desc = desc[:253] + "..."
-		}
-
-		// Fetch and upload favicon as avatar
-		var avatar *BlobRef
-		faviconSource := StringValue(siteURL)
-		if faviconSource == "" {
-			// Fallback to source host
-			faviconSource = StringValue(sourceHost)
-		}
-		if faviconSource != "" {
-			faviconURL := publisher.FetchFavicon(faviconSource)
-			if faviconURL != "" {
-				avatar = publisher.fetchAndUploadImage(session, faviconURL)
-			}
-		}
-
-		if err := publisher.UpdateProfile(session, displayName, desc, avatar); err != nil {
-			fmt.Printf("RefreshProfiles: update failed for %s: %v\n", account, err)
-		} else {
-			fmt.Printf("RefreshProfiles: updated %s\n", account)
-		}
-	}
-}
-
-// GetAllUnpublishedItems returns unpublished items from all approved feeds
-func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
-	rows, err := c.db.Query(`
-		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
-		       i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
-		       i.enclosure_url, i.enclosure_type, i.enclosure_length
-		FROM items i
-		JOIN feeds f ON i.feed_url = f.url
-		WHERE f.publish_status = 'pass'
-		  AND f.status = 'pass'
-		  AND i.published_at IS NULL
-		ORDER BY i.discovered_at ASC
-		LIMIT $1
-	`, limit)
-	if err != nil {
-		return nil, err
-	}
-	defer rows.Close()
-
-	var items []Item
-	for rows.Next() {
-		var item Item
-		var guid, title, link, description, content, author, imageURLsJSON, tagsJSON *string
-		var pubDate, discoveredAt *time.Time
-		var enclosureURL, enclosureType *string
-		var enclosureLength *int64
-
-		err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
-			&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
-			&enclosureURL, &enclosureType, &enclosureLength)
-		if err != nil {
-			continue
-		}
-
-		item.GUID = StringValue(guid)
-		item.Title = StringValue(title)
-		item.Link = StringValue(link)
-		item.Description = StringValue(description)
-		item.Content = StringValue(content)
-		item.Author = StringValue(author)
-		item.PubDate = TimeValue(pubDate)
-		item.DiscoveredAt = TimeValue(discoveredAt)
-
-		// Parse image URLs from JSON array
-		if imageURLsJSON != nil && *imageURLsJSON != "" {
-			json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
-		}
-
-		// Parse tags from JSON array
-		if tagsJSON != nil && *tagsJSON != "" {
-			json.Unmarshal([]byte(*tagsJSON), &item.Tags)
-		}
-
-		// Parse enclosure
-		if enclosureURL != nil && *enclosureURL != "" {
-			item.Enclosure = &Enclosure{
-				URL:  *enclosureURL,
-				Type: StringValue(enclosureType),
-			}
-			if enclosureLength != nil {
-				item.Enclosure.Length = *enclosureLength
-			}
-		}
-
-		items = append(items, item)
-	}
-
-	return items, nil
-}
-
 // dnsResolver uses local caching DNS (infra-dns) with fallback to system
 var dnsResolver = &net.Resolver{
 	PreferGo: true,
diff --git a/feed.go b/feed.go
index a1c879b..a9beecb 100644
--- a/feed.go
+++ b/feed.go
@@ -452,15 +452,9 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
 }
 
 // SetPublishStatus sets the publish status for a feed ('hold', 'pass', 'skip')
-// If status is 'pass', the account handle is also set (auto-derived if empty)
 func (c *Crawler) SetPublishStatus(feedURL, status, account string) error {
 	feedURL = normalizeURL(feedURL)
 
-	// Auto-derive account if passing and not provided
-	if status == "pass" && account == "" {
-		account = DeriveHandleFromFeed(feedURL)
-	}
-
 	_, err := c.db.Exec(`
 		UPDATE feeds SET publish_status = $1, publish_account = $2 WHERE url = $3
 	`, status, NullableString(account), feedURL)
diff --git a/handle.go b/handle.go
deleted file mode 100644
index 4995ba1..0000000
--- a/handle.go
+++ /dev/null
@@ -1,262 +0,0 @@
-package main
-
-import (
-	"net/url"
-	"regexp"
-	"strings"
-)
-
-// DeriveHandleFromFeed generates an AT Protocol handle from a feed URL
-// Format: {domain}-{category}.1440.news
-// AT Protocol allows up to 63 characters per label, but the PDS
-// restricts the first segment to 18 characters for local handles.
-// Examples:
-//
-//	feeds.bbci.co.uk/news/technology/rss.xml → bbc-technology.1440.news
-//	news.ycombinator.com/rss → ycombinator.1440.news
-func DeriveHandleFromFeed(feedURL string) string {
-	const maxSubdomainLen = 18 // PDS limit for first segment
-
-	// Ensure we have a scheme for parsing
-	if !strings.Contains(feedURL, "://") {
-		feedURL = "https://" + feedURL
-	}
-
-	u, err := url.Parse(feedURL)
-	if err != nil {
-		return ""
-	}
-
-	hostname := strings.ToLower(u.Hostname())
-	path := strings.ToLower(u.Path)
-
-	// Remove common feed suffixes/extensions
-	suffixesToRemove := []string{".xml", ".rss", ".atom", ".json", "/rss", "/feed", "/atom", "/index"}
-	for _, suffix := range suffixesToRemove {
-		path = strings.TrimSuffix(path, suffix)
-	}
-
-	// Split path into segments and filter noise
-	segments := strings.Split(strings.Trim(path, "/"), "/")
-	skipPathWords := map[string]bool{
-		"rss": true, "feed": true, "feeds": true, "atom": true,
-		"xml": true, "default": true, "index": true, "services": true,
-		"nyt": true,
-	}
-
-	var pathParts []string
-	for _, seg := range segments {
-		seg = cleanHandleSegment(seg)
-		if seg != "" && !skipPathWords[seg] {
-			pathParts = append(pathParts, seg)
-		}
-	}
-
-	// Split hostname and extract the meaningful domain
-	hostParts := strings.Split(hostname, ".")
-
-	// Two-part TLDs to handle specially
-	twoPartTLDs := map[string]bool{
-		"co.uk": true, "com.au": true, "co.nz": true, "co.jp": true,
-		"com.br": true, "co.in": true, "org.uk": true, "ac.uk": true,
-	}
-
-	// Check for two-part TLD
-	if len(hostParts) >= 2 {
-		possibleTwoPartTLD := hostParts[len(hostParts)-2] + "." + hostParts[len(hostParts)-1]
-		if twoPartTLDs[possibleTwoPartTLD] {
-			hostParts = hostParts[:len(hostParts)-2]
-		} else {
-			// Single TLD - remove it
-			singleTLDs := map[string]bool{
-				"com": true, "org": true, "net": true, "io": true,
-				"edu": true, "gov": true, "uk": true, "de": true, "fr": true,
-			}
-			if singleTLDs[hostParts[len(hostParts)-1]] {
-				hostParts = hostParts[:len(hostParts)-1]
-			}
-		}
-	}
-
-	// Skip noise subdomains
-	skipHostWords := map[string]bool{
-		"www": true, "feeds": true, "rss": true, "feed": true,
-		"api": true, "cdn": true, "static": true, "news": true,
-	}
-
-	var meaningfulHostParts []string
-	for _, part := range hostParts {
-		if !skipHostWords[part] && part != "" {
-			meaningfulHostParts = append(meaningfulHostParts, part)
-		}
-	}
-
-	// Get the main domain (e.g., "bbci", "ycombinator", "nytimes")
-	var mainDomain string
-	if len(meaningfulHostParts) > 0 {
-		mainDomain = meaningfulHostParts[len(meaningfulHostParts)-1]
-	} else if len(hostParts) > 0 {
-		mainDomain = hostParts[len(hostParts)-1]
-	}
-
-	// Special case: "bbci" should become "bbc"
-	if mainDomain == "bbci" {
-		mainDomain = "bbc"
-	}
-
-	// Abbreviations for long category names to fit 18-char limit
-	categoryAbbrevs := map[string]string{
-		"science-and-environment": "sci-env",
-		"entertainment-and-arts":  "ent-arts",
-		"science-environment":     "sci-env",
-		"entertainment-arts":      "ent-arts",
-		"technology":              "tech",
-		"business":                "biz",
-		"international":           "intl",
-		"environment":             "env",
-		"entertainment":           "ent",
-		"politics":                "pol",
-	}
-
-	// Build subdomain: domain + category (from path)
-	var subdomain string
-	if len(pathParts) > 0 {
-		// Use last meaningful path part as category (e.g., "technology" from /news/technology/)
-		category := pathParts[len(pathParts)-1]
-		// Skip generic categories
-		if category == "news" && len(pathParts) == 1 {
-			subdomain = mainDomain
-		} else {
-			// Try to abbreviate if the full subdomain would be too long
-			fullSubdomain := mainDomain + "-" + category
-			if len(fullSubdomain) > maxSubdomainLen {
-				if abbrev, ok := categoryAbbrevs[category]; ok {
-					category = abbrev
-				}
-			}
-			subdomain = mainDomain + "-" + category
-		}
-	} else {
-		subdomain = mainDomain
-	}
-
-	// If still too long, just use main hostname
-	if len(subdomain) > maxSubdomainLen {
-		subdomain = mainDomain
-	}
-
-	// Final safety: truncate if still too long
-	if len(subdomain) > maxSubdomainLen {
-		subdomain = subdomain[:maxSubdomainLen]
-	}
-
-	subdomain = strings.Trim(subdomain, "-")
-
-	// Collapse multiple hyphens
-	for strings.Contains(subdomain, "--") {
-		subdomain = strings.ReplaceAll(subdomain, "--", "-")
-	}
-
-	return subdomain + ".1440.news"
-}
-
-// cleanHandleSegment sanitizes a string for use in an AT Protocol handle segment
-// Handle segments must be alphanumeric with hyphens, no leading/trailing hyphens
-func cleanHandleSegment(s string) string {
-	// Remove file extensions
-	if idx := strings.LastIndex(s, "."); idx > 0 {
-		s = s[:idx]
-	}
-
-	// Convert to lowercase
-	s = strings.ToLower(s)
-
-	// Strip common feed prefixes/suffixes from the segment itself
-	// e.g., "showrss" → "show", "rssworld" → "world"
-	feedAffixes := []string{"rss", "feed", "atom", "xml"}
-	for _, affix := range feedAffixes {
-		// Strip suffix (e.g., "showrss" → "show")
-		if strings.HasSuffix(s, affix) && len(s) > len(affix) {
-			s = strings.TrimSuffix(s, affix)
-			break
-		}
-		// Strip prefix (e.g., "rssworld" → "world")
-		if strings.HasPrefix(s, affix) && len(s) > len(affix) {
-			s = strings.TrimPrefix(s, affix)
-			break
-		}
-	}
-
-	// Replace underscores and other separators with hyphens
-	s = strings.ReplaceAll(s, "_", "-")
-	s = strings.ReplaceAll(s, " ", "-")
-
-	// Remove any characters that aren't alphanumeric or hyphens
-	reg := regexp.MustCompile(`[^a-z0-9-]`)
-	s = reg.ReplaceAllString(s, "")
-
-	// Collapse multiple hyphens
-	for strings.Contains(s, "--") {
-		s = strings.ReplaceAll(s, "--", "-")
-	}
-
-	// Trim leading/trailing hyphens
-	s = strings.Trim(s, "-")
-
-	return s
-}
-
-// SplitHandle extracts the path prefix and hostname from a derived handle
-// Example: show.news.ycombinator.com.1440.news → ("show", "news.ycombinator.com")
-func SplitHandle(handle string) (prefix string, hostname string) {
-	// Remove .1440.news suffix
-	handle = strings.TrimSuffix(handle, ".1440.news")
-
-	parts := strings.Split(handle, ".")
-
-	// Try to find where hostname starts by looking for valid hostname patterns
-	if len(parts) >= 2 {
-		for i := 0; i < len(parts)-1; i++ {
-			remaining := strings.Join(parts[i:], ".")
-			if looksLikeHostname(remaining) {
-				if i > 0 {
-					prefix = strings.Join(parts[:i], ".")
-				}
-				hostname = remaining
-				return
-			}
-		}
-	}
-
-	// Fallback: no prefix, entire thing is hostname
-	hostname = handle
-	return "", hostname
-}
-
-func isLikelyTLDPart(s string) bool {
-	tlds := map[string]bool{
-		"com": true, "org": true, "net": true, "edu": true, "gov": true,
-		"io": true, "co": true, "uk": true, "de": true, "fr": true,
-		"jp": true, "au": true, "ca": true, "nl": true, "se": true,
-		"news": true, "blog": true, "tech": true, "dev": true,
-	}
-	return tlds[s]
-}
-
-func isTwoPartTLD(first, second string) bool {
-	twoPartTLDs := map[string]bool{
-		"co.uk": true, "com.au": true, "co.jp": true, "co.nz": true,
-		"org.uk": true, "net.au": true, "com.br": true,
-	}
-	return twoPartTLDs[first+"."+second]
-}
-
-func looksLikeHostname(s string) bool {
-	// A hostname typically has at least one dot and ends with a TLD-like part
-	parts := strings.Split(s, ".")
-	if len(parts) < 2 {
-		return false
-	}
-	lastPart := parts[len(parts)-1]
-	return isLikelyTLDPart(lastPart)
-}
diff --git a/image.go b/image.go
deleted file mode 100644
index d4b7aea..0000000
--- a/image.go
+++ /dev/null
@@ -1,381 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"fmt"
-	"image"
-	_ "image/gif"
-	"image/jpeg"
-	_ "image/png"
-	"io"
-	"net/http"
-	"net/url"
-	"strings"
-	"time"
-
-	"go.deanishe.net/favicon"
-	"golang.org/x/image/draw"
-	_ "golang.org/x/image/webp"
-)
-
-// ImageUploadResult contains the uploaded blob and image dimensions
-type ImageUploadResult struct {
-	Blob   *BlobRef
-	Width  int
-	Height int
-}
-
-// uploadImages fetches and uploads up to 4 images, returning BskyImage structs
-func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altText string) []BskyImage {
-	var images []BskyImage
-	maxImages := 4
-	if len(imageURLs) < maxImages {
-		maxImages = len(imageURLs)
-	}
-
-	for i := 0; i < maxImages; i++ {
-		result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
-		if result != nil && result.Blob != nil {
-			img := BskyImage{
-				Alt:   altText,
-				Image: result.Blob,
-			}
-			if result.Width > 0 && result.Height > 0 {
-				img.AspectRatio = &BskyAspectRatio{
-					Width:  result.Width,
-					Height: result.Height,
-				}
-			}
-			images = append(images, img)
-		}
-	}
-
-	return images
-}
-
-// FetchFavicon tries to get a favicon URL for a site
-// Uses go.deanishe.net/favicon library which parses HTML, manifests, and checks common paths
-// Returns the favicon URL or empty string if not found
-func (p *Publisher) FetchFavicon(siteURL string) string {
-	if siteURL == "" {
-		return ""
-	}
-
-	// Ensure URL has scheme
-	if !strings.Contains(siteURL, "://") {
-		siteURL = "https://" + siteURL
-	}
-	u, err := url.Parse(siteURL)
-	if err != nil {
-		return ""
-	}
-
-	// Create finder with custom HTTP client
-	// Note: Don't use IgnoreNoSize as it filters out valid favicon.ico files that don't have size metadata
-	finder := favicon.New(
-		favicon.WithClient(p.httpClient),
-	)
-
-	// Find icons - library checks HTML tags, manifests, OG images, common paths
-	icons, err := finder.Find(siteURL)
-	if err == nil && len(icons) > 0 {
-		// Filter and score icons for avatar use
-		// Prefer: square icons, reasonable size, PNG format, actual favicons over OG images
-		var bestIcon string
-		var bestScore int
-
-		for _, icon := range icons {
-			// Skip tiny icons (likely tracking pixels)
-			if icon.Width > 0 && icon.Width < 32 {
-				continue
-			}
-
-			// Skip Open Graph images (meant for link previews, usually wide banners)
-			lowerURL := strings.ToLower(icon.URL)
-			if strings.Contains(lowerURL, "og-image") || strings.Contains(lowerURL, "og_image") ||
-				strings.Contains(lowerURL, "opengraph") || strings.Contains(lowerURL, "twitter") {
-				continue
-			}
-
-			// Skip wide images (aspect ratio > 1.5 means it's a banner, not an icon)
-			if icon.Width > 0 && icon.Height > 0 {
-				ratio := float64(icon.Width) / float64(icon.Height)
-				if ratio > 1.5 || ratio < 0.67 {
-					continue
-				}
-			}
-
-			// Score the icon
-			score := 0
-
-			// Prefer actual favicon paths
-			if strings.Contains(lowerURL, "favicon") || strings.Contains(lowerURL, "icon") ||
-				strings.Contains(lowerURL, "apple-touch") {
-				score += 100
-			}
-
-			// Prefer PNG over other formats
-			if icon.MimeType == "image/png" {
-				score += 50
-			} else if icon.MimeType == "image/x-icon" || strings.HasSuffix(lowerURL, ".ico") {
-				score += 40
-			} else if icon.MimeType == "image/jpeg" {
-				score += 10 // JPEG less preferred for icons
-			}
-
-			// Prefer larger icons (but not too large)
-			if icon.Width >= 64 && icon.Width <= 512 {
-				score += 30
-			} else if icon.Width > 0 {
-				score += 10
-			}
-
-			if score > bestScore {
-				bestScore = score
-				bestIcon = icon.URL
-			}
-		}
-
-		if bestIcon != "" {
-			return bestIcon
-		}
-
-		// Fall back to first non-OG icon
-		for _, icon := range icons {
-			lowerURL := strings.ToLower(icon.URL)
-			if !strings.Contains(lowerURL, "og-image") && !strings.Contains(lowerURL, "og_image") {
-				return icon.URL
-			}
-		}
-	}
-
-	// Fallback to Google's favicon service (reliable, returns PNG)
-	return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
-}
-
-func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
-	result := p.fetchAndUploadImageWithDimensions(session, imageURL)
-	if result == nil {
-		return nil
-	}
-	return result.Blob
-}
-
-// upgradeImageURL attempts to get a larger version of known CDN image URLs
-func upgradeImageURL(imageURL string) string {
-	// BBC images: /standard/240/ -> /standard/800/
-	if strings.Contains(imageURL, "ichef.bbci.co.uk") {
-		imageURL = strings.Replace(imageURL, "/standard/240/", "/standard/800/", 1)
-		imageURL = strings.Replace(imageURL, "/standard/480/", "/standard/800/", 1)
-	}
-	return imageURL
-}
-
-func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
-	// Upgrade image URL to larger size if possible
-	imageURL = upgradeImageURL(imageURL)
-
-	// Fetch the image
-	resp, err := p.httpClient.Get(imageURL)
-	if err != nil {
-		return nil
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return nil
-	}
-
-	// Check content type
-	contentType := resp.Header.Get("Content-Type")
-	if contentType == "" {
-		// Try to guess from URL
-		if strings.HasSuffix(strings.ToLower(imageURL), ".png") {
-			contentType = "image/png"
-		} else if strings.HasSuffix(strings.ToLower(imageURL), ".gif") {
-			contentType = "image/gif"
-		} else if strings.HasSuffix(strings.ToLower(imageURL), ".webp") {
-			contentType = "image/webp"
-		} else {
-			contentType = "image/jpeg" // Default
-		}
-	}
-
-	// Only accept image types
-	if !strings.HasPrefix(contentType, "image/") {
-		return nil
-	}
-
-	// Read image data (limit to 2MB to allow for resize headroom)
-	data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
-	if err != nil || len(data) == 0 {
-		return nil
-	}
-
-	// Decode image to get dimensions
-	imgConfig, _, err := image.DecodeConfig(bytes.NewReader(data))
-	width, height := 1, 1 // Default if decode fails
-	if err == nil {
-		width, height = imgConfig.Width, imgConfig.Height
-	}
-
-	// Bluesky blob limit is ~976KB, use 900KB as safe threshold
-	const maxBlobSize = 900 * 1024
-
-	// If image is too large, resize it
-	if len(data) > maxBlobSize {
-		// Decode the full image for resizing
-		img, _, err := image.Decode(bytes.NewReader(data))
-		if err != nil {
-			return nil // Can't decode, can't resize
-		}
-
-		// Scale down iteratively until under limit
-		scaleFactor := 0.9 // Start with 90% and iterate if needed
-
-		for attempt := 0; attempt < 5; attempt++ {
-			newWidth := int(float64(width) * scaleFactor)
-			newHeight := int(float64(height) * scaleFactor)
-
-			// Minimum dimensions
-			if newWidth < 100 {
-				newWidth = 100
-			}
-			if newHeight < 100 {
-				newHeight = 100
-			}
-
-			// Create resized image
-			resized := image.NewRGBA(image.Rect(0, 0, newWidth, newHeight))
-			draw.CatmullRom.Scale(resized, resized.Bounds(), img, img.Bounds(), draw.Over, nil)
-
-			// Encode as JPEG
-			var buf bytes.Buffer
-			if err := jpeg.Encode(&buf, resized, &jpeg.Options{Quality: 85}); err != nil {
-				return nil
-			}
-
-			if buf.Len() <= maxBlobSize {
-				data = buf.Bytes()
-				width = newWidth
-				height = newHeight
-				contentType = "image/jpeg"
-				break
-			}
-
-			// Still too large, reduce scale further
-			scaleFactor *= 0.8
-		}
-
-		// If still too large after 5 attempts, give up
-		if len(data) > maxBlobSize {
-			return nil
-		}
-	}
-
-	// Upload to PDS
-	blob, err := p.UploadBlob(session, data, contentType)
-	if err != nil {
-		return nil
-	}
-
-	return &ImageUploadResult{
-		Blob:   blob,
-		Width:  width,
-		Height: height,
-	}
-}
-
-// FetchFaviconBytes downloads a favicon/icon from a URL
-// Uses go.deanishe.net/favicon library to find the best icon
-// Returns the icon bytes and mime type, or an error
-func FetchFaviconBytes(siteURL string) ([]byte, string, error) {
-	if !strings.HasPrefix(siteURL, "http") {
-		siteURL = "https://" + siteURL
-	}
-
-	u, err := url.Parse(siteURL)
-	if err != nil {
-		return nil, "", err
-	}
-
-	client := &http.Client{Timeout: 10 * time.Second}
-
-	// Use favicon library to find icons
-	finder := favicon.New(
-		favicon.WithClient(client),
-		favicon.IgnoreNoSize,
-	)
-
-	icons, err := finder.Find(siteURL)
-	if err != nil || len(icons) == 0 {
-		// Fallback to Google's favicon service
-		googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
-		return fetchIconBytes(client, googleURL)
-	}
-
-	// Try icons in order (sorted by size, largest first)
-	// Prefer PNG/JPEG over ICO
-	var iconURLs []string
-	for _, icon := range icons {
-		if icon.Width > 0 && icon.Width < 32 {
-			continue // Skip tiny icons
-		}
-		if icon.MimeType == "image/png" || icon.MimeType == "image/jpeg" {
-			iconURLs = append([]string{icon.URL}, iconURLs...) // Prepend PNG/JPEG
-		} else {
-			iconURLs = append(iconURLs, icon.URL)
-		}
-	}
-
-	// If no good icons, use all of them
-	if len(iconURLs) == 0 {
-		for _, icon := range icons {
-			iconURLs = append(iconURLs, icon.URL)
-		}
-	}
-
-	// Try to download each icon
-	for _, iconURL := range iconURLs {
-		data, mimeType, err := fetchIconBytes(client, iconURL)
-		if err == nil && len(data) > 0 {
-			return data, mimeType, nil
-		}
-	}
-
-	// Final fallback to Google
-	googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
-	return fetchIconBytes(client, googleURL)
-}
-
-// fetchIconBytes downloads an icon and returns its bytes and mime type
-func fetchIconBytes(client *http.Client, iconURL string) ([]byte, string, error) {
-	resp, err := client.Get(iconURL)
-	if err != nil {
-		return nil, "", err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, "", fmt.Errorf("HTTP %d", resp.StatusCode)
-	}
-
-	data, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, "", err
-	}
-
-	// Determine mime type
-	contentType := resp.Header.Get("Content-Type")
-	if contentType == "" {
-		if strings.HasSuffix(iconURL, ".png") {
-			contentType = "image/png"
-		} else if strings.HasSuffix(iconURL, ".ico") {
-			contentType = "image/x-icon"
-		} else {
-			contentType = "image/png"
-		}
-	}
-
-	return data, contentType, nil
-}
diff --git a/main.go b/main.go
index d7c9a54..b272b26 100644
--- a/main.go
+++ b/main.go
@@ -43,9 +43,6 @@ func main() {
 	// TLD sync loop (background) - syncs with IANA, marks dead TLDs, adds new ones
 	go crawler.startTLDSyncLoop()
 
-	// Publish loop (background) - autopublishes items for approved feeds
-	go crawler.StartPublishLoop()
-
 	// Domain loop (background) - domain_check + feed_crawl
 	go crawler.StartDomainLoop()
 
diff --git a/pds_auth.go b/pds_auth.go
deleted file mode 100644
index 8572ebf..0000000
--- a/pds_auth.go
+++ /dev/null
@@ -1,187 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"time"
-)
-
-// CreateSession authenticates with the PDS and returns a session
-func (p *Publisher) CreateSession(handle, password string) (*PDSSession, error) {
-	payload := map[string]string{
-		"identifier": handle,
-		"password":   password,
-	}
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return nil, err
-	}
-
-	resp, err := p.httpClient.Post(
-		p.pdsHost+"/xrpc/com.atproto.server.createSession",
-		"application/json",
-		bytes.NewReader(body),
-	)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("auth failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	var session PDSSession
-	if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
-		return nil, err
-	}
-
-	return &session, nil
-}
-
-// CreateAccount creates a new account on the PDS
-// Requires an invite code if the PDS has invites enabled
-func (p *Publisher) CreateAccount(handle, email, password, inviteCode string) (*PDSSession, error) {
-	payload := map[string]interface{}{
-		"handle":   handle,
-		"email":    email,
-		"password": password,
-	}
-	if inviteCode != "" {
-		payload["inviteCode"] = inviteCode
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return nil, err
-	}
-
-	resp, err := p.httpClient.Post(
-		p.pdsHost+"/xrpc/com.atproto.server.createAccount",
-		"application/json",
-		bytes.NewReader(body),
-	)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("create account failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	var session PDSSession
-	if err := json.Unmarshal(respBody, &session); err != nil {
-		return nil, err
-	}
-
-	return &session, nil
-}
-
-// CreateInviteCode creates an invite code using PDS admin password (Basic Auth)
-func (p *Publisher) CreateInviteCode(adminPassword string, useCount int) (string, error) {
-	payload := map[string]interface{}{
-		"useCount": useCount,
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return "", err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.server.createInviteCode", bytes.NewReader(body))
-	if err != nil {
-		return "", err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	// PDS admin APIs use Basic Auth with "admin" as username
-	req.SetBasicAuth("admin", adminPassword)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("create invite failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	var result struct {
-		Code string `json:"code"`
-	}
-	if err := json.Unmarshal(respBody, &result); err != nil {
-		return "", err
-	}
-
-	return result.Code, nil
-}
-
-// FollowAccount creates a follow record from the authenticated session to the target DID
-func (p *Publisher) FollowAccount(session *PDSSession, targetDID string) error {
-	// Create follow record
-	now := time.Now().UTC().Format(time.RFC3339)
-	record := map[string]interface{}{
-		"$type":     "app.bsky.graph.follow",
-		"subject":   targetDID,
-		"createdAt": now,
-	}
-
-	payload := map[string]interface{}{
-		"repo":       session.DID,
-		"collection": "app.bsky.graph.follow",
-		"record":     record,
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("follow failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
-
-// FollowAsDirectory logs in as the directory account and follows the target DID
-func (p *Publisher) FollowAsDirectory(targetDID string) error {
-	dirHandle := os.Getenv("DIRECTORY_HANDLE")
-	dirPassword := os.Getenv("DIRECTORY_PASSWORD")
-
-	if dirHandle == "" || dirPassword == "" {
-		// Silently skip if directory account not configured
-		return nil
-	}
-
-	session, err := p.CreateSession(dirHandle, dirPassword)
-	if err != nil {
-		return fmt.Errorf("directory login failed: %w", err)
-	}
-
-	return p.FollowAccount(session, targetDID)
-}
diff --git a/pds_records.go b/pds_records.go
deleted file mode 100644
index 97adae7..0000000
--- a/pds_records.go
+++ /dev/null
@@ -1,349 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"net/url"
-	"strings"
-)
-
-// BlobRef represents a blob reference for profile images
-type BlobRef struct {
-	Type     string `json:"$type"`
-	Ref      Link   `json:"ref"`
-	MimeType string `json:"mimeType"`
-	Size     int64  `json:"size"`
-}
-
-type Link struct {
-	Link string `json:"$link"`
-}
-
-// UploadBlob uploads an image to the PDS and returns a blob reference
-func (p *Publisher) UploadBlob(session *PDSSession, data []byte, mimeType string) (*BlobRef, error) {
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.uploadBlob", bytes.NewReader(data))
-	if err != nil {
-		return nil, err
-	}
-	req.Header.Set("Content-Type", mimeType)
-	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("upload blob failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	var result struct {
-		Blob BlobRef `json:"blob"`
-	}
-	if err := json.Unmarshal(respBody, &result); err != nil {
-		return nil, err
-	}
-
-	return &result.Blob, nil
-}
-
-// UpdateProfile updates the profile for an account
-func (p *Publisher) UpdateProfile(session *PDSSession, displayName, description string, avatar *BlobRef) error {
-	// First, get the current profile to preserve any existing fields
-	getReq, err := http.NewRequest("GET",
-		p.pdsHost+"/xrpc/com.atproto.repo.getRecord?repo="+session.DID+"&collection=app.bsky.actor.profile&rkey=self",
-		nil)
-	if err != nil {
-		return err
-	}
-	getReq.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	getResp, err := p.httpClient.Do(getReq)
-
-	var existingCID string
-	profile := map[string]interface{}{
-		"$type": "app.bsky.actor.profile",
-	}
-
-	if err == nil && getResp.StatusCode == http.StatusOK {
-		defer getResp.Body.Close()
-		var existing struct {
-			CID   string                 `json:"cid"`
-			Value map[string]interface{} `json:"value"`
-		}
-		if json.NewDecoder(getResp.Body).Decode(&existing) == nil {
-			existingCID = existing.CID
-			profile = existing.Value
-		}
-	} else if getResp != nil {
-		getResp.Body.Close()
-	}
-
-	// Update fields
-	if displayName != "" {
-		profile["displayName"] = displayName
-	}
-	if description != "" {
-		profile["description"] = description
-	}
-	if avatar != nil {
-		profile["avatar"] = avatar
-	}
-
-	// Put the record
-	payload := map[string]interface{}{
-		"repo":       session.DID,
-		"collection": "app.bsky.actor.profile",
-		"rkey":       "self",
-		"record":     profile,
-	}
-	if existingCID != "" {
-		payload["swapRecord"] = existingCID
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.putRecord", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		return fmt.Errorf("update profile failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
-
-// DeleteAllPosts deletes all posts from an account
-func (p *Publisher) DeleteAllPosts(session *PDSSession) (int, error) {
-	deleted := 0
-	cursor := ""
-
-	for {
-		// List records
-		listURL := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords?repo=%s&collection=app.bsky.feed.post&limit=100",
-			p.pdsHost, session.DID)
-		if cursor != "" {
-			listURL += "&cursor=" + url.QueryEscape(cursor)
-		}
-
-		req, err := http.NewRequest("GET", listURL, nil)
-		if err != nil {
-			return deleted, err
-		}
-		req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-		resp, err := p.httpClient.Do(req)
-		if err != nil {
-			return deleted, err
-		}
-
-		var result struct {
-			Records []struct {
-				URI string `json:"uri"`
-			} `json:"records"`
-			Cursor string `json:"cursor"`
-		}
-
-		respBody, _ := io.ReadAll(resp.Body)
-		resp.Body.Close()
-
-		if resp.StatusCode != http.StatusOK {
-			return deleted, fmt.Errorf("list records failed: %s - %s", resp.Status, string(respBody))
-		}
-
-		if err := json.Unmarshal(respBody, &result); err != nil {
-			return deleted, err
-		}
-
-		if len(result.Records) == 0 {
-			break
-		}
-
-		// Delete each record
-		for _, record := range result.Records {
-			// Extract rkey from URI: at://did:plc:xxx/app.bsky.feed.post/rkey
-			parts := strings.Split(record.URI, "/")
-			if len(parts) < 2 {
-				continue
-			}
-			rkey := parts[len(parts)-1]
-
-			if err := p.DeleteRecord(session, "app.bsky.feed.post", rkey); err != nil {
-				// Continue deleting other records even if one fails
-				continue
-			}
-			deleted++
-		}
-
-		cursor = result.Cursor
-		if cursor == "" {
-			break
-		}
-	}
-
-	return deleted, nil
-}
-
-// DeleteRecord deletes a single record from an account
-func (p *Publisher) DeleteRecord(session *PDSSession, collection, rkey string) error {
-	payload := map[string]interface{}{
-		"repo":       session.DID,
-		"collection": collection,
-		"rkey":       rkey,
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.deleteRecord", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("delete record failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
-
-// DeleteAccount deletes an account using PDS admin API
-func (p *Publisher) DeleteAccount(adminPassword, did string) error {
-	payload := map[string]interface{}{
-		"did": did,
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.deleteAccount", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.SetBasicAuth("admin", adminPassword)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("delete account failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
-
-// TakedownAccount applies a takedown to an account (hides content, preserves data)
-func (p *Publisher) TakedownAccount(adminPassword, did, reason string) error {
-	payload := map[string]interface{}{
-		"subject": map[string]interface{}{
-			"$type": "com.atproto.admin.defs#repoRef",
-			"did":   did,
-		},
-		"takedown": map[string]interface{}{
-			"applied": true,
-			"ref":     reason,
-		},
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.SetBasicAuth("admin", adminPassword)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("takedown account failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
-
-// RestoreAccount removes a takedown from an account (makes content visible again)
-func (p *Publisher) RestoreAccount(adminPassword, did string) error {
-	payload := map[string]interface{}{
-		"subject": map[string]interface{}{
-			"$type": "com.atproto.admin.defs#repoRef",
-			"did":   did,
-		},
-		"takedown": map[string]interface{}{
-			"applied": false,
-		},
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.SetBasicAuth("admin", adminPassword)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		respBody, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("restore account failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	return nil
-}
diff --git a/publisher.go b/publisher.go
deleted file mode 100644
index 2678409..0000000
--- a/publisher.go
+++ /dev/null
@@ -1,439 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"crypto/sha256"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"regexp"
-	"strings"
-	"time"
-)
-
-// Publisher handles posting items to AT Protocol PDS
-type Publisher struct {
-	pdsHost    string
-	httpClient *http.Client
-}
-
-// PDSSession holds authentication info for a PDS account
-type PDSSession struct {
-	DID        string `json:"did"`
-	Handle     string `json:"handle"`
-	AccessJwt  string `json:"accessJwt"`
-	RefreshJwt string `json:"refreshJwt"`
-}
-
-// BskyPost represents an app.bsky.feed.post record
-type BskyPost struct {
-	Type      string      `json:"$type"`
-	Text      string      `json:"text"`
-	CreatedAt string      `json:"createdAt"`
-	Facets    []BskyFacet `json:"facets,omitempty"`
-	Embed     *BskyEmbed  `json:"embed,omitempty"`
-}
-
-type BskyFacet struct {
-	Index    BskyByteSlice `json:"index"`
-	Features []BskyFeature `json:"features"`
-}
-
-type BskyByteSlice struct {
-	ByteStart int `json:"byteStart"`
-	ByteEnd   int `json:"byteEnd"`
-}
-
-type BskyFeature struct {
-	Type string `json:"$type"`
-	URI  string `json:"uri,omitempty"`
-	Tag  string `json:"tag,omitempty"` // For hashtag facets
-}
-
-type BskyEmbed struct {
-	Type     string        `json:"$type"`
-	External *BskyExternal `json:"external,omitempty"`
-	Images   []BskyImage   `json:"images,omitempty"`
-}
-
-type BskyExternal struct {
-	URI         string   `json:"uri"`
-	Title       string   `json:"title"`
-	Description string   `json:"description"`
-	Thumb       *BlobRef `json:"thumb,omitempty"`
-}
-
-type BskyImage struct {
-	Alt         string           `json:"alt"`
-	Image       *BlobRef         `json:"image"`
-	AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
-}
-
-type BskyAspectRatio struct {
-	Width  int `json:"width"`
-	Height int `json:"height"`
-}
-
-// NewPublisher creates a new Publisher instance
-func NewPublisher(pdsHost string) *Publisher {
-	return &Publisher{
-		pdsHost: pdsHost,
-		httpClient: &http.Client{
-			Timeout: 30 * time.Second,
-		},
-	}
-}
-
-// TID alphabet for base32-sortable encoding
-const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"
-
-// GenerateRkey creates a deterministic TID-format rkey from a GUID and timestamp
-// TIDs are required by Bluesky relay for indexing - custom rkeys don't sync
-// Format: 13 chars base32-sortable, 53 bits timestamp + 10 bits clock ID
-func GenerateRkey(guid string, timestamp time.Time) string {
-	if guid == "" {
-		return ""
-	}
-
-	// Get microseconds since Unix epoch (53 bits)
-	microsInt := timestamp.UnixMicro()
-	if microsInt < 0 {
-		microsInt = 0
-	}
-	// Convert to uint64 and mask to 53 bits
-	micros := uint64(microsInt) & ((1 << 53) - 1)
-
-	// Generate deterministic 10-bit clock ID from GUID hash
-	// Use XOR of multiple hash bytes to reduce collisions
-	hash := sha256.Sum256([]byte(guid))
-	// XOR bytes 0-3 together, then 4-7, combine for more entropy
-	h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
-	h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
-	clockID := (h1 << 2) | (h2 >> 6)
-	clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023
-
-	// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
-	tid := (micros << 10) | clockID
-
-	// Encode as base32-sortable (13 characters)
-	var result [13]byte
-	for i := 12; i >= 0; i-- {
-		result[i] = tidAlphabet[tid&0x1f]
-		tid >>= 5
-	}
-
-	return string(result[:])
-}
-
-// extractURLs finds all URLs in a string
-func extractURLs(text string) []string {
-	// Match http:// or https:// URLs
-	urlRegex := regexp.MustCompile(`https?://[^\s<>"'\)]+`)
-	matches := urlRegex.FindAllString(text, -1)
-
-	// Clean up trailing punctuation
-	var urls []string
-	for _, u := range matches {
-		// Remove trailing punctuation that's likely not part of the URL
-		u = strings.TrimRight(u, ".,;:!?")
-		if u != "" {
-			urls = append(urls, u)
-		}
-	}
-	return urls
-}
-
-// toCamelCaseTag converts a tag string to camelCase hashtag format
-// e.g., "Lagos News" -> "lagosNews", "AI" -> "ai", "machine learning" -> "machineLearning"
-func toCamelCaseTag(tag string) string {
-	tag = strings.TrimSpace(tag)
-	if tag == "" {
-		return ""
-	}
-
-	// Remove any # prefix if present
-	tag = strings.TrimPrefix(tag, "#")
-
-	// Split on spaces and other separators
-	words := strings.FieldsFunc(tag, func(r rune) bool {
-		return r == ' ' || r == '-' || r == '_'
-	})
-
-	if len(words) == 0 {
-		return ""
-	}
-
-	// If single word, return lowercased
-	if len(words) == 1 {
-		return strings.ToLower(words[0])
-	}
-
-	// Multiple words: lowercase first word, capitalize first letter of subsequent words
-	var result strings.Builder
-	for i, word := range words {
-		if word == "" {
-			continue
-		}
-		runes := []rune(word)
-		if len(runes) > 0 {
-			if i == 0 || result.Len() == 0 {
-				// First word: all lowercase
-				result.WriteString(strings.ToLower(word))
-			} else {
-				// Subsequent words: capitalize first letter, lowercase rest
-				result.WriteString(strings.ToUpper(string(runes[0])))
-				if len(runes) > 1 {
-					result.WriteString(strings.ToLower(string(runes[1:])))
-				}
-			}
-		}
-	}
-	return result.String()
-}
-
-// formatTagsForPost converts item tags to hashtag text and facets
-// Returns the hashtag line (e.g., "#AI #MachineLearning #News") and facets
-func formatTagsForPost(tags []string, textOffset int) (string, []BskyFacet) {
-	if len(tags) == 0 {
-		return "", nil
-	}
-
-	// Dedupe and convert tags
-	seen := make(map[string]bool)
-	var hashtags []string
-	for _, tag := range tags {
-		camel := toCamelCaseTag(tag)
-		if camel == "" || seen[strings.ToLower(camel)] {
-			continue
-		}
-		seen[strings.ToLower(camel)] = true
-		hashtags = append(hashtags, camel)
-	}
-
-	if len(hashtags) == 0 {
-		return "", nil
-	}
-
-	// Limit to 5 tags to keep post compact
-	if len(hashtags) > 5 {
-		hashtags = hashtags[:5]
-	}
-
-	// Build the hashtag line and facets
-	var line strings.Builder
-	var facets []BskyFacet
-	currentOffset := textOffset
-
-	for i, ht := range hashtags {
-		if i > 0 {
-			line.WriteString(" ")
-			currentOffset++
-		}
-
-		hashtagText := "#" + ht
-		byteStart := currentOffset
-		byteEnd := currentOffset + len(hashtagText)
-
-		line.WriteString(hashtagText)
-
-		facets = append(facets, BskyFacet{
-			Index: BskyByteSlice{
-				ByteStart: byteStart,
-				ByteEnd:   byteEnd,
-			},
-			Features: []BskyFeature{{
-				Type: "app.bsky.richtext.facet#tag",
-				Tag:  ht,
-			}},
-		})
-
-		currentOffset = byteEnd
-	}
-
-	return line.String(), facets
-}
-
-// PublishItem posts a feed item to the PDS
-// Returns the AT URI of the created record, or error
-func (p *Publisher) PublishItem(session *PDSSession, item *Item) (string, error) {
-	if item.GUID == "" && item.Link == "" {
-		return "", fmt.Errorf("item has no GUID or link, cannot publish")
-	}
-
-	// Collect URLs: main link + HN comments link (if applicable)
-	// Limit to 2 URLs max to stay under 300 grapheme limit
-	urlSet := make(map[string]bool)
-	var allURLs []string
-
-	// Add main link first
-	if item.Link != "" {
-		urlSet[item.Link] = true
-		allURLs = append(allURLs, item.Link)
-	}
-
-	// For HN feeds, add comments link from description (looks like "https://news.ycombinator.com/item?id=...")
-	descURLs := extractURLs(item.Description)
-	for _, u := range descURLs {
-		if strings.Contains(u, "news.ycombinator.com/item") && !urlSet[u] {
-			urlSet[u] = true
-			allURLs = append(allURLs, u)
-			break // Only add one comments link
-		}
-	}
-
-	// Add enclosure URL for podcasts/media (audio/video) if we have room
-	// Bluesky has 300 char limit, so only add if total URLs + minimal title fits
-	if len(allURLs) < 2 && item.Enclosure != nil && item.Enclosure.URL != "" {
-		encType := strings.ToLower(item.Enclosure.Type)
-		if strings.HasPrefix(encType, "audio/") || strings.HasPrefix(encType, "video/") {
-			if !urlSet[item.Enclosure.URL] {
-				// Calculate if enclosure would fit (need ~60 chars for title + separators)
-				currentURLLen := 0
-				for _, u := range allURLs {
-					currentURLLen += len(u) + 2 // +2 for \n\n
-				}
-				enclosureLen := len(item.Enclosure.URL) + 2
-				if currentURLLen+enclosureLen < 235 { // Leave 60 chars for title
-					urlSet[item.Enclosure.URL] = true
-					allURLs = append(allURLs, item.Enclosure.URL)
-				}
-			}
-		}
-	}
-
-	// Get the primary URL (article link)
-	primaryURL := ""
-	if len(allURLs) > 0 {
-		primaryURL = allURLs[0]
-	}
-
-	// Use original publication date if available, otherwise current time
-	createdAt := time.Now()
-	if !item.PubDate.IsZero() {
-		createdAt = item.PubDate
-	}
-
-	// Build post text with hashtags if available
-	// The link card shows the title, description, and thumbnail
-	// Clicking the card doesn't trigger the "leaving Bluesky" warning
-	postText := ""
-	var facets []BskyFacet
-
-	if len(item.Tags) > 0 {
-		tagLine, tagFacets := formatTagsForPost(item.Tags, 0)
-		postText = tagLine
-		facets = tagFacets
-	}
-
-	post := BskyPost{
-		Type:      "app.bsky.feed.post",
-		Text:      postText,
-		CreatedAt: createdAt.Format(time.RFC3339),
-		Facets:    facets,
-	}
-
-	// Always use external embed (link card) - clicking the card doesn't show "leaving" warning
-	if primaryURL != "" {
-		external := &BskyExternal{
-			URI:         primaryURL,
-			Title:       item.Title,
-			Description: truncate(stripHTML(item.Description), 300),
-		}
-
-		// Add thumbnail from first image if available
-		if len(item.ImageURLs) > 0 {
-			if thumb := p.fetchAndUploadImage(session, item.ImageURLs[0]); thumb != nil {
-				external.Thumb = thumb
-			}
-		}
-
-		post.Embed = &BskyEmbed{
-			Type:     "app.bsky.embed.external",
-			External: external,
-		}
-	}
-
-	// Use GUID + discoveredAt for deterministic rkey
-	// This allows regenerating a new rkey by updating discoveredAt if needed
-	guidForRkey := item.GUID
-	if guidForRkey == "" {
-		guidForRkey = item.Link
-	}
-	// Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
-	rkeyTime := item.PubDate
-	if rkeyTime.IsZero() {
-		rkeyTime = item.DiscoveredAt
-	}
-	rkey := GenerateRkey(guidForRkey, rkeyTime)
-
-	// Create the record with deterministic rkey
-	payload := map[string]interface{}{
-		"repo":       session.DID,
-		"collection": "app.bsky.feed.post",
-		"rkey":       rkey,
-		"record":     post,
-	}
-
-	body, err := json.Marshal(payload)
-	if err != nil {
-		return "", err
-	}
-
-	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
-	if err != nil {
-		return "", err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
-
-	resp, err := p.httpClient.Do(req)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("create record failed: %s - %s", resp.Status, string(respBody))
-	}
-
-	var result struct {
-		URI string `json:"uri"`
-		CID string `json:"cid"`
-	}
-	if err := json.Unmarshal(respBody, &result); err != nil {
-		return "", err
-	}
-
-	return result.URI, nil
-}
-
-func truncate(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
-	}
-	return s[:maxLen-3] + "..."
-}
-
-// stripHTML removes HTML tags from a string
-func stripHTML(s string) string {
-	// Remove HTML tags
-	tagRegex := regexp.MustCompile(`<[^>]*>`)
-	s = tagRegex.ReplaceAllString(s, "")
-
-	// Decode common HTML entities
-	s = strings.ReplaceAll(s, "&amp;", "&")
-	s = strings.ReplaceAll(s, "&lt;", "<")
-	s = strings.ReplaceAll(s, "&gt;", ">")
-	s = strings.ReplaceAll(s, "&quot;", "\"")
-	s = strings.ReplaceAll(s, "&#39;", "'")
-	s = strings.ReplaceAll(s, "&nbsp;", " ")
-
-	// Collapse whitespace
-	spaceRegex := regexp.MustCompile(`\s+`)
-	s = spaceRegex.ReplaceAllString(s, " ")
-
-	return strings.TrimSpace(s)
-}
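
A note on the removed `handle.go`: `DeriveHandleFromFeed` enforces the 18-character PDS limit on the first label and special-cases `bbci` to `bbc`. If the function resurfaces in the `publish` service, a table test pinned to the two examples from its header comment would guard that behavior; a sketch, assuming the function is in scope:

```go
package main

import "testing"

// Pins the two examples documented in DeriveHandleFromFeed's header comment.
// Hypothetical test, not part of this diff; place it alongside the function.
func TestDeriveHandleFromFeed(t *testing.T) {
	cases := map[string]string{
		"feeds.bbci.co.uk/news/technology/rss.xml": "bbc-technology.1440.news",
		"news.ycombinator.com/rss":                 "ycombinator.1440.news",
	}
	for in, want := range cases {
		if got := DeriveHandleFromFeed(in); got != want {
			t.Errorf("DeriveHandleFromFeed(%q) = %q, want %q", in, got, want)
		}
	}
}
```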
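On the removed `GenerateRkey`: it packs a 53-bit microsecond timestamp and a 10-bit GUID-derived clock ID into a 13-character base32-sortable TID, so lexicographic order on rkeys matches chronological order. A small decoder makes the layout easy to verify; `decodeTID` is an illustrative helper that exists in neither service:

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"

// decodeTID reverses the GenerateRkey encoding: 13 base32-sortable characters
// unpack to 53 bits of Unix microseconds plus a 10-bit clock ID.
func decodeTID(rkey string) (time.Time, uint64, error) {
	if len(rkey) != 13 {
		return time.Time{}, 0, fmt.Errorf("expected 13 chars, got %d", len(rkey))
	}
	var tid uint64
	for _, c := range rkey {
		idx := strings.IndexRune(tidAlphabet, c)
		if idx < 0 {
			return time.Time{}, 0, fmt.Errorf("invalid TID char %q", c)
		}
		tid = tid<<5 | uint64(idx)
	}
	micros := tid >> 10              // high bits: microsecond timestamp
	clockID := tid & ((1 << 10) - 1) // low 10 bits: GUID-derived clock ID
	return time.UnixMicro(int64(micros)), clockID, nil
}

func main() {
	// Arbitrary example rkey; in practice this comes from GenerateRkey.
	t, clockID, err := decodeTID("3jzfcijpj2z2a")
	if err != nil {
		panic(err)
	}
	fmt.Println(t.UTC(), clockID)
}
```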
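The removed `truncate` slices by byte, so a cut can land mid-rune in non-ASCII titles or descriptions and leave invalid UTF-8. Wherever the helper migrates, a rune-safe variant avoids that; `truncateRunes` is a hypothetical name:

```go
// truncateRunes is a rune-safe take on the removed truncate helper
// (hypothetical; not part of this diff).
func truncateRunes(s string, maxLen int) string {
	r := []rune(s)
	if len(r) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		return string(r[:maxLen])
	}
	return string(r[:maxLen-3]) + "..."
}
```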
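Finally, the removed `stripHTML` decodes only six entities by hand. Go's standard `html` package covers the full named and numeric entity table, so a port of this helper could collapse the `ReplaceAll` chain; a sketch:

```go
package main

import (
	"html"
	"regexp"
	"strings"
)

var (
	tagRegex   = regexp.MustCompile(`<[^>]*>`)
	spaceRegex = regexp.MustCompile(`\s+`)
)

// stripHTML drops tags, decodes entities via the stdlib (handles &eacute;,
// &#x27;, and the rest, not just the six hand-listed ones), and collapses
// whitespace. Same shape as the removed helper, different decoding step.
func stripHTML(s string) string {
	s = tagRegex.ReplaceAllString(s, "")
	s = html.UnescapeString(s)
	s = spaceRegex.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}
```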