Remove publishing code - now handled by publish service

Publishing functionality has been moved to the standalone publish service.
Removed:
- publisher.go, pds_auth.go, pds_records.go, image.go, handle.go
- StartPublishLoop and related functions from crawler.go
- Publish loop invocation from main.go

Updated CLAUDE.md to reflect the new architecture.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
primal
2026-02-02 15:40:49 -05:00
parent 7b50f5c008
commit 6eaa39f9db
9 changed files with 3 additions and 1991 deletions
+3 -3
@@ -47,13 +47,14 @@ Multi-file Go application that crawls websites for RSS/Atom feeds, stores them in PostgreSQL
### Concurrent Loops (main.go)
-The application runs six independent goroutine loops:
+The application runs five independent goroutine loops:
- **Import loop** - Reads `vertices.txt.gz` and inserts domains into DB in batches of 100 (status='pass')
- **Crawl loop** - Worker pool crawls approved domains for feed discovery
- **Feed check loop** - Worker pool re-checks known feeds for updates (conditional HTTP)
- **Stats loop** - Updates cached dashboard statistics every minute
- **Cleanup loop** - Removes items older than 12 months (weekly)
-- **Publish loop** - Autopublishes items from approved feeds to AT Protocol PDS
+Note: Publishing is handled by the separate `publish` service.
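A minimal sketch of the launch pattern (illustrative only: `Crawler` here is a stub and the two loop methods are placeholders; the real wiring appears in the main.go diff below):

package main

import "time"

type Crawler struct{}

// Each loop runs independently until process shutdown.
func (c *Crawler) StartCrawlLoop()     { for { time.Sleep(time.Second) } } // crawl approved domains
func (c *Crawler) StartFeedCheckLoop() { for { time.Sleep(time.Minute) } } // re-check known feeds

func main() {
	c := &Crawler{}
	go c.StartCrawlLoop()
	go c.StartFeedCheckLoop()
	select {} // the real main blocks on a shutdown signal instead
}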
### File Structure
@@ -67,7 +68,6 @@ The application runs six independent goroutine loops:
| `util.go` | URL normalization, host utilities, TLD extraction |
| `db.go` | PostgreSQL schema (domains, feeds, items tables with tsvector FTS) |
| `dashboard.go` | HTTP server, JSON APIs, HTML template |
-| `publisher.go` | AT Protocol PDS integration for posting items |
| `oauth.go` | OAuth 2.0 client wrapper for AT Protocol authentication |
| `oauth_session.go` | Session management with AES-256-GCM encrypted cookies |
| `oauth_middleware.go` | RequireAuth middleware for protecting routes |
-361
@@ -3,12 +3,10 @@ package main
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"os"
"strings"
"sync"
"sync/atomic"
@@ -147,365 +145,6 @@ func (c *Crawler) StartMaintenanceLoop() {
}
}
// StartPublishLoop automatically publishes unpublished items for approved feeds
// Grabs up to 50 items sorted by discovered_at, publishes one per second, then repeats
func (c *Crawler) StartPublishLoop() {
// Load PDS credentials from environment or pds.env file
pdsHost := os.Getenv("PDS_HOST")
pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD")
if pdsHost == "" || pdsAdminPassword == "" {
if data, err := os.ReadFile("pds.env"); err == nil {
for _, line := range strings.Split(string(data), "\n") {
line = strings.TrimSpace(line)
if strings.HasPrefix(line, "#") || line == "" {
continue
}
parts := strings.SplitN(line, "=", 2)
if len(parts) == 2 {
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
switch key {
case "PDS_HOST":
pdsHost = value
case "PDS_ADMIN_PASSWORD":
pdsAdminPassword = value
}
}
}
}
}
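// For reference, pds.env is a plain KEY=VALUE file parsed by the block above;
// # lines are comments and blank lines are skipped. Illustrative contents
// (placeholder values):
//
//	# PDS connection
//	PDS_HOST=https://pds.example.com
//	PDS_ADMIN_PASSWORD=changeme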
if pdsHost == "" || pdsAdminPassword == "" {
fmt.Println("Publish loop: PDS credentials not configured, skipping")
return
}
fmt.Printf("Publish loop: starting with PDS %s\n", pdsHost)
feedPassword := "feed1440!"
// Cache sessions per account
sessions := make(map[string]*PDSSession)
publisher := NewPublisher(pdsHost)
// Refresh existing account profiles on startup
c.RefreshAllProfiles(publisher, feedPassword)
for {
if c.IsShuttingDown() {
return
}
// Get up to 50 unpublished items from approved feeds, sorted by discovered_at ASC
items, err := c.GetAllUnpublishedItems(50)
if err != nil {
fmt.Printf("Publish loop error: %v\n", err)
time.Sleep(1 * time.Second)
continue
}
if len(items) == 0 {
time.Sleep(1 * time.Second)
continue
}
// Publish one item per second
for _, item := range items {
if c.IsShuttingDown() {
return
}
// Get or create session for this feed's account
account := c.getAccountForFeed(item.FeedURL)
if account == "" {
time.Sleep(1 * time.Second)
continue
}
session, ok := sessions[account]
if !ok {
// Try to log in
session, err = publisher.CreateSession(account, feedPassword)
if err != nil {
// Account might not exist - try to create it
inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1)
if err != nil {
fmt.Printf("Publish: failed to create invite for %s: %v\n", account, err)
time.Sleep(1 * time.Second)
continue
}
email := account + "@1440.news"
session, err = publisher.CreateAccount(account, email, feedPassword, inviteCode)
if err != nil {
fmt.Printf("Publish: failed to create account %s: %v\n", account, err)
time.Sleep(1 * time.Second)
continue
}
fmt.Printf("Publish: created account %s\n", account)
c.db.Exec("UPDATE feeds SET publish_account = $1 WHERE url = $2", account, item.FeedURL)
// Set up profile for new account
feedInfo := c.getFeedInfo(item.FeedURL)
if feedInfo != nil {
displayName := feedInfo.Title
if displayName == "" {
displayName = account
}
// Build description with feed URL (strip HTML tags)
description := stripHTML(feedInfo.Description)
if description == "" {
description = "News feed via 1440.news"
}
// Add feed URL as first line of description
feedURLFull := "https://" + item.FeedURL
description = feedURLFull + "\n\n" + description
// Truncate if needed
if len(displayName) > 64 {
displayName = displayName[:61] + "..."
}
if len(description) > 256 {
description = description[:253] + "..."
}
// Fetch and upload favicon as avatar
var avatar *BlobRef
faviconSource := feedInfo.SiteURL
if faviconSource == "" {
// Fallback to deriving from feed URL
faviconSource = feedInfo.SourceHost
}
if faviconSource != "" {
faviconURL := publisher.FetchFavicon(faviconSource)
if faviconURL != "" {
avatar = publisher.fetchAndUploadImage(session, faviconURL)
}
}
if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil {
fmt.Printf("Publish: failed to set profile for %s: %v\n", account, err)
} else {
fmt.Printf("Publish: set profile for %s\n", account)
}
// Have directory account follow this new account
if err := publisher.FollowAsDirectory(session.DID); err != nil {
fmt.Printf("Publish: directory follow failed for %s: %v\n", account, err)
} else {
fmt.Printf("Publish: directory now following %s\n", account)
}
}
}
sessions[account] = session
}
// Shorten URLs before publishing
itemToPublish := item
if item.Link != "" {
if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
itemToPublish.Link = shortURL
} else {
fmt.Printf("Publish: short URL failed for %s: %v\n", item.Link[:min(40, len(item.Link))], err)
}
}
// Publish the item
uri, err := publisher.PublishItem(session, &itemToPublish)
if err != nil {
fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
// Clear session cache on auth errors
if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
delete(sessions, account)
}
} else {
c.MarkItemPublished(item.FeedURL, item.GUID, uri)
fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
}
time.Sleep(1 * time.Second)
}
time.Sleep(1 * time.Second)
}
}
// getAccountForFeed returns the publish account for a feed URL
func (c *Crawler) getAccountForFeed(feedURL string) string {
var account *string
err := c.db.QueryRow(`
SELECT publish_account FROM feeds
WHERE url = $1 AND publish_status = 'pass' AND status = 'pass'
`, feedURL).Scan(&account)
if err != nil || account == nil || *account == "" {
// Derive handle from feed URL
return DeriveHandleFromFeed(feedURL)
}
return *account
}
// FeedInfo holds basic feed metadata for profile setup
type FeedInfo struct {
Title string
Description string
SiteURL string
SourceHost string
}
// getFeedInfo returns feed metadata for profile setup
func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
var title, description, siteURL, sourceHost *string
err := c.db.QueryRow(`
SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1
`, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
if err != nil {
return nil
}
return &FeedInfo{
Title: StringValue(title),
Description: StringValue(description),
SiteURL: StringValue(siteURL),
SourceHost: StringValue(sourceHost),
}
}
// RefreshAllProfiles updates profiles for all existing accounts with feed URLs
func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
rows, err := c.db.Query(`
SELECT url, title, description, site_url, domain_host as source_host, publish_account
FROM feeds
WHERE publish_account IS NOT NULL AND publish_account <> ''
`)
if err != nil {
fmt.Printf("RefreshProfiles: query error: %v\n", err)
return
}
defer rows.Close()
for rows.Next() {
var feedURL, account string
var title, description, siteURL, sourceHost *string
if err := rows.Scan(&feedURL, &title, &description, &siteURL, &sourceHost, &account); err != nil {
continue
}
// Login to account
session, err := publisher.CreateSession(account, feedPassword)
if err != nil {
fmt.Printf("RefreshProfiles: login failed for %s: %v\n", account, err)
continue
}
// Build profile
displayName := StringValue(title)
if displayName == "" {
displayName = account
}
desc := stripHTML(StringValue(description))
if desc == "" {
desc = "News feed via 1440.news"
}
// Add feed URL as first line
feedURLFull := "https://" + feedURL
desc = feedURLFull + "\n\n" + desc
// Truncate if needed
if len(displayName) > 64 {
displayName = displayName[:61] + "..."
}
if len(desc) > 256 {
desc = desc[:253] + "..."
}
// Fetch and upload favicon as avatar
var avatar *BlobRef
faviconSource := StringValue(siteURL)
if faviconSource == "" {
// Fallback to source host
faviconSource = StringValue(sourceHost)
}
if faviconSource != "" {
faviconURL := publisher.FetchFavicon(faviconSource)
if faviconURL != "" {
avatar = publisher.fetchAndUploadImage(session, faviconURL)
}
}
if err := publisher.UpdateProfile(session, displayName, desc, avatar); err != nil {
fmt.Printf("RefreshProfiles: update failed for %s: %v\n", account, err)
} else {
fmt.Printf("RefreshProfiles: updated %s\n", account)
}
}
}
// GetAllUnpublishedItems returns unpublished items from all approved feeds
func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
rows, err := c.db.Query(`
SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
i.enclosure_url, i.enclosure_type, i.enclosure_length
FROM items i
JOIN feeds f ON i.feed_url = f.url
WHERE f.publish_status = 'pass'
AND f.status = 'pass'
AND i.published_at IS NULL
ORDER BY i.discovered_at ASC
LIMIT $1
`, limit)
if err != nil {
return nil, err
}
defer rows.Close()
var items []Item
for rows.Next() {
var item Item
var guid, title, link, description, content, author, imageURLsJSON, tagsJSON *string
var pubDate, discoveredAt *time.Time
var enclosureURL, enclosureType *string
var enclosureLength *int64
err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
&enclosureURL, &enclosureType, &enclosureLength)
if err != nil {
continue
}
item.GUID = StringValue(guid)
item.Title = StringValue(title)
item.Link = StringValue(link)
item.Description = StringValue(description)
item.Content = StringValue(content)
item.Author = StringValue(author)
item.PubDate = TimeValue(pubDate)
item.DiscoveredAt = TimeValue(discoveredAt)
// Parse image URLs from JSON array
if imageURLsJSON != nil && *imageURLsJSON != "" {
json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
}
// Parse tags from JSON array
if tagsJSON != nil && *tagsJSON != "" {
json.Unmarshal([]byte(*tagsJSON), &item.Tags)
}
// Parse enclosure
if enclosureURL != nil && *enclosureURL != "" {
item.Enclosure = &Enclosure{
URL: *enclosureURL,
Type: StringValue(enclosureType),
}
if enclosureLength != nil {
item.Enclosure.Length = *enclosureLength
}
}
items = append(items, item)
}
return items, nil
}
// dnsResolver uses local caching DNS (infra-dns) with fallback to system
var dnsResolver = &net.Resolver{
PreferGo: true,
-6
@@ -452,15 +452,9 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
}
// SetPublishStatus sets the publish status for a feed ('hold', 'pass', 'skip')
// If status is 'pass', the account handle is also set (auto-derived if empty)
func (c *Crawler) SetPublishStatus(feedURL, status, account string) error {
feedURL = normalizeURL(feedURL)
// Auto-derive account if passing and not provided
if status == "pass" && account == "" {
account = DeriveHandleFromFeed(feedURL)
}
_, err := c.db.Exec(`
UPDATE feeds SET publish_status = $1, publish_account = $2 WHERE url = $3
`, status, NullableString(account), feedURL)
-262
@@ -1,262 +0,0 @@
package main
import (
"net/url"
"regexp"
"strings"
)
// DeriveHandleFromFeed generates an AT Protocol handle from a feed URL
// Format: {domain}-{category}.1440.news
// AT Protocol allows up to 63 characters per label, but the PDS
// restricts the first segment to 18 characters for local handles.
// Examples:
//
// feeds.bbci.co.uk/news/technology/rss.xml → bbc-technology.1440.news
// news.ycombinator.com/rss → ycombinator.1440.news
func DeriveHandleFromFeed(feedURL string) string {
const maxSubdomainLen = 18 // PDS limit for first segment
// Ensure we have a scheme for parsing
if !strings.Contains(feedURL, "://") {
feedURL = "https://" + feedURL
}
u, err := url.Parse(feedURL)
if err != nil {
return ""
}
hostname := strings.ToLower(u.Hostname())
path := strings.ToLower(u.Path)
// Remove common feed suffixes/extensions
suffixesToRemove := []string{".xml", ".rss", ".atom", ".json", "/rss", "/feed", "/atom", "/index"}
for _, suffix := range suffixesToRemove {
path = strings.TrimSuffix(path, suffix)
}
// Split path into segments and filter noise
segments := strings.Split(strings.Trim(path, "/"), "/")
skipPathWords := map[string]bool{
"rss": true, "feed": true, "feeds": true, "atom": true,
"xml": true, "default": true, "index": true, "services": true,
"nyt": true,
}
var pathParts []string
for _, seg := range segments {
seg = cleanHandleSegment(seg)
if seg != "" && !skipPathWords[seg] {
pathParts = append(pathParts, seg)
}
}
// Split hostname and extract the meaningful domain
hostParts := strings.Split(hostname, ".")
// Two-part TLDs to handle specially
twoPartTLDs := map[string]bool{
"co.uk": true, "com.au": true, "co.nz": true, "co.jp": true,
"com.br": true, "co.in": true, "org.uk": true, "ac.uk": true,
}
// Check for two-part TLD
if len(hostParts) >= 2 {
possibleTwoPartTLD := hostParts[len(hostParts)-2] + "." + hostParts[len(hostParts)-1]
if twoPartTLDs[possibleTwoPartTLD] {
hostParts = hostParts[:len(hostParts)-2]
} else {
// Single TLD - remove it
singleTLDs := map[string]bool{
"com": true, "org": true, "net": true, "io": true,
"edu": true, "gov": true, "uk": true, "de": true, "fr": true,
}
if singleTLDs[hostParts[len(hostParts)-1]] {
hostParts = hostParts[:len(hostParts)-1]
}
}
}
// Skip noise subdomains
skipHostWords := map[string]bool{
"www": true, "feeds": true, "rss": true, "feed": true,
"api": true, "cdn": true, "static": true, "news": true,
}
var meaningfulHostParts []string
for _, part := range hostParts {
if !skipHostWords[part] && part != "" {
meaningfulHostParts = append(meaningfulHostParts, part)
}
}
// Get the main domain (e.g., "bbci", "ycombinator", "nytimes")
var mainDomain string
if len(meaningfulHostParts) > 0 {
mainDomain = meaningfulHostParts[len(meaningfulHostParts)-1]
} else if len(hostParts) > 0 {
mainDomain = hostParts[len(hostParts)-1]
}
// Special case: "bbci" should become "bbc"
if mainDomain == "bbci" {
mainDomain = "bbc"
}
// Abbreviations for long category names to fit 18-char limit
categoryAbbrevs := map[string]string{
"science-and-environment": "sci-env",
"entertainment-and-arts": "ent-arts",
"science-environment": "sci-env",
"entertainment-arts": "ent-arts",
"technology": "tech",
"business": "biz",
"international": "intl",
"environment": "env",
"entertainment": "ent",
"politics": "pol",
}
// Build subdomain: domain + category (from path)
var subdomain string
if len(pathParts) > 0 {
// Use last meaningful path part as category (e.g., "technology" from /news/technology/)
category := pathParts[len(pathParts)-1]
// Skip generic categories
if category == "news" && len(pathParts) == 1 {
subdomain = mainDomain
} else {
// Try to abbreviate if the full subdomain would be too long
fullSubdomain := mainDomain + "-" + category
if len(fullSubdomain) > maxSubdomainLen {
if abbrev, ok := categoryAbbrevs[category]; ok {
category = abbrev
}
}
subdomain = mainDomain + "-" + category
}
} else {
subdomain = mainDomain
}
// If still too long, just use main hostname
if len(subdomain) > maxSubdomainLen {
subdomain = mainDomain
}
// Final safety: truncate if still too long
if len(subdomain) > maxSubdomainLen {
subdomain = subdomain[:maxSubdomainLen]
}
subdomain = strings.Trim(subdomain, "-")
// Collapse multiple hyphens
for strings.Contains(subdomain, "--") {
subdomain = strings.ReplaceAll(subdomain, "--", "-")
}
return subdomain + ".1440.news"
}
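// Worked examples (a sketch: the first two come from the doc comment above,
// the third is traced through the abbreviation table):
//
//	DeriveHandleFromFeed("feeds.bbci.co.uk/news/technology/rss.xml")
//	    → "bbc-technology.1440.news" (co.uk stripped, "feeds" skipped, bbci → bbc)
//	DeriveHandleFromFeed("news.ycombinator.com/rss")
//	    → "ycombinator.1440.news" ("/rss" trimmed, "news" and "com" skipped)
//	DeriveHandleFromFeed("www.bbc.co.uk/news/science-and-environment/rss.xml")
//	    → "bbc-sci-env.1440.news" (full category exceeds the 18-char limit, so it is abbreviated)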
// cleanHandleSegment sanitizes a string for use in an AT Protocol handle segment
// Handle segments must be alphanumeric with hyphens, no leading/trailing hyphens
func cleanHandleSegment(s string) string {
// Remove file extensions
if idx := strings.LastIndex(s, "."); idx > 0 {
s = s[:idx]
}
// Convert to lowercase
s = strings.ToLower(s)
// Strip common feed prefixes/suffixes from the segment itself
// e.g., "showrss" → "show", "rssworld" → "world"
feedAffixes := []string{"rss", "feed", "atom", "xml"}
for _, affix := range feedAffixes {
// Strip suffix (e.g., "showrss" → "show")
if strings.HasSuffix(s, affix) && len(s) > len(affix) {
s = strings.TrimSuffix(s, affix)
break
}
// Strip prefix (e.g., "rssworld" → "world")
if strings.HasPrefix(s, affix) && len(s) > len(affix) {
s = strings.TrimPrefix(s, affix)
break
}
}
// Replace underscores and other separators with hyphens
s = strings.ReplaceAll(s, "_", "-")
s = strings.ReplaceAll(s, " ", "-")
// Remove any characters that aren't alphanumeric or hyphens
reg := regexp.MustCompile(`[^a-z0-9-]`)
s = reg.ReplaceAllString(s, "")
// Collapse multiple hyphens
for strings.Contains(s, "--") {
s = strings.ReplaceAll(s, "--", "-")
}
// Trim leading/trailing hyphens
s = strings.Trim(s, "-")
return s
}
// SplitHandle extracts the path prefix and hostname from a derived handle
// Example: show.news.ycombinator.com.1440.news → ("show", "news.ycombinator.com")
func SplitHandle(handle string) (prefix string, hostname string) {
// Remove .1440.news suffix
handle = strings.TrimSuffix(handle, ".1440.news")
parts := strings.Split(handle, ".")
// Try to find where hostname starts by looking for valid hostname patterns
if len(parts) >= 2 {
for i := 0; i < len(parts)-1; i++ {
remaining := strings.Join(parts[i:], ".")
if looksLikeHostname(remaining) {
if i > 0 {
prefix = strings.Join(parts[:i], ".")
}
hostname = remaining
return
}
}
}
// Fallback: no prefix, entire thing is hostname
hostname = handle
return "", hostname
}
func isLikelyTLDPart(s string) bool {
tlds := map[string]bool{
"com": true, "org": true, "net": true, "edu": true, "gov": true,
"io": true, "co": true, "uk": true, "de": true, "fr": true,
"jp": true, "au": true, "ca": true, "nl": true, "se": true,
"news": true, "blog": true, "tech": true, "dev": true,
}
return tlds[s]
}
func isTwoPartTLD(first, second string) bool {
twoPartTLDs := map[string]bool{
"co.uk": true, "com.au": true, "co.jp": true, "co.nz": true,
"org.uk": true, "net.au": true, "com.br": true,
}
return twoPartTLDs[first+"."+second]
}
func looksLikeHostname(s string) bool {
// A hostname typically has at least one dot and ends with a TLD-like part
parts := strings.Split(s, ".")
if len(parts) < 2 {
return false
}
lastPart := parts[len(parts)-1]
return isLikelyTLDPart(lastPart)
}
-381
@@ -1,381 +0,0 @@
package main
import (
"bytes"
"fmt"
"image"
_ "image/gif"
"image/jpeg"
_ "image/png"
"io"
"net/http"
"net/url"
"strings"
"time"
"go.deanishe.net/favicon"
"golang.org/x/image/draw"
_ "golang.org/x/image/webp"
)
// ImageUploadResult contains the uploaded blob and image dimensions
type ImageUploadResult struct {
Blob *BlobRef
Width int
Height int
}
// uploadImages fetches and uploads up to 4 images, returning BskyImage structs
func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altText string) []BskyImage {
var images []BskyImage
maxImages := 4
if len(imageURLs) < maxImages {
maxImages = len(imageURLs)
}
for i := 0; i < maxImages; i++ {
result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
if result != nil && result.Blob != nil {
img := BskyImage{
Alt: altText,
Image: result.Blob,
}
if result.Width > 0 && result.Height > 0 {
img.AspectRatio = &BskyAspectRatio{
Width: result.Width,
Height: result.Height,
}
}
images = append(images, img)
}
}
return images
}
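// The cap of four images matches the app.bsky.embed.images limit of four
// images per post.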
// FetchFavicon tries to get a favicon URL for a site
// Uses go.deanishe.net/favicon library which parses HTML, manifests, and checks common paths
// Returns the favicon URL or empty string if not found
func (p *Publisher) FetchFavicon(siteURL string) string {
if siteURL == "" {
return ""
}
// Ensure URL has scheme
if !strings.Contains(siteURL, "://") {
siteURL = "https://" + siteURL
}
u, err := url.Parse(siteURL)
if err != nil {
return ""
}
// Create finder with custom HTTP client
// Note: Don't use IgnoreNoSize as it filters out valid favicon.ico files that don't have size metadata
finder := favicon.New(
favicon.WithClient(p.httpClient),
)
// Find icons - library checks HTML <link> tags, manifests, OG images, common paths
icons, err := finder.Find(siteURL)
if err == nil && len(icons) > 0 {
// Filter and score icons for avatar use
// Prefer: square icons, reasonable size, PNG format, actual favicons over OG images
var bestIcon string
var bestScore int
for _, icon := range icons {
// Skip tiny icons (likely tracking pixels)
if icon.Width > 0 && icon.Width < 32 {
continue
}
// Skip Open Graph images (meant for link previews, usually wide banners)
lowerURL := strings.ToLower(icon.URL)
if strings.Contains(lowerURL, "og-image") || strings.Contains(lowerURL, "og_image") ||
strings.Contains(lowerURL, "opengraph") || strings.Contains(lowerURL, "twitter") {
continue
}
// Skip wide images (aspect ratio > 1.5 means it's a banner, not an icon)
if icon.Width > 0 && icon.Height > 0 {
ratio := float64(icon.Width) / float64(icon.Height)
if ratio > 1.5 || ratio < 0.67 {
continue
}
}
// Score the icon
score := 0
// Prefer actual favicon paths
if strings.Contains(lowerURL, "favicon") || strings.Contains(lowerURL, "icon") ||
strings.Contains(lowerURL, "apple-touch") {
score += 100
}
// Prefer PNG over other formats
if icon.MimeType == "image/png" {
score += 50
} else if icon.MimeType == "image/x-icon" || strings.HasSuffix(lowerURL, ".ico") {
score += 40
} else if icon.MimeType == "image/jpeg" {
score += 10 // JPEG less preferred for icons
}
// Prefer larger icons (but not too large)
if icon.Width >= 64 && icon.Width <= 512 {
score += 30
} else if icon.Width > 0 {
score += 10
}
if score > bestScore {
bestScore = score
bestIcon = icon.URL
}
}
if bestIcon != "" {
return bestIcon
}
// Fall back to first non-OG icon
for _, icon := range icons {
lowerURL := strings.ToLower(icon.URL)
if !strings.Contains(lowerURL, "og-image") && !strings.Contains(lowerURL, "og_image") {
return icon.URL
}
}
}
// Fallback to Google's favicon service (reliable, returns PNG)
return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
}
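// Scoring sketch (illustrative candidates): a 128×128 PNG at /favicon-128.png
// scores 100 (favicon path) + 50 (PNG) + 30 (64-512px) = 180, while a square
// 512×512 JPEG at /logo.jpg scores 10 (JPEG) + 30 (size) = 40, so the PNG wins.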
func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
result := p.fetchAndUploadImageWithDimensions(session, imageURL)
if result == nil {
return nil
}
return result.Blob
}
// upgradeImageURL attempts to get a larger version of known CDN image URLs
func upgradeImageURL(imageURL string) string {
// BBC images: /standard/240/ -> /standard/800/
if strings.Contains(imageURL, "ichef.bbci.co.uk") {
imageURL = strings.Replace(imageURL, "/standard/240/", "/standard/800/", 1)
imageURL = strings.Replace(imageURL, "/standard/480/", "/standard/800/", 1)
}
return imageURL
}
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
// Upgrade image URL to larger size if possible
imageURL = upgradeImageURL(imageURL)
// Fetch the image
resp, err := p.httpClient.Get(imageURL)
if err != nil {
return nil
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil
}
// Check content type
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
// Try to guess from URL
if strings.HasSuffix(strings.ToLower(imageURL), ".png") {
contentType = "image/png"
} else if strings.HasSuffix(strings.ToLower(imageURL), ".gif") {
contentType = "image/gif"
} else if strings.HasSuffix(strings.ToLower(imageURL), ".webp") {
contentType = "image/webp"
} else {
contentType = "image/jpeg" // Default
}
}
// Only accept image types
if !strings.HasPrefix(contentType, "image/") {
return nil
}
// Read image data (limit to 2MB to allow for resize headroom)
data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
if err != nil || len(data) == 0 {
return nil
}
// Decode image to get dimensions
imgConfig, _, err := image.DecodeConfig(bytes.NewReader(data))
width, height := 1, 1 // Default if decode fails
if err == nil {
width, height = imgConfig.Width, imgConfig.Height
}
// Bluesky blob limit is ~976KB, use 900KB as safe threshold
const maxBlobSize = 900 * 1024
// If image is too large, resize it
if len(data) > maxBlobSize {
// Decode the full image for resizing
img, _, err := image.Decode(bytes.NewReader(data))
if err != nil {
return nil // Can't decode, can't resize
}
// Scale down iteratively until under limit
scaleFactor := 0.9 // Start with 90% and iterate if needed
for attempt := 0; attempt < 5; attempt++ {
newWidth := int(float64(width) * scaleFactor)
newHeight := int(float64(height) * scaleFactor)
// Minimum dimensions
if newWidth < 100 {
newWidth = 100
}
if newHeight < 100 {
newHeight = 100
}
// Create resized image
resized := image.NewRGBA(image.Rect(0, 0, newWidth, newHeight))
draw.CatmullRom.Scale(resized, resized.Bounds(), img, img.Bounds(), draw.Over, nil)
// Encode as JPEG
var buf bytes.Buffer
if err := jpeg.Encode(&buf, resized, &jpeg.Options{Quality: 85}); err != nil {
return nil
}
if buf.Len() <= maxBlobSize {
data = buf.Bytes()
width = newWidth
height = newHeight
contentType = "image/jpeg"
break
}
// Still too large, reduce scale further
scaleFactor *= 0.8
}
// If still too large after 5 attempts, give up
if len(data) > maxBlobSize {
return nil
}
}
// Upload to PDS
blob, err := p.UploadBlob(session, data, contentType)
if err != nil {
return nil
}
return &ImageUploadResult{
Blob: blob,
Width: width,
Height: height,
}
}
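// Resize progression, for reference: the scale factor is applied to the
// original dimensions on each attempt (0.9, then 0.72, 0.576, 0.461, 0.369),
// so a 1600×1200 source is tried at 1440×1080, then 1152×864, and so on,
// until the JPEG re-encode fits under the 900KB cap or all 5 attempts fail.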
// FetchFaviconBytes downloads the best available favicon/icon for a site
// Uses go.deanishe.net/favicon library to find candidate icons
// Returns the icon bytes and mime type, or an error if nothing could be fetched
func FetchFaviconBytes(siteURL string) ([]byte, string, error) {
if !strings.HasPrefix(siteURL, "http") {
siteURL = "https://" + siteURL
}
u, err := url.Parse(siteURL)
if err != nil {
return nil, "", err
}
client := &http.Client{Timeout: 10 * time.Second}
// Use favicon library to find icons
finder := favicon.New(
favicon.WithClient(client),
favicon.IgnoreNoSize,
)
icons, err := finder.Find(siteURL)
if err != nil || len(icons) == 0 {
// Fallback to Google's favicon service
googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
return fetchIconBytes(client, googleURL)
}
// Try icons in order (sorted by size, largest first)
// Prefer PNG/JPEG over ICO
var iconURLs []string
for _, icon := range icons {
if icon.Width > 0 && icon.Width < 32 {
continue // Skip tiny icons
}
if icon.MimeType == "image/png" || icon.MimeType == "image/jpeg" {
iconURLs = append([]string{icon.URL}, iconURLs...) // Prepend PNG/JPEG
} else {
iconURLs = append(iconURLs, icon.URL)
}
}
// If no good icons, use all of them
if len(iconURLs) == 0 {
for _, icon := range icons {
iconURLs = append(iconURLs, icon.URL)
}
}
// Try to download each icon
for _, iconURL := range iconURLs {
data, mimeType, err := fetchIconBytes(client, iconURL)
if err == nil && len(data) > 0 {
return data, mimeType, nil
}
}
// Final fallback to Google
googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
return fetchIconBytes(client, googleURL)
}
// fetchIconBytes downloads an icon and returns its bytes and mime type
func fetchIconBytes(client *http.Client, iconURL string) ([]byte, string, error) {
resp, err := client.Get(iconURL)
if err != nil {
return nil, "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
return nil, "", err
}
// Determine mime type
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
if strings.HasSuffix(iconURL, ".png") {
contentType = "image/png"
} else if strings.HasSuffix(iconURL, ".ico") {
contentType = "image/x-icon"
} else {
contentType = "image/png"
}
}
return data, contentType, nil
}
-3
@@ -43,9 +43,6 @@ func main() {
// TLD sync loop (background) - syncs with IANA, marks dead TLDs, adds new ones
go crawler.startTLDSyncLoop()
-// Publish loop (background) - autopublishes items for approved feeds
-go crawler.StartPublishLoop()
// Domain loop (background) - domain_check + feed_crawl
go crawler.StartDomainLoop()
-187
@@ -1,187 +0,0 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"time"
)
// CreateSession authenticates with the PDS and returns a session
func (p *Publisher) CreateSession(handle, password string) (*PDSSession, error) {
payload := map[string]string{
"identifier": handle,
"password": password,
}
body, err := json.Marshal(payload)
if err != nil {
return nil, err
}
resp, err := p.httpClient.Post(
p.pdsHost+"/xrpc/com.atproto.server.createSession",
"application/json",
bytes.NewReader(body),
)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("auth failed: %s - %s", resp.Status, string(respBody))
}
var session PDSSession
if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
return nil, err
}
return &session, nil
}
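// Usage sketch: the publish loop calls this with a feed account's handle and
// the shared feed password (handle below is illustrative):
//
//	session, err := publisher.CreateSession("bbc-technology.1440.news", feedPassword)
//	if err != nil {
//		// account may not exist yet; see CreateAccount below
//	}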
// CreateAccount creates a new account on the PDS
// Requires an invite code if the PDS has invites enabled
func (p *Publisher) CreateAccount(handle, email, password, inviteCode string) (*PDSSession, error) {
payload := map[string]interface{}{
"handle": handle,
"email": email,
"password": password,
}
if inviteCode != "" {
payload["inviteCode"] = inviteCode
}
body, err := json.Marshal(payload)
if err != nil {
return nil, err
}
resp, err := p.httpClient.Post(
p.pdsHost+"/xrpc/com.atproto.server.createAccount",
"application/json",
bytes.NewReader(body),
)
if err != nil {
return nil, err
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("create account failed: %s - %s", resp.Status, string(respBody))
}
var session PDSSession
if err := json.Unmarshal(respBody, &session); err != nil {
return nil, err
}
return &session, nil
}
// CreateInviteCode creates an invite code using PDS admin password (Basic Auth)
func (p *Publisher) CreateInviteCode(adminPassword string, useCount int) (string, error) {
payload := map[string]interface{}{
"useCount": useCount,
}
body, err := json.Marshal(payload)
if err != nil {
return "", err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.server.createInviteCode", bytes.NewReader(body))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
// PDS admin APIs use Basic Auth with "admin" as username
req.SetBasicAuth("admin", adminPassword)
resp, err := p.httpClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("create invite failed: %s - %s", resp.Status, string(respBody))
}
var result struct {
Code string `json:"code"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return "", err
}
return result.Code, nil
}
// FollowAccount creates a follow record from the authenticated session to the target DID
func (p *Publisher) FollowAccount(session *PDSSession, targetDID string) error {
// Create follow record
now := time.Now().UTC().Format(time.RFC3339)
record := map[string]interface{}{
"$type": "app.bsky.graph.follow",
"subject": targetDID,
"createdAt": now,
}
payload := map[string]interface{}{
"repo": session.DID,
"collection": "app.bsky.graph.follow",
"record": record,
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("follow failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
// FollowAsDirectory logs in as the directory account and follows the target DID
func (p *Publisher) FollowAsDirectory(targetDID string) error {
dirHandle := os.Getenv("DIRECTORY_HANDLE")
dirPassword := os.Getenv("DIRECTORY_PASSWORD")
if dirHandle == "" || dirPassword == "" {
// Silently skip if directory account not configured
return nil
}
session, err := p.CreateSession(dirHandle, dirPassword)
if err != nil {
return fmt.Errorf("directory login failed: %w", err)
}
return p.FollowAccount(session, targetDID)
}
-349
@@ -1,349 +0,0 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
)
// BlobRef represents a blob reference for profile images
type BlobRef struct {
Type string `json:"$type"`
Ref Link `json:"ref"`
MimeType string `json:"mimeType"`
Size int64 `json:"size"`
}
type Link struct {
Link string `json:"$link"`
}
// UploadBlob uploads an image to the PDS and returns a blob reference
func (p *Publisher) UploadBlob(session *PDSSession, data []byte, mimeType string) (*BlobRef, error) {
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.uploadBlob", bytes.NewReader(data))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", mimeType)
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("upload blob failed: %s - %s", resp.Status, string(respBody))
}
var result struct {
Blob BlobRef `json:"blob"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return nil, err
}
return &result.Blob, nil
}
// UpdateProfile updates the profile for an account
func (p *Publisher) UpdateProfile(session *PDSSession, displayName, description string, avatar *BlobRef) error {
// First, get the current profile to preserve any existing fields
getReq, err := http.NewRequest("GET",
p.pdsHost+"/xrpc/com.atproto.repo.getRecord?repo="+session.DID+"&collection=app.bsky.actor.profile&rkey=self",
nil)
if err != nil {
return err
}
getReq.Header.Set("Authorization", "Bearer "+session.AccessJwt)
getResp, err := p.httpClient.Do(getReq)
var existingCID string
profile := map[string]interface{}{
"$type": "app.bsky.actor.profile",
}
if err == nil && getResp.StatusCode == http.StatusOK {
defer getResp.Body.Close()
var existing struct {
CID string `json:"cid"`
Value map[string]interface{} `json:"value"`
}
if json.NewDecoder(getResp.Body).Decode(&existing) == nil {
existingCID = existing.CID
profile = existing.Value
}
} else if getResp != nil {
getResp.Body.Close()
}
// Update fields
if displayName != "" {
profile["displayName"] = displayName
}
if description != "" {
profile["description"] = description
}
if avatar != nil {
profile["avatar"] = avatar
}
// Put the record
payload := map[string]interface{}{
"repo": session.DID,
"collection": "app.bsky.actor.profile",
"rkey": "self",
"record": profile,
}
if existingCID != "" {
payload["swapRecord"] = existingCID
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.putRecord", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("update profile failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
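// Note on swapRecord: sending the previously read CID makes putRecord a
// compare-and-swap, so the write only lands if the profile has not changed
// since it was fetched, avoiding clobbering a concurrent update.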
// DeleteAllPosts deletes all posts from an account
func (p *Publisher) DeleteAllPosts(session *PDSSession) (int, error) {
deleted := 0
cursor := ""
for {
// List records
listURL := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords?repo=%s&collection=app.bsky.feed.post&limit=100",
p.pdsHost, session.DID)
if cursor != "" {
listURL += "&cursor=" + url.QueryEscape(cursor)
}
req, err := http.NewRequest("GET", listURL, nil)
if err != nil {
return deleted, err
}
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return deleted, err
}
var result struct {
Records []struct {
URI string `json:"uri"`
} `json:"records"`
Cursor string `json:"cursor"`
}
respBody, _ := io.ReadAll(resp.Body)
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return deleted, fmt.Errorf("list records failed: %s - %s", resp.Status, string(respBody))
}
if err := json.Unmarshal(respBody, &result); err != nil {
return deleted, err
}
if len(result.Records) == 0 {
break
}
// Delete each record
for _, record := range result.Records {
// Extract rkey from URI: at://did:plc:xxx/app.bsky.feed.post/rkey
parts := strings.Split(record.URI, "/")
if len(parts) < 2 {
continue
}
rkey := parts[len(parts)-1]
if err := p.DeleteRecord(session, "app.bsky.feed.post", rkey); err != nil {
// Continue deleting other records even if one fails
continue
}
deleted++
}
cursor = result.Cursor
if cursor == "" {
break
}
}
return deleted, nil
}
// DeleteRecord deletes a single record from an account
func (p *Publisher) DeleteRecord(session *PDSSession, collection, rkey string) error {
payload := map[string]interface{}{
"repo": session.DID,
"collection": collection,
"rkey": rkey,
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.deleteRecord", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("delete record failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
// DeleteAccount deletes an account using PDS admin API
func (p *Publisher) DeleteAccount(adminPassword, did string) error {
payload := map[string]interface{}{
"did": did,
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.deleteAccount", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth("admin", adminPassword)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("delete account failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
// TakedownAccount applies a takedown to an account (hides content, preserves data)
func (p *Publisher) TakedownAccount(adminPassword, did, reason string) error {
payload := map[string]interface{}{
"subject": map[string]interface{}{
"$type": "com.atproto.admin.defs#repoRef",
"did": did,
},
"takedown": map[string]interface{}{
"applied": true,
"ref": reason,
},
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth("admin", adminPassword)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("takedown account failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
// RestoreAccount removes a takedown from an account (makes content visible again)
func (p *Publisher) RestoreAccount(adminPassword, did string) error {
payload := map[string]interface{}{
"subject": map[string]interface{}{
"$type": "com.atproto.admin.defs#repoRef",
"did": did,
},
"takedown": map[string]interface{}{
"applied": false,
},
}
body, err := json.Marshal(payload)
if err != nil {
return err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth("admin", adminPassword)
resp, err := p.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("restore account failed: %s - %s", resp.Status, string(respBody))
}
return nil
}
-439
@@ -1,439 +0,0 @@
package main
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
)
// Publisher handles posting items to AT Protocol PDS
type Publisher struct {
pdsHost string
httpClient *http.Client
}
// PDSSession holds authentication info for a PDS account
type PDSSession struct {
DID string `json:"did"`
Handle string `json:"handle"`
AccessJwt string `json:"accessJwt"`
RefreshJwt string `json:"refreshJwt"`
}
// BskyPost represents an app.bsky.feed.post record
type BskyPost struct {
Type string `json:"$type"`
Text string `json:"text"`
CreatedAt string `json:"createdAt"`
Facets []BskyFacet `json:"facets,omitempty"`
Embed *BskyEmbed `json:"embed,omitempty"`
}
type BskyFacet struct {
Index BskyByteSlice `json:"index"`
Features []BskyFeature `json:"features"`
}
type BskyByteSlice struct {
ByteStart int `json:"byteStart"`
ByteEnd int `json:"byteEnd"`
}
type BskyFeature struct {
Type string `json:"$type"`
URI string `json:"uri,omitempty"`
Tag string `json:"tag,omitempty"` // For hashtag facets
}
type BskyEmbed struct {
Type string `json:"$type"`
External *BskyExternal `json:"external,omitempty"`
Images []BskyImage `json:"images,omitempty"`
}
type BskyExternal struct {
URI string `json:"uri"`
Title string `json:"title"`
Description string `json:"description"`
Thumb *BlobRef `json:"thumb,omitempty"`
}
type BskyImage struct {
Alt string `json:"alt"`
Image *BlobRef `json:"image"`
AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
}
type BskyAspectRatio struct {
Width int `json:"width"`
Height int `json:"height"`
}
// NewPublisher creates a new Publisher instance
func NewPublisher(pdsHost string) *Publisher {
return &Publisher{
pdsHost: pdsHost,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
}
}
// TID alphabet for base32-sortable encoding
const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"
// GenerateRkey creates a deterministic TID-format rkey from a GUID and timestamp
// TIDs are required by Bluesky relay for indexing - custom rkeys don't sync
// Format: 13 chars base32-sortable, 53 bits timestamp + 10 bits clock ID
func GenerateRkey(guid string, timestamp time.Time) string {
if guid == "" {
return ""
}
// Get microseconds since Unix epoch (53 bits)
microsInt := timestamp.UnixMicro()
if microsInt < 0 {
microsInt = 0
}
// Convert to uint64 and mask to 53 bits
micros := uint64(microsInt) & ((1 << 53) - 1)
// Generate deterministic 10-bit clock ID from GUID hash
// Use XOR of multiple hash bytes to reduce collisions
hash := sha256.Sum256([]byte(guid))
// XOR bytes 0-3 together, then 4-7, combine for more entropy
h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
clockID := (h1 << 2) | (h2 >> 6)
clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023
// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
tid := (micros << 10) | clockID
// Encode as base32-sortable (13 characters)
var result [13]byte
for i := 12; i >= 0; i-- {
result[i] = tidAlphabet[tid&0x1f]
tid >>= 5
}
return string(result[:])
}
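// Layout recap: tid = (micros << 10) | clockID occupies 63 bits; thirteen
// base32 characters carry 65 bits, so the top two encoded bits are always
// zero. Because both inputs are deterministic, publishing the same GUID with
// the same timestamp always yields the same rkey, which keeps retries idempotent.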
// extractURLs finds all URLs in a string
func extractURLs(text string) []string {
// Match http:// or https:// URLs
urlRegex := regexp.MustCompile(`https?://[^\s<>"'\)]+`)
matches := urlRegex.FindAllString(text, -1)
// Clean up trailing punctuation
var urls []string
for _, u := range matches {
// Remove trailing punctuation that's likely not part of the URL
u = strings.TrimRight(u, ".,;:!?")
if u != "" {
urls = append(urls, u)
}
}
return urls
}
// toCamelCaseTag converts a tag string to camelCase hashtag format
// e.g., "Lagos News" -> "lagosNews", "AI" -> "ai", "machine learning" -> "machineLearning"
func toCamelCaseTag(tag string) string {
tag = strings.TrimSpace(tag)
if tag == "" {
return ""
}
// Remove any # prefix if present
tag = strings.TrimPrefix(tag, "#")
// Split on spaces and other separators
words := strings.FieldsFunc(tag, func(r rune) bool {
return r == ' ' || r == '-' || r == '_'
})
if len(words) == 0 {
return ""
}
// If single word, return lowercased
if len(words) == 1 {
return strings.ToLower(words[0])
}
// Multiple words: lowercase first word, capitalize first letter of subsequent words
var result strings.Builder
for i, word := range words {
if word == "" {
continue
}
runes := []rune(word)
if len(runes) > 0 {
if i == 0 || result.Len() == 0 {
// First word: all lowercase
result.WriteString(strings.ToLower(word))
} else {
// Subsequent words: capitalize first letter, lowercase rest
result.WriteString(strings.ToUpper(string(runes[0])))
if len(runes) > 1 {
result.WriteString(strings.ToLower(string(runes[1:])))
}
}
}
}
return result.String()
}
// formatTagsForPost converts item tags to hashtag text and facets
// Returns the hashtag line (e.g., "#AI #MachineLearning #News") and facets
func formatTagsForPost(tags []string, textOffset int) (string, []BskyFacet) {
if len(tags) == 0 {
return "", nil
}
// Dedupe and convert tags
seen := make(map[string]bool)
var hashtags []string
for _, tag := range tags {
camel := toCamelCaseTag(tag)
if camel == "" || seen[strings.ToLower(camel)] {
continue
}
seen[strings.ToLower(camel)] = true
hashtags = append(hashtags, camel)
}
if len(hashtags) == 0 {
return "", nil
}
// Limit to 5 tags to keep post compact
if len(hashtags) > 5 {
hashtags = hashtags[:5]
}
// Build the hashtag line and facets
var line strings.Builder
var facets []BskyFacet
currentOffset := textOffset
for i, ht := range hashtags {
if i > 0 {
line.WriteString(" ")
currentOffset++
}
hashtagText := "#" + ht
byteStart := currentOffset
byteEnd := currentOffset + len(hashtagText)
line.WriteString(hashtagText)
facets = append(facets, BskyFacet{
Index: BskyByteSlice{
ByteStart: byteStart,
ByteEnd: byteEnd,
},
Features: []BskyFeature{{
Type: "app.bsky.richtext.facet#tag",
Tag: ht,
}},
})
currentOffset = byteEnd
}
return line.String(), facets
}
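// Worked example: tags ["AI", "machine learning"] with textOffset 0 produce
// the line "#ai #machineLearning" and two facets, {byteStart: 0, byteEnd: 3}
// for "ai" and {byteStart: 4, byteEnd: 20} for "machineLearning". The indices
// are byte offsets, which is why len() (bytes, not runes) is used above.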
// PublishItem posts a feed item to the PDS
// Returns the AT URI of the created record, or error
func (p *Publisher) PublishItem(session *PDSSession, item *Item) (string, error) {
if item.GUID == "" && item.Link == "" {
return "", fmt.Errorf("item has no GUID or link, cannot publish")
}
// Collect URLs: main link + HN comments link (if applicable)
// Limit to 2 URLs max to stay under 300 grapheme limit
urlSet := make(map[string]bool)
var allURLs []string
// Add main link first
if item.Link != "" {
urlSet[item.Link] = true
allURLs = append(allURLs, item.Link)
}
// For HN feeds, add comments link from description (looks like "https://news.ycombinator.com/item?id=...")
descURLs := extractURLs(item.Description)
for _, u := range descURLs {
if strings.Contains(u, "news.ycombinator.com/item") && !urlSet[u] {
urlSet[u] = true
allURLs = append(allURLs, u)
break // Only add one comments link
}
}
// Add enclosure URL for podcasts/media (audio/video) if we have room
// Bluesky has 300 char limit, so only add if total URLs + minimal title fits
if len(allURLs) < 2 && item.Enclosure != nil && item.Enclosure.URL != "" {
encType := strings.ToLower(item.Enclosure.Type)
if strings.HasPrefix(encType, "audio/") || strings.HasPrefix(encType, "video/") {
if !urlSet[item.Enclosure.URL] {
// Calculate if enclosure would fit (need ~60 chars for title + separators)
currentURLLen := 0
for _, u := range allURLs {
currentURLLen += len(u) + 2 // +2 for \n\n
}
enclosureLen := len(item.Enclosure.URL) + 2
if currentURLLen+enclosureLen < 235 { // Leave 60 chars for title
urlSet[item.Enclosure.URL] = true
allURLs = append(allURLs, item.Enclosure.URL)
}
}
}
}
// Get the primary URL (article link)
primaryURL := ""
if len(allURLs) > 0 {
primaryURL = allURLs[0]
}
// Use original publication date if available, otherwise current time
createdAt := time.Now()
if !item.PubDate.IsZero() {
createdAt = item.PubDate
}
// Build post text with hashtags if available
// The link card shows the title, description, and thumbnail
// Clicking the card doesn't trigger the "leaving Bluesky" warning
postText := ""
var facets []BskyFacet
if len(item.Tags) > 0 {
tagLine, tagFacets := formatTagsForPost(item.Tags, 0)
postText = tagLine
facets = tagFacets
}
post := BskyPost{
Type: "app.bsky.feed.post",
Text: postText,
CreatedAt: createdAt.Format(time.RFC3339),
Facets: facets,
}
// Always use external embed (link card) - clicking the card doesn't show "leaving" warning
if primaryURL != "" {
external := &BskyExternal{
URI: primaryURL,
Title: item.Title,
Description: truncate(stripHTML(item.Description), 300),
}
// Add thumbnail from first image if available
if len(item.ImageURLs) > 0 {
if thumb := p.fetchAndUploadImage(session, item.ImageURLs[0]); thumb != nil {
external.Thumb = thumb
}
}
post.Embed = &BskyEmbed{
Type: "app.bsky.embed.external",
External: external,
}
}
// Use GUID + a stable item timestamp for a deterministic rkey
// This allows regenerating a new rkey by updating the timestamp if needed
guidForRkey := item.GUID
if guidForRkey == "" {
guidForRkey = item.Link
}
// Use PubDate for rkey to match createdAt ordering, fall back to DiscoveredAt
rkeyTime := item.PubDate
if rkeyTime.IsZero() {
rkeyTime = item.DiscoveredAt
}
rkey := GenerateRkey(guidForRkey, rkeyTime)
// Create the record with deterministic rkey
payload := map[string]interface{}{
"repo": session.DID,
"collection": "app.bsky.feed.post",
"rkey": rkey,
"record": post,
}
body, err := json.Marshal(payload)
if err != nil {
return "", err
}
req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+session.AccessJwt)
resp, err := p.httpClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("create record failed: %s - %s", resp.Status, string(respBody))
}
var result struct {
URI string `json:"uri"`
CID string `json:"cid"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return "", err
}
return result.URI, nil
}
func truncate(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
return s[:maxLen-3] + "..."
}
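// Note: maxLen counts bytes, not runes, so on multi-byte UTF-8 text the cut
// can land mid-character.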
// stripHTML removes HTML tags from a string
func stripHTML(s string) string {
// Remove HTML tags
tagRegex := regexp.MustCompile(`<[^>]*>`)
s = tagRegex.ReplaceAllString(s, "")
// Decode common HTML entities
s = strings.ReplaceAll(s, "&amp;", "&")
s = strings.ReplaceAll(s, "&lt;", "<")
s = strings.ReplaceAll(s, "&gt;", ">")
s = strings.ReplaceAll(s, "&quot;", "\"")
s = strings.ReplaceAll(s, "&#39;", "'")
s = strings.ReplaceAll(s, "&nbsp;", " ")
// Collapse whitespace
spaceRegex := regexp.MustCompile(`\s+`)
s = spaceRegex.ReplaceAllString(s, " ")
return strings.TrimSpace(s)
}