Remove publishing code - now handled by publish service
Publishing functionality has been moved to the standalone publish service.

Removed:
- publisher.go, pds_auth.go, pds_records.go, image.go, handle.go
- StartPublishLoop and related functions from crawler.go
- Publish loop invocation from main.go

Updated CLAUDE.md to reflect the new architecture.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
CLAUDE.md:
@@ -47,13 +47,14 @@ Multi-file Go application that crawls websites for RSS/Atom feeds, stores them i
### Concurrent Loops (main.go)

The application runs six independent goroutine loops:
The application runs five independent goroutine loops:

- **Import loop** - Reads `vertices.txt.gz` and inserts domains into DB in batches of 100 (status='pass')
- **Crawl loop** - Worker pool crawls approved domains for feed discovery
- **Feed check loop** - Worker pool re-checks known feeds for updates (conditional HTTP)
- **Stats loop** - Updates cached dashboard statistics every minute
- **Cleanup loop** - Removes items older than 12 months (weekly)
- **Publish loop** - Autopublishes items from approved feeds to AT Protocol PDS

Note: Publishing is handled by the separate `publish` service.

### File Structure
@@ -67,7 +68,6 @@ The application runs six independent goroutine loops:
| `util.go` | URL normalization, host utilities, TLD extraction |
| `db.go` | PostgreSQL schema (domains, feeds, items tables with tsvector FTS) |
| `dashboard.go` | HTTP server, JSON APIs, HTML template |
| `publisher.go` | AT Protocol PDS integration for posting items |
| `oauth.go` | OAuth 2.0 client wrapper for AT Protocol authentication |
| `oauth_session.go` | Session management with AES-256-GCM encrypted cookies |
| `oauth_middleware.go` | RequireAuth middleware for protecting routes |
crawler.go: -361
@@ -3,12 +3,10 @@ package main
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"strings"
	"sync"
	"sync/atomic"
@@ -147,365 +145,6 @@ func (c *Crawler) StartMaintenanceLoop() {
	}
}

// StartPublishLoop automatically publishes unpublished items for approved feeds
// Grabs up to 50 items sorted by discovered_at, publishes one per second, then reloops
func (c *Crawler) StartPublishLoop() {
	// Load PDS credentials from environment or pds.env file
	pdsHost := os.Getenv("PDS_HOST")
	pdsAdminPassword := os.Getenv("PDS_ADMIN_PASSWORD")

	if pdsHost == "" || pdsAdminPassword == "" {
		if data, err := os.ReadFile("pds.env"); err == nil {
			for _, line := range strings.Split(string(data), "\n") {
				line = strings.TrimSpace(line)
				if strings.HasPrefix(line, "#") || line == "" {
					continue
				}
				parts := strings.SplitN(line, "=", 2)
				if len(parts) == 2 {
					key := strings.TrimSpace(parts[0])
					value := strings.TrimSpace(parts[1])
					switch key {
					case "PDS_HOST":
						pdsHost = value
					case "PDS_ADMIN_PASSWORD":
						pdsAdminPassword = value
					}
				}
			}
		}
	}
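	// A pds.env file this loader would accept might look like the following
	// (hypothetical placeholder values, not real credentials):
	//
	//	PDS_HOST=https://pds.example.com
	//	PDS_ADMIN_PASSWORD=changeme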
	if pdsHost == "" || pdsAdminPassword == "" {
		fmt.Println("Publish loop: PDS credentials not configured, skipping")
		return
	}

	fmt.Printf("Publish loop: starting with PDS %s\n", pdsHost)
	feedPassword := "feed1440!"

	// Cache sessions per account
	sessions := make(map[string]*PDSSession)
	publisher := NewPublisher(pdsHost)

	// Refresh existing account profiles on startup
	c.RefreshAllProfiles(publisher, feedPassword)

	for {
		if c.IsShuttingDown() {
			return
		}

		// Get up to 50 unpublished items from approved feeds, sorted by discovered_at ASC
		items, err := c.GetAllUnpublishedItems(50)
		if err != nil {
			fmt.Printf("Publish loop error: %v\n", err)
			time.Sleep(1 * time.Second)
			continue
		}

		if len(items) == 0 {
			time.Sleep(1 * time.Second)
			continue
		}

		// Publish one item per second
		for _, item := range items {
			if c.IsShuttingDown() {
				return
			}
			// Get or create session for this feed's account
			account := c.getAccountForFeed(item.FeedURL)
			if account == "" {
				time.Sleep(1 * time.Second)
				continue
			}

			session, ok := sessions[account]
			if !ok {
				// Try to log in
				session, err = publisher.CreateSession(account, feedPassword)
				if err != nil {
					// Account might not exist - try to create it
					inviteCode, err := publisher.CreateInviteCode(pdsAdminPassword, 1)
					if err != nil {
						fmt.Printf("Publish: failed to create invite for %s: %v\n", account, err)
						time.Sleep(1 * time.Second)
						continue
					}

					email := account + "@1440.news"
					session, err = publisher.CreateAccount(account, email, feedPassword, inviteCode)
					if err != nil {
						fmt.Printf("Publish: failed to create account %s: %v\n", account, err)
						time.Sleep(1 * time.Second)
						continue
					}
					fmt.Printf("Publish: created account %s\n", account)
					c.db.Exec("UPDATE feeds SET publish_account = $1 WHERE url = $2", account, item.FeedURL)

					// Set up profile for new account
					feedInfo := c.getFeedInfo(item.FeedURL)
					if feedInfo != nil {
						displayName := feedInfo.Title
						if displayName == "" {
							displayName = account
						}
						// Build description with feed URL (strip HTML tags)
						description := stripHTML(feedInfo.Description)
						if description == "" {
							description = "News feed via 1440.news"
						}
						// Add feed URL as first line of description
						feedURLFull := "https://" + item.FeedURL
						description = feedURLFull + "\n\n" + description
						// Truncate if needed
						if len(displayName) > 64 {
							displayName = displayName[:61] + "..."
						}
						if len(description) > 256 {
							description = description[:253] + "..."
						}
						// Fetch and upload favicon as avatar
						var avatar *BlobRef
						faviconSource := feedInfo.SiteURL
						if faviconSource == "" {
							// Fallback to deriving from feed URL
							faviconSource = feedInfo.SourceHost
						}
						if faviconSource != "" {
							faviconURL := publisher.FetchFavicon(faviconSource)
							if faviconURL != "" {
								avatar = publisher.fetchAndUploadImage(session, faviconURL)
							}
						}
						if err := publisher.UpdateProfile(session, displayName, description, avatar); err != nil {
							fmt.Printf("Publish: failed to set profile for %s: %v\n", account, err)
						} else {
							fmt.Printf("Publish: set profile for %s\n", account)
						}

						// Have directory account follow this new account
						if err := publisher.FollowAsDirectory(session.DID); err != nil {
							fmt.Printf("Publish: directory follow failed for %s: %v\n", account, err)
						} else {
							fmt.Printf("Publish: directory now following %s\n", account)
						}
					}
				}
				sessions[account] = session
			}

			// Shorten URLs before publishing
			itemToPublish := item
			if item.Link != "" {
				if shortURL, err := c.GetShortURLForPost(item.Link, item.GUID, item.FeedURL); err == nil {
					fmt.Printf("Publish: shortened %s -> %s\n", item.Link[:min(40, len(item.Link))], shortURL)
					itemToPublish.Link = shortURL
				} else {
					fmt.Printf("Publish: short URL failed for %s: %v\n", item.Link[:min(40, len(item.Link))], err)
				}
			}

			// Publish the item
			uri, err := publisher.PublishItem(session, &itemToPublish)
			if err != nil {
				fmt.Printf("Publish: failed item %s: %v\n", item.GUID[:min(40, len(item.GUID))], err)
				// Clear session cache on auth errors
				if strings.Contains(err.Error(), "401") || strings.Contains(err.Error(), "auth") {
					delete(sessions, account)
				}
			} else {
				c.MarkItemPublished(item.FeedURL, item.GUID, uri)
				fmt.Printf("Publish: %s -> %s\n", item.Title[:min(40, len(item.Title))], account)
			}

			time.Sleep(1 * time.Second)
		}

		time.Sleep(1 * time.Second)
	}
}
// getAccountForFeed returns the publish account for a feed URL
func (c *Crawler) getAccountForFeed(feedURL string) string {
	var account *string
	err := c.db.QueryRow(`
		SELECT publish_account FROM feeds
		WHERE url = $1 AND publish_status = 'pass' AND status = 'pass'
	`, feedURL).Scan(&account)
	if err != nil || account == nil || *account == "" {
		// Derive handle from feed URL
		return DeriveHandleFromFeed(feedURL)
	}
	return *account
}
// FeedInfo holds basic feed metadata for profile setup
type FeedInfo struct {
	Title       string
	Description string
	SiteURL     string
	SourceHost  string
}

// getFeedInfo returns feed metadata for profile setup
func (c *Crawler) getFeedInfo(feedURL string) *FeedInfo {
	var title, description, siteURL, sourceHost *string
	err := c.db.QueryRow(`
		SELECT title, description, site_url, domain_host as source_host FROM feeds WHERE url = $1
	`, feedURL).Scan(&title, &description, &siteURL, &sourceHost)
	if err != nil {
		return nil
	}
	return &FeedInfo{
		Title:       StringValue(title),
		Description: StringValue(description),
		SiteURL:     StringValue(siteURL),
		SourceHost:  StringValue(sourceHost),
	}
}
// RefreshAllProfiles updates profiles for all existing accounts with feed URLs
func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string) {
	rows, err := c.db.Query(`
		SELECT url, title, description, site_url, domain_host as source_host, publish_account
		FROM feeds
		WHERE publish_account IS NOT NULL AND publish_account <> ''
	`)
	if err != nil {
		fmt.Printf("RefreshProfiles: query error: %v\n", err)
		return
	}
	defer rows.Close()

	for rows.Next() {
		var feedURL, account string
		var title, description, siteURL, sourceHost *string
		if err := rows.Scan(&feedURL, &title, &description, &siteURL, &sourceHost, &account); err != nil {
			continue
		}

		// Login to account
		session, err := publisher.CreateSession(account, feedPassword)
		if err != nil {
			fmt.Printf("RefreshProfiles: login failed for %s: %v\n", account, err)
			continue
		}

		// Build profile
		displayName := StringValue(title)
		if displayName == "" {
			displayName = account
		}
		desc := stripHTML(StringValue(description))
		if desc == "" {
			desc = "News feed via 1440.news"
		}
		// Add feed URL as first line
		feedURLFull := "https://" + feedURL
		desc = feedURLFull + "\n\n" + desc

		// Truncate if needed
		if len(displayName) > 64 {
			displayName = displayName[:61] + "..."
		}
		if len(desc) > 256 {
			desc = desc[:253] + "..."
		}

		// Fetch and upload favicon as avatar
		var avatar *BlobRef
		faviconSource := StringValue(siteURL)
		if faviconSource == "" {
			// Fallback to source host
			faviconSource = StringValue(sourceHost)
		}
		if faviconSource != "" {
			faviconURL := publisher.FetchFavicon(faviconSource)
			if faviconURL != "" {
				avatar = publisher.fetchAndUploadImage(session, faviconURL)
			}
		}

		if err := publisher.UpdateProfile(session, displayName, desc, avatar); err != nil {
			fmt.Printf("RefreshProfiles: update failed for %s: %v\n", account, err)
		} else {
			fmt.Printf("RefreshProfiles: updated %s\n", account)
		}
	}
}
// GetAllUnpublishedItems returns unpublished items from all approved feeds
func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
	rows, err := c.db.Query(`
		SELECT i.feed_url, i.guid, i.title, i.link, i.description, i.content,
		       i.author, i.pub_date, i.discovered_at, i.image_urls, i.tags,
		       i.enclosure_url, i.enclosure_type, i.enclosure_length
		FROM items i
		JOIN feeds f ON i.feed_url = f.url
		WHERE f.publish_status = 'pass'
		  AND f.status = 'pass'
		  AND i.published_at IS NULL
		ORDER BY i.discovered_at ASC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var items []Item
	for rows.Next() {
		var item Item
		var guid, title, link, description, content, author, imageURLsJSON, tagsJSON *string
		var pubDate, discoveredAt *time.Time
		var enclosureURL, enclosureType *string
		var enclosureLength *int64

		err := rows.Scan(&item.FeedURL, &guid, &title, &link, &description,
			&content, &author, &pubDate, &discoveredAt, &imageURLsJSON, &tagsJSON,
			&enclosureURL, &enclosureType, &enclosureLength)
		if err != nil {
			continue
		}

		item.GUID = StringValue(guid)
		item.Title = StringValue(title)
		item.Link = StringValue(link)
		item.Description = StringValue(description)
		item.Content = StringValue(content)
		item.Author = StringValue(author)
		item.PubDate = TimeValue(pubDate)
		item.DiscoveredAt = TimeValue(discoveredAt)

		// Parse image URLs from JSON array
		if imageURLsJSON != nil && *imageURLsJSON != "" {
			json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
		}

		// Parse tags from JSON array
		if tagsJSON != nil && *tagsJSON != "" {
			json.Unmarshal([]byte(*tagsJSON), &item.Tags)
		}

		// Parse enclosure
		if enclosureURL != nil && *enclosureURL != "" {
			item.Enclosure = &Enclosure{
				URL:  *enclosureURL,
				Type: StringValue(enclosureType),
			}
			if enclosureLength != nil {
				item.Enclosure.Length = *enclosureLength
			}
		}

		items = append(items, item)
	}

	return items, nil
}
// dnsResolver uses local caching DNS (infra-dns) with fallback to system
var dnsResolver = &net.Resolver{
	PreferGo: true,
@@ -452,15 +452,9 @@ func scanFeeds(rows pgx.Rows) ([]*Feed, error) {
}

// SetPublishStatus sets the publish status for a feed ('hold', 'pass', 'skip')
// If status is 'pass', the account handle is also set (auto-derived if empty)
func (c *Crawler) SetPublishStatus(feedURL, status, account string) error {
	feedURL = normalizeURL(feedURL)

	// Auto-derive account if passing and not provided
	if status == "pass" && account == "" {
		account = DeriveHandleFromFeed(feedURL)
	}

	_, err := c.db.Exec(`
		UPDATE feeds SET publish_status = $1, publish_account = $2 WHERE url = $3
	`, status, NullableString(account), feedURL)
handle.go: -262
@@ -1,262 +0,0 @@
package main

import (
	"net/url"
	"regexp"
	"strings"
)

// DeriveHandleFromFeed generates an AT Protocol handle from a feed URL
// Format: {domain}-{category}.1440.news
// AT Protocol allows up to 63 characters per label, but the PDS
// restricts the first segment to 18 characters for local handles.
// Examples:
//
//	feeds.bbci.co.uk/news/technology/rss.xml → bbc-technology.1440.news
//	news.ycombinator.com/rss → ycombinator.1440.news
func DeriveHandleFromFeed(feedURL string) string {
	const maxSubdomainLen = 18 // PDS limit for first segment

	// Ensure we have a scheme for parsing
	if !strings.Contains(feedURL, "://") {
		feedURL = "https://" + feedURL
	}

	u, err := url.Parse(feedURL)
	if err != nil {
		return ""
	}

	hostname := strings.ToLower(u.Hostname())
	path := strings.ToLower(u.Path)

	// Remove common feed suffixes/extensions
	suffixesToRemove := []string{".xml", ".rss", ".atom", ".json", "/rss", "/feed", "/atom", "/index"}
	for _, suffix := range suffixesToRemove {
		path = strings.TrimSuffix(path, suffix)
	}

	// Split path into segments and filter noise
	segments := strings.Split(strings.Trim(path, "/"), "/")
	skipPathWords := map[string]bool{
		"rss": true, "feed": true, "feeds": true, "atom": true,
		"xml": true, "default": true, "index": true, "services": true,
		"nyt": true,
	}

	var pathParts []string
	for _, seg := range segments {
		seg = cleanHandleSegment(seg)
		if seg != "" && !skipPathWords[seg] {
			pathParts = append(pathParts, seg)
		}
	}

	// Split hostname and extract the meaningful domain
	hostParts := strings.Split(hostname, ".")

	// Two-part TLDs to handle specially
	twoPartTLDs := map[string]bool{
		"co.uk": true, "com.au": true, "co.nz": true, "co.jp": true,
		"com.br": true, "co.in": true, "org.uk": true, "ac.uk": true,
	}

	// Check for two-part TLD
	if len(hostParts) >= 2 {
		possibleTwoPartTLD := hostParts[len(hostParts)-2] + "." + hostParts[len(hostParts)-1]
		if twoPartTLDs[possibleTwoPartTLD] {
			hostParts = hostParts[:len(hostParts)-2]
		} else {
			// Single TLD - remove it
			singleTLDs := map[string]bool{
				"com": true, "org": true, "net": true, "io": true,
				"edu": true, "gov": true, "uk": true, "de": true, "fr": true,
			}
			if singleTLDs[hostParts[len(hostParts)-1]] {
				hostParts = hostParts[:len(hostParts)-1]
			}
		}
	}

	// Skip noise subdomains
	skipHostWords := map[string]bool{
		"www": true, "feeds": true, "rss": true, "feed": true,
		"api": true, "cdn": true, "static": true, "news": true,
	}

	var meaningfulHostParts []string
	for _, part := range hostParts {
		if !skipHostWords[part] && part != "" {
			meaningfulHostParts = append(meaningfulHostParts, part)
		}
	}

	// Get the main domain (e.g., "bbci", "ycombinator", "nytimes")
	var mainDomain string
	if len(meaningfulHostParts) > 0 {
		mainDomain = meaningfulHostParts[len(meaningfulHostParts)-1]
	} else if len(hostParts) > 0 {
		mainDomain = hostParts[len(hostParts)-1]
	}

	// Special case: "bbci" should become "bbc"
	if mainDomain == "bbci" {
		mainDomain = "bbc"
	}

	// Abbreviations for long category names to fit 18-char limit
	categoryAbbrevs := map[string]string{
		"science-and-environment": "sci-env",
		"entertainment-and-arts":  "ent-arts",
		"science-environment":     "sci-env",
		"entertainment-arts":      "ent-arts",
		"technology":              "tech",
		"business":                "biz",
		"international":           "intl",
		"environment":             "env",
		"entertainment":           "ent",
		"politics":                "pol",
	}

	// Build subdomain: domain + category (from path)
	var subdomain string
	if len(pathParts) > 0 {
		// Use last meaningful path part as category (e.g., "technology" from /news/technology/)
		category := pathParts[len(pathParts)-1]
		// Skip generic categories
		if category == "news" && len(pathParts) == 1 {
			subdomain = mainDomain
		} else {
			// Try to abbreviate if the full subdomain would be too long
			fullSubdomain := mainDomain + "-" + category
			if len(fullSubdomain) > maxSubdomainLen {
				if abbrev, ok := categoryAbbrevs[category]; ok {
					category = abbrev
				}
			}
			subdomain = mainDomain + "-" + category
		}
	} else {
		subdomain = mainDomain
	}

	// If still too long, just use main hostname
	if len(subdomain) > maxSubdomainLen {
		subdomain = mainDomain
	}

	// Final safety: truncate if still too long
	if len(subdomain) > maxSubdomainLen {
		subdomain = subdomain[:maxSubdomainLen]
	}

	subdomain = strings.Trim(subdomain, "-")

	// Collapse multiple hyphens
	for strings.Contains(subdomain, "--") {
		subdomain = strings.ReplaceAll(subdomain, "--", "-")
	}

	return subdomain + ".1440.news"
}
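// A table-driven test sketch for the derivation above (hypothetical
// handle_test.go; needs only the standard "testing" package). The expected
// values follow the doc-comment examples.
func TestDeriveHandleFromFeed(t *testing.T) {
	cases := map[string]string{
		"feeds.bbci.co.uk/news/technology/rss.xml": "bbc-technology.1440.news",
		"news.ycombinator.com/rss":                 "ycombinator.1440.news",
	}
	for in, want := range cases {
		if got := DeriveHandleFromFeed(in); got != want {
			t.Errorf("DeriveHandleFromFeed(%q) = %q, want %q", in, got, want)
		}
	}
}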
// cleanHandleSegment sanitizes a string for use in an AT Protocol handle segment
// Handle segments must be alphanumeric with hyphens, no leading/trailing hyphens
func cleanHandleSegment(s string) string {
	// Remove file extensions
	if idx := strings.LastIndex(s, "."); idx > 0 {
		s = s[:idx]
	}

	// Convert to lowercase
	s = strings.ToLower(s)

	// Strip common feed prefixes/suffixes from the segment itself
	// e.g., "showrss" → "show", "rssworld" → "world"
	feedAffixes := []string{"rss", "feed", "atom", "xml"}
	for _, affix := range feedAffixes {
		// Strip suffix (e.g., "showrss" → "show")
		if strings.HasSuffix(s, affix) && len(s) > len(affix) {
			s = strings.TrimSuffix(s, affix)
			break
		}
		// Strip prefix (e.g., "rssworld" → "world")
		if strings.HasPrefix(s, affix) && len(s) > len(affix) {
			s = strings.TrimPrefix(s, affix)
			break
		}
	}

	// Replace underscores and other separators with hyphens
	s = strings.ReplaceAll(s, "_", "-")
	s = strings.ReplaceAll(s, " ", "-")

	// Remove any characters that aren't alphanumeric or hyphens
	reg := regexp.MustCompile(`[^a-z0-9-]`)
	s = reg.ReplaceAllString(s, "")

	// Collapse multiple hyphens
	for strings.Contains(s, "--") {
		s = strings.ReplaceAll(s, "--", "-")
	}

	// Trim leading/trailing hyphens
	s = strings.Trim(s, "-")

	return s
}
// SplitHandle extracts the path prefix and hostname from a derived handle
// Example: show.news.ycombinator.com.1440.news → ("show", "news.ycombinator.com")
func SplitHandle(handle string) (prefix string, hostname string) {
	// Remove .1440.news suffix
	handle = strings.TrimSuffix(handle, ".1440.news")

	parts := strings.Split(handle, ".")

	// Try to find where hostname starts by looking for valid hostname patterns
	if len(parts) >= 2 {
		for i := 0; i < len(parts)-1; i++ {
			remaining := strings.Join(parts[i:], ".")
			if looksLikeHostname(remaining) {
				if i > 0 {
					prefix = strings.Join(parts[:i], ".")
				}
				hostname = remaining
				return
			}
		}
	}

	// Fallback: no prefix, entire thing is hostname
	hostname = handle
	return "", hostname
}

func isLikelyTLDPart(s string) bool {
	tlds := map[string]bool{
		"com": true, "org": true, "net": true, "edu": true, "gov": true,
		"io": true, "co": true, "uk": true, "de": true, "fr": true,
		"jp": true, "au": true, "ca": true, "nl": true, "se": true,
		"news": true, "blog": true, "tech": true, "dev": true,
	}
	return tlds[s]
}

func isTwoPartTLD(first, second string) bool {
	twoPartTLDs := map[string]bool{
		"co.uk": true, "com.au": true, "co.jp": true, "co.nz": true,
		"org.uk": true, "net.au": true, "com.br": true,
	}
	return twoPartTLDs[first+"."+second]
}

func looksLikeHostname(s string) bool {
	// A hostname typically has at least one dot and ends with a TLD-like part
	parts := strings.Split(s, ".")
	if len(parts) < 2 {
		return false
	}
	lastPart := parts[len(parts)-1]
	return isLikelyTLDPart(lastPart)
}
image.go: -381
@@ -1,381 +0,0 @@
package main

import (
	"bytes"
	"fmt"
	"image"
	_ "image/gif"
	"image/jpeg"
	_ "image/png"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"go.deanishe.net/favicon"
	"golang.org/x/image/draw"
	_ "golang.org/x/image/webp"
)

// ImageUploadResult contains the uploaded blob and image dimensions
type ImageUploadResult struct {
	Blob   *BlobRef
	Width  int
	Height int
}

// uploadImages fetches and uploads up to 4 images, returning BskyImage structs
func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altText string) []BskyImage {
	var images []BskyImage
	maxImages := 4
	if len(imageURLs) < maxImages {
		maxImages = len(imageURLs)
	}

	for i := 0; i < maxImages; i++ {
		result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
		if result != nil && result.Blob != nil {
			img := BskyImage{
				Alt:   altText,
				Image: result.Blob,
			}
			if result.Width > 0 && result.Height > 0 {
				img.AspectRatio = &BskyAspectRatio{
					Width:  result.Width,
					Height: result.Height,
				}
			}
			images = append(images, img)
		}
	}

	return images
}

// FetchFavicon tries to get a favicon URL for a site
// Uses go.deanishe.net/favicon library which parses HTML, manifests, and checks common paths
// Returns the favicon URL, falling back to Google's favicon service; empty only for unusable input
func (p *Publisher) FetchFavicon(siteURL string) string {
	if siteURL == "" {
		return ""
	}

	// Ensure URL has scheme
	if !strings.Contains(siteURL, "://") {
		siteURL = "https://" + siteURL
	}
	u, err := url.Parse(siteURL)
	if err != nil {
		return ""
	}

	// Create finder with custom HTTP client
	// Note: Don't use IgnoreNoSize as it filters out valid favicon.ico files that don't have size metadata
	finder := favicon.New(
		favicon.WithClient(p.httpClient),
	)

	// Find icons - library checks HTML <link> tags, manifests, OG images, common paths
	icons, err := finder.Find(siteURL)
	if err == nil && len(icons) > 0 {
		// Filter and score icons for avatar use
		// Prefer: square icons, reasonable size, PNG format, actual favicons over OG images
		var bestIcon string
		var bestScore int

		for _, icon := range icons {
			// Skip tiny icons (likely tracking pixels)
			if icon.Width > 0 && icon.Width < 32 {
				continue
			}

			// Skip Open Graph images (meant for link previews, usually wide banners)
			lowerURL := strings.ToLower(icon.URL)
			if strings.Contains(lowerURL, "og-image") || strings.Contains(lowerURL, "og_image") ||
				strings.Contains(lowerURL, "opengraph") || strings.Contains(lowerURL, "twitter") {
				continue
			}

			// Skip wide images (aspect ratio > 1.5 means it's a banner, not an icon)
			if icon.Width > 0 && icon.Height > 0 {
				ratio := float64(icon.Width) / float64(icon.Height)
				if ratio > 1.5 || ratio < 0.67 {
					continue
				}
			}

			// Score the icon
			score := 0

			// Prefer actual favicon paths
			if strings.Contains(lowerURL, "favicon") || strings.Contains(lowerURL, "icon") ||
				strings.Contains(lowerURL, "apple-touch") {
				score += 100
			}

			// Prefer PNG over other formats
			if icon.MimeType == "image/png" {
				score += 50
			} else if icon.MimeType == "image/x-icon" || strings.HasSuffix(lowerURL, ".ico") {
				score += 40
			} else if icon.MimeType == "image/jpeg" {
				score += 10 // JPEG less preferred for icons
			}

			// Prefer larger icons (but not too large)
			if icon.Width >= 64 && icon.Width <= 512 {
				score += 30
			} else if icon.Width > 0 {
				score += 10
			}

			if score > bestScore {
				bestScore = score
				bestIcon = icon.URL
			}
		}

		if bestIcon != "" {
			return bestIcon
		}

		// Fall back to first non-OG icon
		for _, icon := range icons {
			lowerURL := strings.ToLower(icon.URL)
			if !strings.Contains(lowerURL, "og-image") && !strings.Contains(lowerURL, "og_image") {
				return icon.URL
			}
		}
	}

	// Fallback to Google's favicon service (reliable, returns PNG)
	return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
}

func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
	result := p.fetchAndUploadImageWithDimensions(session, imageURL)
	if result == nil {
		return nil
	}
	return result.Blob
}

// upgradeImageURL attempts to get a larger version of known CDN image URLs
func upgradeImageURL(imageURL string) string {
	// BBC images: /standard/240/ -> /standard/800/
	if strings.Contains(imageURL, "ichef.bbci.co.uk") {
		imageURL = strings.Replace(imageURL, "/standard/240/", "/standard/800/", 1)
		imageURL = strings.Replace(imageURL, "/standard/480/", "/standard/800/", 1)
	}
	return imageURL
}
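// For example (illustrative path; only the /standard/<size>/ segment matters
// to this function):
//
//	https://ichef.bbci.co.uk/news/standard/240/some-image.jpg
//	  -> https://ichef.bbci.co.uk/news/standard/800/some-image.jpg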
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
	// Upgrade image URL to larger size if possible
	imageURL = upgradeImageURL(imageURL)

	// Fetch the image
	resp, err := p.httpClient.Get(imageURL)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil
	}

	// Check content type
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		// Try to guess from URL
		if strings.HasSuffix(strings.ToLower(imageURL), ".png") {
			contentType = "image/png"
		} else if strings.HasSuffix(strings.ToLower(imageURL), ".gif") {
			contentType = "image/gif"
		} else if strings.HasSuffix(strings.ToLower(imageURL), ".webp") {
			contentType = "image/webp"
		} else {
			contentType = "image/jpeg" // Default
		}
	}

	// Only accept image types
	if !strings.HasPrefix(contentType, "image/") {
		return nil
	}

	// Read image data (limit to 2MB to allow for resize headroom)
	data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil || len(data) == 0 {
		return nil
	}

	// Decode image to get dimensions
	imgConfig, _, err := image.DecodeConfig(bytes.NewReader(data))
	width, height := 1, 1 // Default if decode fails
	if err == nil {
		width, height = imgConfig.Width, imgConfig.Height
	}

	// Bluesky blob limit is ~976KB, use 900KB as safe threshold
	const maxBlobSize = 900 * 1024

	// If image is too large, resize it
	if len(data) > maxBlobSize {
		// Decode the full image for resizing
		img, _, err := image.Decode(bytes.NewReader(data))
		if err != nil {
			return nil // Can't decode, can't resize
		}

		// Scale down iteratively until under limit
		scaleFactor := 0.9 // Start with 90% and iterate if needed

		for attempt := 0; attempt < 5; attempt++ {
			newWidth := int(float64(width) * scaleFactor)
			newHeight := int(float64(height) * scaleFactor)

			// Minimum dimensions
			if newWidth < 100 {
				newWidth = 100
			}
			if newHeight < 100 {
				newHeight = 100
			}

			// Create resized image
			resized := image.NewRGBA(image.Rect(0, 0, newWidth, newHeight))
			draw.CatmullRom.Scale(resized, resized.Bounds(), img, img.Bounds(), draw.Over, nil)

			// Encode as JPEG
			var buf bytes.Buffer
			if err := jpeg.Encode(&buf, resized, &jpeg.Options{Quality: 85}); err != nil {
				return nil
			}

			if buf.Len() <= maxBlobSize {
				data = buf.Bytes()
				width = newWidth
				height = newHeight
				contentType = "image/jpeg"
				break
			}

			// Still too large, reduce scale further
			scaleFactor *= 0.8
		}

		// If still too large after 5 attempts, give up
		if len(data) > maxBlobSize {
			return nil
		}
	}

	// Upload to PDS
	blob, err := p.UploadBlob(session, data, contentType)
	if err != nil {
		return nil
	}

	return &ImageUploadResult{
		Blob:   blob,
		Width:  width,
		Height: height,
	}
}
// FetchFaviconBytes downloads the best favicon/icon for a site
// Uses go.deanishe.net/favicon library to find candidate icons
// Returns the icon bytes and mime type, falling back to Google's favicon service
func FetchFaviconBytes(siteURL string) ([]byte, string, error) {
	if !strings.HasPrefix(siteURL, "http") {
		siteURL = "https://" + siteURL
	}

	u, err := url.Parse(siteURL)
	if err != nil {
		return nil, "", err
	}

	client := &http.Client{Timeout: 10 * time.Second}

	// Use favicon library to find icons
	finder := favicon.New(
		favicon.WithClient(client),
		favicon.IgnoreNoSize,
	)

	icons, err := finder.Find(siteURL)
	if err != nil || len(icons) == 0 {
		// Fallback to Google's favicon service
		googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
		return fetchIconBytes(client, googleURL)
	}

	// Try icons in order (sorted by size, largest first)
	// Prefer PNG/JPEG over ICO
	var iconURLs []string
	for _, icon := range icons {
		if icon.Width > 0 && icon.Width < 32 {
			continue // Skip tiny icons
		}
		if icon.MimeType == "image/png" || icon.MimeType == "image/jpeg" {
			iconURLs = append([]string{icon.URL}, iconURLs...) // Prepend PNG/JPEG
		} else {
			iconURLs = append(iconURLs, icon.URL)
		}
	}

	// If no good icons, use all of them
	if len(iconURLs) == 0 {
		for _, icon := range icons {
			iconURLs = append(iconURLs, icon.URL)
		}
	}

	// Try to download each icon
	for _, iconURL := range iconURLs {
		data, mimeType, err := fetchIconBytes(client, iconURL)
		if err == nil && len(data) > 0 {
			return data, mimeType, nil
		}
	}

	// Final fallback to Google
	googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
	return fetchIconBytes(client, googleURL)
}

// fetchIconBytes downloads an icon and returns its bytes and mime type
func fetchIconBytes(client *http.Client, iconURL string) ([]byte, string, error) {
	resp, err := client.Get(iconURL)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", err
	}

	// Determine mime type
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		if strings.HasSuffix(iconURL, ".png") {
			contentType = "image/png"
		} else if strings.HasSuffix(iconURL, ".ico") {
			contentType = "image/x-icon"
		} else {
			contentType = "image/png"
		}
	}

	return data, contentType, nil
}
main.go:
@@ -43,9 +43,6 @@ func main() {
	// TLD sync loop (background) - syncs with IANA, marks dead TLDs, adds new ones
	go crawler.startTLDSyncLoop()

	// Publish loop (background) - autopublishes items for approved feeds
	go crawler.StartPublishLoop()

	// Domain loop (background) - domain_check + feed_crawl
	go crawler.StartDomainLoop()
pds_auth.go: -187
@@ -1,187 +0,0 @@
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"
)

// CreateSession authenticates with the PDS and returns a session
func (p *Publisher) CreateSession(handle, password string) (*PDSSession, error) {
	payload := map[string]string{
		"identifier": handle,
		"password":   password,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}

	resp, err := p.httpClient.Post(
		p.pdsHost+"/xrpc/com.atproto.server.createSession",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("auth failed: %s - %s", resp.Status, string(respBody))
	}

	var session PDSSession
	if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
		return nil, err
	}

	return &session, nil
}

// CreateAccount creates a new account on the PDS
// Requires an invite code if the PDS has invites enabled
func (p *Publisher) CreateAccount(handle, email, password, inviteCode string) (*PDSSession, error) {
	payload := map[string]interface{}{
		"handle":   handle,
		"email":    email,
		"password": password,
	}
	if inviteCode != "" {
		payload["inviteCode"] = inviteCode
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}

	resp, err := p.httpClient.Post(
		p.pdsHost+"/xrpc/com.atproto.server.createAccount",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("create account failed: %s - %s", resp.Status, string(respBody))
	}

	var session PDSSession
	if err := json.Unmarshal(respBody, &session); err != nil {
		return nil, err
	}

	return &session, nil
}

// CreateInviteCode creates an invite code using PDS admin password (Basic Auth)
func (p *Publisher) CreateInviteCode(adminPassword string, useCount int) (string, error) {
	payload := map[string]interface{}{
		"useCount": useCount,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.server.createInviteCode", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	// PDS admin APIs use Basic Auth with "admin" as username
	req.SetBasicAuth("admin", adminPassword)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("create invite failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		Code string `json:"code"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", err
	}

	return result.Code, nil
}

// FollowAccount creates a follow record from the authenticated session to the target DID
func (p *Publisher) FollowAccount(session *PDSSession, targetDID string) error {
	// Create follow record
	now := time.Now().UTC().Format(time.RFC3339)
	record := map[string]interface{}{
		"$type":     "app.bsky.graph.follow",
		"subject":   targetDID,
		"createdAt": now,
	}

	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.graph.follow",
		"record":     record,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("follow failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}

// FollowAsDirectory logs in as the directory account and follows the target DID
func (p *Publisher) FollowAsDirectory(targetDID string) error {
	dirHandle := os.Getenv("DIRECTORY_HANDLE")
	dirPassword := os.Getenv("DIRECTORY_PASSWORD")

	if dirHandle == "" || dirPassword == "" {
		// Silently skip if directory account not configured
		return nil
	}

	session, err := p.CreateSession(dirHandle, dirPassword)
	if err != nil {
		return fmt.Errorf("directory login failed: %w", err)
	}

	return p.FollowAccount(session, targetDID)
}
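// Hypothetical local configuration for the directory account (the handle
// value is a guess for illustration; only the two variable names are read
// by the code above):
//
//	export DIRECTORY_HANDLE=directory.1440.news
//	export DIRECTORY_PASSWORD=<password>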
pds_records.go: -349
@@ -1,349 +0,0 @@
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
)

// BlobRef represents a blob reference for profile images
type BlobRef struct {
	Type     string `json:"$type"`
	Ref      Link   `json:"ref"`
	MimeType string `json:"mimeType"`
	Size     int64  `json:"size"`
}

type Link struct {
	Link string `json:"$link"`
}

// UploadBlob uploads an image to the PDS and returns a blob reference
func (p *Publisher) UploadBlob(session *PDSSession, data []byte, mimeType string) (*BlobRef, error) {
	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.uploadBlob", bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", mimeType)
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("upload blob failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		Blob BlobRef `json:"blob"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, err
	}

	return &result.Blob, nil
}

// UpdateProfile updates the profile for an account
func (p *Publisher) UpdateProfile(session *PDSSession, displayName, description string, avatar *BlobRef) error {
	// First, get the current profile to preserve any existing fields
	getReq, err := http.NewRequest("GET",
		p.pdsHost+"/xrpc/com.atproto.repo.getRecord?repo="+session.DID+"&collection=app.bsky.actor.profile&rkey=self",
		nil)
	if err != nil {
		return err
	}
	getReq.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	getResp, err := p.httpClient.Do(getReq)

	var existingCID string
	profile := map[string]interface{}{
		"$type": "app.bsky.actor.profile",
	}

	if err == nil && getResp.StatusCode == http.StatusOK {
		defer getResp.Body.Close()
		var existing struct {
			CID   string                 `json:"cid"`
			Value map[string]interface{} `json:"value"`
		}
		if json.NewDecoder(getResp.Body).Decode(&existing) == nil {
			existingCID = existing.CID
			profile = existing.Value
		}
	} else if getResp != nil {
		getResp.Body.Close()
	}

	// Update fields
	if displayName != "" {
		profile["displayName"] = displayName
	}
	if description != "" {
		profile["description"] = description
	}
	if avatar != nil {
		profile["avatar"] = avatar
	}

	// Put the record
	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.actor.profile",
		"rkey":       "self",
		"record":     profile,
	}
	if existingCID != "" {
		payload["swapRecord"] = existingCID
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.putRecord", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("update profile failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}

// DeleteAllPosts deletes all posts from an account
func (p *Publisher) DeleteAllPosts(session *PDSSession) (int, error) {
	deleted := 0
	cursor := ""

	for {
		// List records
		listURL := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords?repo=%s&collection=app.bsky.feed.post&limit=100",
			p.pdsHost, session.DID)
		if cursor != "" {
			listURL += "&cursor=" + url.QueryEscape(cursor)
		}

		req, err := http.NewRequest("GET", listURL, nil)
		if err != nil {
			return deleted, err
		}
		req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

		resp, err := p.httpClient.Do(req)
		if err != nil {
			return deleted, err
		}

		var result struct {
			Records []struct {
				URI string `json:"uri"`
			} `json:"records"`
			Cursor string `json:"cursor"`
		}

		respBody, _ := io.ReadAll(resp.Body)
		resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			return deleted, fmt.Errorf("list records failed: %s - %s", resp.Status, string(respBody))
		}

		if err := json.Unmarshal(respBody, &result); err != nil {
			return deleted, err
		}

		if len(result.Records) == 0 {
			break
		}

		// Delete each record
		for _, record := range result.Records {
			// Extract rkey from URI: at://did:plc:xxx/app.bsky.feed.post/rkey
			parts := strings.Split(record.URI, "/")
			if len(parts) < 2 {
				continue
			}
			rkey := parts[len(parts)-1]

			if err := p.DeleteRecord(session, "app.bsky.feed.post", rkey); err != nil {
				// Continue deleting other records even if one fails
				continue
			}
			deleted++
		}

		cursor = result.Cursor
		if cursor == "" {
			break
		}
	}

	return deleted, nil
}

// DeleteRecord deletes a single record from an account
func (p *Publisher) DeleteRecord(session *PDSSession, collection, rkey string) error {
	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": collection,
		"rkey":       rkey,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.deleteRecord", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("delete record failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}

// DeleteAccount deletes an account using PDS admin API
func (p *Publisher) DeleteAccount(adminPassword, did string) error {
	payload := map[string]interface{}{
		"did": did,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.deleteAccount", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.SetBasicAuth("admin", adminPassword)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("delete account failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}

// TakedownAccount applies a takedown to an account (hides content, preserves data)
func (p *Publisher) TakedownAccount(adminPassword, did, reason string) error {
	payload := map[string]interface{}{
		"subject": map[string]interface{}{
			"$type": "com.atproto.admin.defs#repoRef",
			"did":   did,
		},
		"takedown": map[string]interface{}{
			"applied": true,
			"ref":     reason,
		},
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.SetBasicAuth("admin", adminPassword)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("takedown account failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}

// RestoreAccount removes a takedown from an account (makes content visible again)
func (p *Publisher) RestoreAccount(adminPassword, did string) error {
	payload := map[string]interface{}{
		"subject": map[string]interface{}{
			"$type": "com.atproto.admin.defs#repoRef",
			"did":   did,
		},
		"takedown": map[string]interface{}{
			"applied": false,
		},
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.admin.updateSubjectStatus", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.SetBasicAuth("admin", adminPassword)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("restore account failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}
publisher.go: -439
@@ -1,439 +0,0 @@
package main

import (
	"bytes"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"
	"time"
)

// Publisher handles posting items to AT Protocol PDS
type Publisher struct {
	pdsHost    string
	httpClient *http.Client
}

// PDSSession holds authentication info for a PDS account
type PDSSession struct {
	DID        string `json:"did"`
	Handle     string `json:"handle"`
	AccessJwt  string `json:"accessJwt"`
	RefreshJwt string `json:"refreshJwt"`
}

// BskyPost represents an app.bsky.feed.post record
type BskyPost struct {
	Type      string      `json:"$type"`
	Text      string      `json:"text"`
	CreatedAt string      `json:"createdAt"`
	Facets    []BskyFacet `json:"facets,omitempty"`
	Embed     *BskyEmbed  `json:"embed,omitempty"`
}

type BskyFacet struct {
	Index    BskyByteSlice `json:"index"`
	Features []BskyFeature `json:"features"`
}

type BskyByteSlice struct {
	ByteStart int `json:"byteStart"`
	ByteEnd   int `json:"byteEnd"`
}

type BskyFeature struct {
	Type string `json:"$type"`
	URI  string `json:"uri,omitempty"`
	Tag  string `json:"tag,omitempty"` // For hashtag facets
}

type BskyEmbed struct {
	Type     string        `json:"$type"`
	External *BskyExternal `json:"external,omitempty"`
	Images   []BskyImage   `json:"images,omitempty"`
}

type BskyExternal struct {
	URI         string   `json:"uri"`
	Title       string   `json:"title"`
	Description string   `json:"description"`
	Thumb       *BlobRef `json:"thumb,omitempty"`
}

type BskyImage struct {
	Alt         string           `json:"alt"`
	Image       *BlobRef         `json:"image"`
	AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
}

type BskyAspectRatio struct {
	Width  int `json:"width"`
	Height int `json:"height"`
}
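// Roughly how these structs serialize for a link post (illustrative values;
// the facet/embed $type strings follow the Bluesky lexicons):
//
//	{
//	  "$type": "app.bsky.feed.post",
//	  "text": "Headline https://example.com/a",
//	  "createdAt": "2024-01-01T00:00:00Z",
//	  "facets": [{
//	    "index": {"byteStart": 9, "byteEnd": 30},
//	    "features": [{"$type": "app.bsky.richtext.facet#link",
//	                  "uri": "https://example.com/a"}]
//	  }],
//	  "embed": {
//	    "$type": "app.bsky.embed.external",
//	    "external": {"uri": "https://example.com/a", "title": "Headline",
//	                 "description": ""}
//	  }
//	}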
// NewPublisher creates a new Publisher instance
|
||||
func NewPublisher(pdsHost string) *Publisher {
|
||||
return &Publisher{
|
||||
pdsHost: pdsHost,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TID alphabet for base32-sortable encoding
const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"

// GenerateRkey creates a deterministic TID-format rkey from a GUID and timestamp.
// TIDs are required by the Bluesky relay for indexing - custom rkeys don't sync.
// Format: 13 chars base32-sortable, 53 bits timestamp + 10 bits clock ID
func GenerateRkey(guid string, timestamp time.Time) string {
	if guid == "" {
		return ""
	}

	// Get microseconds since Unix epoch (53 bits)
	microsInt := timestamp.UnixMicro()
	if microsInt < 0 {
		microsInt = 0
	}
	// Convert to uint64 and mask to 53 bits
	micros := uint64(microsInt) & ((1 << 53) - 1)

	// Generate deterministic 10-bit clock ID from GUID hash.
	// Use XOR of multiple hash bytes to reduce collisions.
	hash := sha256.Sum256([]byte(guid))
	// XOR bytes 0-3 together, then 4-7, combine for more entropy
	h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
	h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
	clockID := (h1 << 2) | (h2 >> 6)
	clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023

	// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
	tid := (micros << 10) | clockID

	// Encode as base32-sortable (13 characters)
	var result [13]byte
	for i := 12; i >= 0; i-- {
		result[i] = tidAlphabet[tid&0x1f]
		tid >>= 5
	}

	return string(result[:])
}
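// Illustrative sketch (not part of the original file): decoding a TID back to
// its timestamp shows the bit layout above in action. decodeTIDMicros is a
// hypothetical helper, assuming a valid 13-char rkey produced by GenerateRkey.
func decodeTIDMicros(tid string) int64 {
	var v uint64
	for i := 0; i < len(tid); i++ {
		// Each character carries 5 bits, most significant first
		v = (v << 5) | uint64(strings.IndexByte(tidAlphabet, tid[i]))
	}
	return int64(v >> 10) // drop the 10-bit clock ID, leaving the microseconds
}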
// extractURLs finds all URLs in a string
func extractURLs(text string) []string {
	// Match http:// or https:// URLs
	urlRegex := regexp.MustCompile(`https?://[^\s<>"'\)]+`)
	matches := urlRegex.FindAllString(text, -1)

	// Clean up trailing punctuation
	var urls []string
	for _, u := range matches {
		// Remove trailing punctuation that's likely not part of the URL
		u = strings.TrimRight(u, ".,;:!?")
		if u != "" {
			urls = append(urls, u)
		}
	}
	return urls
}
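// Illustrative example (not part of the original file), on an HN-style
// description; the closing quote stops the match, so markup never leaks in:
//
//	urls := extractURLs(`<a href="https://news.ycombinator.com/item?id=1">Comments</a>.`)
//	// urls == []string{"https://news.ycombinator.com/item?id=1"}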
// toCamelCaseTag converts a tag string to camelCase hashtag format
// e.g., "Lagos News" -> "lagosNews", "AI" -> "ai", "machine learning" -> "machineLearning"
func toCamelCaseTag(tag string) string {
	tag = strings.TrimSpace(tag)
	if tag == "" {
		return ""
	}

	// Remove any # prefix if present
	tag = strings.TrimPrefix(tag, "#")

	// Split on spaces and other separators
	words := strings.FieldsFunc(tag, func(r rune) bool {
		return r == ' ' || r == '-' || r == '_'
	})

	if len(words) == 0 {
		return ""
	}

	// If single word, return lowercased
	if len(words) == 1 {
		return strings.ToLower(words[0])
	}

	// Multiple words: lowercase the first word, capitalize the first letter of subsequent words
	var result strings.Builder
	for i, word := range words {
		if word == "" {
			continue
		}
		runes := []rune(word)
		if len(runes) > 0 {
			if i == 0 || result.Len() == 0 {
				// First word: all lowercase
				result.WriteString(strings.ToLower(word))
			} else {
				// Subsequent words: capitalize first letter, lowercase the rest
				result.WriteString(strings.ToUpper(string(runes[0])))
				if len(runes) > 1 {
					result.WriteString(strings.ToLower(string(runes[1:])))
				}
			}
		}
	}
	return result.String()
}
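// Illustrative examples (not part of the original file):
//
//	toCamelCaseTag("Lagos News")        // "lagosNews"
//	toCamelCaseTag("#machine-learning") // "machineLearning"
//	toCamelCaseTag("AI")                // "ai"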
// formatTagsForPost converts item tags to hashtag text and facets.
// Returns the hashtag line (e.g., "#ai #machineLearning #news") and its facets.
func formatTagsForPost(tags []string, textOffset int) (string, []BskyFacet) {
	if len(tags) == 0 {
		return "", nil
	}

	// Dedupe and convert tags
	seen := make(map[string]bool)
	var hashtags []string
	for _, tag := range tags {
		camel := toCamelCaseTag(tag)
		if camel == "" || seen[strings.ToLower(camel)] {
			continue
		}
		seen[strings.ToLower(camel)] = true
		hashtags = append(hashtags, camel)
	}

	if len(hashtags) == 0 {
		return "", nil
	}

	// Limit to 5 tags to keep the post compact
	if len(hashtags) > 5 {
		hashtags = hashtags[:5]
	}

	// Build the hashtag line and facets
	var line strings.Builder
	var facets []BskyFacet
	currentOffset := textOffset

	for i, ht := range hashtags {
		if i > 0 {
			line.WriteString(" ")
			currentOffset++
		}

		hashtagText := "#" + ht
		byteStart := currentOffset
		byteEnd := currentOffset + len(hashtagText)

		line.WriteString(hashtagText)

		facets = append(facets, BskyFacet{
			Index: BskyByteSlice{
				ByteStart: byteStart,
				ByteEnd:   byteEnd,
			},
			Features: []BskyFeature{{
				Type: "app.bsky.richtext.facet#tag",
				Tag:  ht,
			}},
		})

		currentOffset = byteEnd
	}

	return line.String(), facets
}
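// Illustrative sketch (not part of the original file): facet offsets are UTF-8
// byte positions into the final post text, which is why the caller passes
// textOffset. With made-up input:
//
//	line, facets := formatTagsForPost([]string{"AI", "Lagos News"}, 0)
//	// line == "#ai #lagosNews"
//	// facets[0].Index == BskyByteSlice{ByteStart: 0, ByteEnd: 3}
//	// facets[1].Index == BskyByteSlice{ByteStart: 4, ByteEnd: 14}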
// PublishItem posts a feed item to the PDS.
// Returns the AT URI of the created record, or an error.
func (p *Publisher) PublishItem(session *PDSSession, item *Item) (string, error) {
	if item.GUID == "" && item.Link == "" {
		return "", fmt.Errorf("item has no GUID or link, cannot publish")
	}

	// Collect URLs: main link + HN comments link (if applicable).
	// Limit to 2 URLs max to stay under the 300-grapheme limit.
	urlSet := make(map[string]bool)
	var allURLs []string

	// Add main link first
	if item.Link != "" {
		urlSet[item.Link] = true
		allURLs = append(allURLs, item.Link)
	}

	// For HN feeds, add the comments link from the description (looks like "https://news.ycombinator.com/item?id=...")
	descURLs := extractURLs(item.Description)
	for _, u := range descURLs {
		if strings.Contains(u, "news.ycombinator.com/item") && !urlSet[u] {
			urlSet[u] = true
			allURLs = append(allURLs, u)
			break // Only add one comments link
		}
	}

	// Add the enclosure URL for podcasts/media (audio/video) if we have room.
	// Bluesky has a 300-character limit, so only add it if the URLs plus a minimal title fit.
	if len(allURLs) < 2 && item.Enclosure != nil && item.Enclosure.URL != "" {
		encType := strings.ToLower(item.Enclosure.Type)
		if strings.HasPrefix(encType, "audio/") || strings.HasPrefix(encType, "video/") {
			if !urlSet[item.Enclosure.URL] {
				// Calculate whether the enclosure would fit (need ~60 chars for title + separators)
				currentURLLen := 0
				for _, u := range allURLs {
					currentURLLen += len(u) + 2 // +2 for \n\n
				}
				enclosureLen := len(item.Enclosure.URL) + 2
				if currentURLLen+enclosureLen < 235 { // Leave 60 chars for the title
					urlSet[item.Enclosure.URL] = true
					allURLs = append(allURLs, item.Enclosure.URL)
				}
			}
		}
	}

	// Get the primary URL (article link)
	primaryURL := ""
	if len(allURLs) > 0 {
		primaryURL = allURLs[0]
	}

	// Use the original publication date if available, otherwise the current time
	createdAt := time.Now()
	if !item.PubDate.IsZero() {
		createdAt = item.PubDate
	}

	// Build the post text with hashtags if available.
	// The link card shows the title, description, and thumbnail;
	// clicking the card doesn't trigger the "leaving Bluesky" warning.
	postText := ""
	var facets []BskyFacet

	if len(item.Tags) > 0 {
		tagLine, tagFacets := formatTagsForPost(item.Tags, 0)
		postText = tagLine
		facets = tagFacets
	}

	post := BskyPost{
		Type:      "app.bsky.feed.post",
		Text:      postText,
		CreatedAt: createdAt.Format(time.RFC3339),
		Facets:    facets,
	}

	// Always use an external embed (link card) - clicking the card doesn't show the "leaving" warning
	if primaryURL != "" {
		external := &BskyExternal{
			URI:         primaryURL,
			Title:       item.Title,
			Description: truncate(stripHTML(item.Description), 300),
		}

		// Add a thumbnail from the first image if available
		if len(item.ImageURLs) > 0 {
			if thumb := p.fetchAndUploadImage(session, item.ImageURLs[0]); thumb != nil {
				external.Thumb = thumb
			}
		}

		post.Embed = &BskyEmbed{
			Type:     "app.bsky.embed.external",
			External: external,
		}
	}

	// Use GUID + timestamp for a deterministic rkey.
	// This allows regenerating a new rkey by updating the timestamp if needed.
	guidForRkey := item.GUID
	if guidForRkey == "" {
		guidForRkey = item.Link
	}
	// Use PubDate for the rkey to match createdAt ordering, falling back to DiscoveredAt
	rkeyTime := item.PubDate
	if rkeyTime.IsZero() {
		rkeyTime = item.DiscoveredAt
	}
	rkey := GenerateRkey(guidForRkey, rkeyTime)

	// Create the record with the deterministic rkey
	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.feed.post",
		"rkey":       rkey,
		"record":     post,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("create record failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		URI string `json:"uri"`
		CID string `json:"cid"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", err
	}

	return result.URI, nil
}
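// Illustrative sketch (not part of the original file): a hypothetical call,
// assuming a Publisher p and a PDSSession obtained elsewhere, and a minimal item.
//
//	item := &Item{
//		Title:   "Example post",
//		Link:    "https://example.com/article",
//		GUID:    "https://example.com/article",
//		PubDate: time.Now(),
//	}
//	uri, err := p.PublishItem(session, item)
//	if err != nil {
//		log.Printf("publish failed: %v", err)
//	}
//	// uri looks like "at://did:plc:.../app.bsky.feed.post/<rkey>"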
// truncate shortens s to at most maxLen bytes, appending "..." when cut.
// The cut lands on a rune boundary so the result is always valid UTF-8.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		maxLen = 3
	}
	cut := maxLen - 3
	for cut > 0 && s[cut]&0xC0 == 0x80 { // back up past UTF-8 continuation bytes
		cut--
	}
	return s[:cut] + "..."
}
// stripHTML removes HTML tags from a string
func stripHTML(s string) string {
	// Remove HTML tags
	tagRegex := regexp.MustCompile(`<[^>]*>`)
	s = tagRegex.ReplaceAllString(s, "")

	// Decode common HTML entities ("&amp;" last, so "&amp;lt;" decodes one level only)
	s = strings.ReplaceAll(s, "&lt;", "<")
	s = strings.ReplaceAll(s, "&gt;", ">")
	s = strings.ReplaceAll(s, "&quot;", "\"")
	s = strings.ReplaceAll(s, "&#39;", "'")
	s = strings.ReplaceAll(s, "&nbsp;", " ")
	s = strings.ReplaceAll(s, "&amp;", "&")

	// Collapse whitespace
	spaceRegex := regexp.MustCompile(`\s+`)
	s = spaceRegex.ReplaceAllString(s, " ")

	return strings.TrimSpace(s)
}
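// Illustrative alternative (not part of the original file): the standard
// library's html package covers the full entity table, so the hand-rolled
// replacements above could be swapped for a single call.
//
//	import "html"
//
//	s = html.UnescapeString(s) // decodes &amp;, &#8217;, &hellip;, etc.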