- Use labeled links (Article · Audio) instead of raw URLs in posts - Add language filter dropdown to dashboard with toggle selection - Auto-deny feeds with no language on discovery - Add deny/undeny buttons for domains to block crawling - Denied domains set feeds to dead status, preventing future checks Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1196 lines
31 KiB
Go
1196 lines
31 KiB
Go
package main
|
|
|
|
import (
	"bytes"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"html"
	"image"
	_ "image/gif"
	"image/jpeg"
	_ "image/png"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	"golang.org/x/image/draw"
	_ "golang.org/x/image/webp"
)
|
|
|
|
// Publisher handles posting items to an AT Protocol PDS.
// All methods issue XRPC calls against pdsHost using the shared httpClient.
type Publisher struct {
	pdsHost    string       // base URL of the PDS, e.g. "https://pds.example.com"
	httpClient *http.Client // shared client with a 30s timeout (set in NewPublisher)
}
|
|
|
|
// PDSSession holds authentication info for a PDS account, as returned by
// com.atproto.server.createSession / createAccount.
type PDSSession struct {
	DID        string `json:"did"`        // account's decentralized identifier
	Handle     string `json:"handle"`     // account handle
	AccessJwt  string `json:"accessJwt"`  // bearer token used for authenticated XRPC calls
	RefreshJwt string `json:"refreshJwt"` // refresh token
}
|
|
|
|
// BskyPost represents an app.bsky.feed.post record.
type BskyPost struct {
	Type      string      `json:"$type"`            // always "app.bsky.feed.post"
	Text      string      `json:"text"`             // post body text
	CreatedAt string      `json:"createdAt"`        // RFC3339 timestamp
	Facets    []BskyFacet `json:"facets,omitempty"` // rich-text annotations (link facets)
	Embed     *BskyEmbed  `json:"embed,omitempty"`  // optional image/external embed
}

// BskyFacet annotates a byte range of the post text with one or more
// features (this file emits only link features; see PublishItem).
type BskyFacet struct {
	Index    BskyByteSlice `json:"index"`    // byte range the features apply to
	Features []BskyFeature `json:"features"` // annotations for that range
}

// BskyByteSlice is a byte range [ByteStart, ByteEnd) into the UTF-8 post text.
type BskyByteSlice struct {
	ByteStart int `json:"byteStart"`
	ByteEnd   int `json:"byteEnd"`
}

// BskyFeature is a single facet feature. This code only creates
// "app.bsky.richtext.facet#link" features carrying a URI.
type BskyFeature struct {
	Type string `json:"$type"`
	URI  string `json:"uri,omitempty"`
}

// BskyEmbed is a post embed: either an external link card or images,
// discriminated by Type.
type BskyEmbed struct {
	Type     string        `json:"$type"`              // "app.bsky.embed.external" or "app.bsky.embed.images"
	External *BskyExternal `json:"external,omitempty"` // set for external embeds
	Images   []BskyImage   `json:"images,omitempty"`   // set for image embeds (up to 4, see uploadImages)
}

// BskyExternal is the link-card payload of an external embed.
type BskyExternal struct {
	URI         string   `json:"uri"`
	Title       string   `json:"title"`
	Description string   `json:"description"`
	Thumb       *BlobRef `json:"thumb,omitempty"` // optional thumbnail blob
}

// BskyImage is one image in an image embed.
type BskyImage struct {
	Alt         string           `json:"alt"`                   // alt text (the item title is reused here)
	Image       *BlobRef         `json:"image"`                 // uploaded blob reference
	AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"` // pixel dimensions when known
}

// BskyAspectRatio carries pixel dimensions for an embedded image.
type BskyAspectRatio struct {
	Width  int `json:"width"`
	Height int `json:"height"`
}
|
|
|
|
// NewPublisher creates a new Publisher instance
|
|
func NewPublisher(pdsHost string) *Publisher {
|
|
return &Publisher{
|
|
pdsHost: pdsHost,
|
|
httpClient: &http.Client{
|
|
Timeout: 30 * time.Second,
|
|
},
|
|
}
|
|
}
|
|
|
|
// CreateSession authenticates with the PDS and returns a session.
// It POSTs the identifier/password pair to com.atproto.server.createSession
// and decodes the returned tokens. A non-200 response becomes an error
// containing the HTTP status and response body.
func (p *Publisher) CreateSession(handle, password string) (*PDSSession, error) {
	payload := map[string]string{
		"identifier": handle,
		"password":   password,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}

	resp, err := p.httpClient.Post(
		p.pdsHost+"/xrpc/com.atproto.server.createSession",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort read so the error message can include the server's body.
		respBody, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("auth failed: %s - %s", resp.Status, string(respBody))
	}

	var session PDSSession
	if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
		return nil, err
	}

	return &session, nil
}
|
|
|
|
// CreateAccount creates a new account on the PDS via
// com.atproto.server.createAccount and returns the new account's session.
// Requires an invite code if the PDS has invites enabled; pass "" to omit
// the field entirely. Non-200 responses become errors carrying the HTTP
// status and response body.
func (p *Publisher) CreateAccount(handle, email, password, inviteCode string) (*PDSSession, error) {
	payload := map[string]interface{}{
		"handle":   handle,
		"email":    email,
		"password": password,
	}
	if inviteCode != "" {
		payload["inviteCode"] = inviteCode
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}

	resp, err := p.httpClient.Post(
		p.pdsHost+"/xrpc/com.atproto.server.createAccount",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Read the body once so it can serve both the error path and decoding.
	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("create account failed: %s - %s", resp.Status, string(respBody))
	}

	var session PDSSession
	if err := json.Unmarshal(respBody, &session); err != nil {
		return nil, err
	}

	return &session, nil
}
|
|
|
|
// CreateInviteCode creates an invite code using the PDS admin password
// (Basic Auth) via com.atproto.server.createInviteCode.
// useCount is how many times the returned code may be redeemed.
// Returns the code string, or an error carrying the HTTP status and
// response body on a non-200 response.
func (p *Publisher) CreateInviteCode(adminPassword string, useCount int) (string, error) {
	payload := map[string]interface{}{
		"useCount": useCount,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.server.createInviteCode", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	// PDS admin APIs use Basic Auth with "admin" as username
	req.SetBasicAuth("admin", adminPassword)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Best-effort read; errors surface through the status check below.
	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("create invite failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		Code string `json:"code"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", err
	}

	return result.Code, nil
}
|
|
|
|
// TID alphabet for base32-sortable encoding
|
|
const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"
|
|
|
|
// GenerateRkey creates a deterministic TID-format rkey from a GUID and timestamp
|
|
// TIDs are required by Bluesky relay for indexing - custom rkeys don't sync
|
|
// Format: 13 chars base32-sortable, 53 bits timestamp + 10 bits clock ID
|
|
func GenerateRkey(guid string, timestamp time.Time) string {
|
|
if guid == "" {
|
|
return ""
|
|
}
|
|
|
|
// Get microseconds since Unix epoch (53 bits)
|
|
microsInt := timestamp.UnixMicro()
|
|
if microsInt < 0 {
|
|
microsInt = 0
|
|
}
|
|
// Convert to uint64 and mask to 53 bits
|
|
micros := uint64(microsInt) & ((1 << 53) - 1)
|
|
|
|
// Generate deterministic 10-bit clock ID from GUID hash
|
|
// Use XOR of multiple hash bytes to reduce collisions
|
|
hash := sha256.Sum256([]byte(guid))
|
|
// XOR bytes 0-3 together, then 4-7, combine for more entropy
|
|
h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
|
|
h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
|
|
clockID := (h1 << 2) | (h2 >> 6)
|
|
clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023
|
|
|
|
// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
|
|
tid := (micros << 10) | clockID
|
|
|
|
// Encode as base32-sortable (13 characters)
|
|
var result [13]byte
|
|
for i := 12; i >= 0; i-- {
|
|
result[i] = tidAlphabet[tid&0x1f]
|
|
tid >>= 5
|
|
}
|
|
|
|
return string(result[:])
|
|
}
|
|
|
|
// urlPattern matches http/https URLs up to the first character that cannot
// belong to one (whitespace, quotes, angle brackets, closing paren).
// Compiled once at package scope so callers don't pay a recompile per call.
var urlPattern = regexp.MustCompile(`https?://[^\s<>"'\)]+`)

// extractURLs finds all http(s) URLs in text. Trailing punctuation
// (.,;:!?) that is likely sentence punctuation rather than part of the
// URL is stripped. Returns nil when no URLs are present.
func extractURLs(text string) []string {
	matches := urlPattern.FindAllString(text, -1)

	var urls []string
	for _, u := range matches {
		// Remove trailing punctuation that's likely not part of the URL.
		u = strings.TrimRight(u, ".,;:!?")
		if u != "" {
			urls = append(urls, u)
		}
	}
	return urls
}
|
|
|
|
// PublishItem posts a feed item to the PDS as an app.bsky.feed.post and
// returns the AT URI of the created record.
//
// The post text is the (possibly truncated) item title followed by short
// labeled links ("Article · Comments · Audio") rendered as rich-text link
// facets rather than raw URLs. Embeds are chosen by priority: uploaded
// images if any succeed, else an external link card for the first URL.
// The record is written with a deterministic TID rkey so republishing the
// same item maps to the same record.
//
// Returns an error when the item has neither GUID nor link, on transport
// or marshaling failure, or on a non-200 createRecord response.
func (p *Publisher) PublishItem(session *PDSSession, item *Item) (string, error) {
	if item.GUID == "" && item.Link == "" {
		return "", fmt.Errorf("item has no GUID or link, cannot publish")
	}

	// Collect URLs: main link + HN comments link (if applicable).
	// Limit to 2 URLs max to stay under the 300 grapheme limit.
	urlSet := make(map[string]bool)
	var allURLs []string

	// Add main link first — position 0 is always labeled "Article" below.
	if item.Link != "" {
		urlSet[item.Link] = true
		allURLs = append(allURLs, item.Link)
	}

	// For HN feeds, add comments link from description (looks like
	// "https://news.ycombinator.com/item?id=...").
	descURLs := extractURLs(item.Description)
	for _, u := range descURLs {
		if strings.Contains(u, "news.ycombinator.com/item") && !urlSet[u] {
			urlSet[u] = true
			allURLs = append(allURLs, u)
			break // Only add one comments link
		}
	}

	// Add enclosure URL for podcasts/media (audio/video) if we have room.
	// Bluesky has a 300 char limit, so only add if total URLs + minimal
	// title fits.
	if len(allURLs) < 2 && item.Enclosure != nil && item.Enclosure.URL != "" {
		encType := strings.ToLower(item.Enclosure.Type)
		if strings.HasPrefix(encType, "audio/") || strings.HasPrefix(encType, "video/") {
			if !urlSet[item.Enclosure.URL] {
				// Calculate if enclosure would fit (need ~60 chars for
				// title + separators).
				currentURLLen := 0
				for _, u := range allURLs {
					currentURLLen += len(u) + 2 // +2 for \n\n
				}
				enclosureLen := len(item.Enclosure.URL) + 2
				if currentURLLen+enclosureLen < 235 { // Leave 60 chars for title
					urlSet[item.Enclosure.URL] = true
					allURLs = append(allURLs, item.Enclosure.URL)
				}
			}
		}
	}

	// Build post text: title + link labels.
	// Bluesky has a 300 grapheme limit — rune count is used as an
	// approximation (close enough for typical titles; combining
	// characters could make this undercount).
	const maxGraphemes = 295 // Leave some margin

	// Create labeled links: "Article", "Audio", etc.
	type labeledLink struct {
		Label string
		URL   string
	}
	var links []labeledLink

	for i, u := range allURLs {
		if i == 0 {
			// First URL is the article link
			links = append(links, labeledLink{Label: "Article", URL: u})
		} else if item.Enclosure != nil && u == item.Enclosure.URL {
			// Enclosure URL — label based on media type
			encType := strings.ToLower(item.Enclosure.Type)
			if strings.HasPrefix(encType, "audio/") {
				links = append(links, labeledLink{Label: "Audio", URL: u})
			} else if strings.HasPrefix(encType, "video/") {
				links = append(links, labeledLink{Label: "Video", URL: u})
			} else {
				links = append(links, labeledLink{Label: "Media", URL: u})
			}
		} else if strings.Contains(u, "news.ycombinator.com") {
			links = append(links, labeledLink{Label: "Comments", URL: u})
		} else {
			links = append(links, labeledLink{Label: "Link", URL: u})
		}
	}

	// Calculate space needed for labels (in runes).
	// Format: "Article · Audio" or just "Article".
	labelSpace := 0
	for i, link := range links {
		labelSpace += utf8.RuneCountInString(link.Label)
		if i > 0 {
			labelSpace += 3 // " · " separator
		}
	}
	labelSpace += 2 // \n\n before labels

	// Truncate title if needed so title + labels fit the limit.
	title := item.Title
	titleRunes := utf8.RuneCountInString(title)
	maxTitleRunes := maxGraphemes - labelSpace - 3 // -3 for "..."

	if titleRunes+labelSpace > maxGraphemes {
		if maxTitleRunes > 10 {
			runes := []rune(title)
			if len(runes) > maxTitleRunes {
				title = string(runes[:maxTitleRunes]) + "..."
			}
		} else {
			// Pathological label space: fall back to a fixed 50-rune title.
			runes := []rune(title)
			if len(runes) > 50 {
				title = string(runes[:50]) + "..."
			}
		}
	}

	// Build final text: title, blank line, " · "-separated labels.
	var textBuilder strings.Builder
	textBuilder.WriteString(title)
	if len(links) > 0 {
		textBuilder.WriteString("\n\n")
		for i, link := range links {
			if i > 0 {
				textBuilder.WriteString(" · ")
			}
			textBuilder.WriteString(link.Label)
		}
	}
	text := textBuilder.String()

	// Use original publication date if available, otherwise current time.
	createdAt := time.Now()
	if !item.PubDate.IsZero() {
		createdAt = item.PubDate
	}

	post := BskyPost{
		Type:      "app.bsky.feed.post",
		Text:      text,
		CreatedAt: createdAt.Format(time.RFC3339),
	}

	// Add facets for labeled links: find each label in the text (scanning
	// forward from the end of the previous label so duplicate words in the
	// title are skipped) and attach a link facet over its byte range.
	searchPos := len(title) + 2 // Start after title + \n\n
	for _, link := range links {
		labelStart := strings.Index(text[searchPos:], link.Label)
		if labelStart >= 0 {
			labelStart += searchPos
			byteStart := len(text[:labelStart])
			byteEnd := byteStart + len(link.Label)

			post.Facets = append(post.Facets, BskyFacet{
				Index: BskyByteSlice{
					ByteStart: byteStart,
					ByteEnd:   byteEnd,
				},
				Features: []BskyFeature{
					{
						Type: "app.bsky.richtext.facet#link",
						URI:  link.URL,
					},
				},
			})
			searchPos = labelStart + len(link.Label)
		}
	}

	// Decide embed type based on content.
	// Priority: images > external link card.
	if len(item.ImageURLs) > 0 {
		// Try to upload images (up to 4)
		uploadedImages := p.uploadImages(session, item.ImageURLs, item.Title)
		if len(uploadedImages) > 0 {
			post.Embed = &BskyEmbed{
				Type:   "app.bsky.embed.images",
				Images: uploadedImages,
			}
		}
	}

	// Fall back to external embed if no images were uploaded.
	if post.Embed == nil && len(allURLs) > 0 {
		external := &BskyExternal{
			URI:         allURLs[0],
			Title:       item.Title,
			Description: truncate(stripHTML(item.Description), 300),
		}

		// Try to add thumbnail from first image (best-effort).
		if len(item.ImageURLs) > 0 {
			if thumb := p.fetchAndUploadImage(session, item.ImageURLs[0]); thumb != nil {
				external.Thumb = thumb
			}
		}

		post.Embed = &BskyEmbed{
			Type:     "app.bsky.embed.external",
			External: external,
		}
	}

	// Use GUID + timestamp for a deterministic rkey.
	// This allows regenerating a new rkey by updating discoveredAt if needed.
	guidForRkey := item.GUID
	if guidForRkey == "" {
		guidForRkey = item.Link
	}
	// Use PubDate for rkey to match createdAt ordering, fall back to
	// DiscoveredAt.
	rkeyTime := item.PubDate
	if rkeyTime.IsZero() {
		rkeyTime = item.DiscoveredAt
	}
	rkey := GenerateRkey(guidForRkey, rkeyTime)

	// Create the record with the deterministic rkey via
	// com.atproto.repo.createRecord.
	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.feed.post",
		"rkey":       rkey,
		"record":     post,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("create record failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		URI string `json:"uri"`
		CID string `json:"cid"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", err
	}

	return result.URI, nil
}
|
|
|
|
// uploadImages fetches and uploads up to 4 images (Bluesky's image-embed
// limit), returning BskyImage structs. Failed fetches/uploads are skipped,
// so the result may be shorter than the input or empty. altText is applied
// to every image, since no per-image alt text is available here.
func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altText string) []BskyImage {
	var images []BskyImage
	maxImages := 4
	if len(imageURLs) < maxImages {
		maxImages = len(imageURLs)
	}

	for i := 0; i < maxImages; i++ {
		result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
		if result != nil && result.Blob != nil {
			img := BskyImage{
				Alt:   altText,
				Image: result.Blob,
			}
			// Attach the aspect ratio only when real dimensions were decoded.
			if result.Width > 0 && result.Height > 0 {
				img.AspectRatio = &BskyAspectRatio{
					Width:  result.Width,
					Height: result.Height,
				}
			}
			images = append(images, img)
		}
	}

	return images
}
|
|
|
|
// FetchFavicon tries to get a favicon URL for a site by probing common
// well-known paths with HEAD requests. It returns the first candidate
// answering 200 with an image Content-Type (or any 200 for a .ico path,
// since some servers mislabel those), falling back to Google's favicon
// service, so only an empty or unparseable siteURL yields "".
func (p *Publisher) FetchFavicon(siteURL string) string {
	if siteURL == "" {
		return ""
	}

	// Parse the site URL to get the host; default to https for bare hostnames.
	if !strings.Contains(siteURL, "://") {
		siteURL = "https://" + siteURL
	}
	u, err := url.Parse(siteURL)
	if err != nil {
		return ""
	}

	// Try common favicon locations (HEAD only — we need the URL, not the bytes).
	faviconURLs := []string{
		fmt.Sprintf("https://%s/favicon.ico", u.Host),
		fmt.Sprintf("https://%s/favicon.png", u.Host),
		fmt.Sprintf("https://%s/apple-touch-icon.png", u.Host),
	}

	for _, faviconURL := range faviconURLs {
		resp, err := p.httpClient.Head(faviconURL)
		if err != nil {
			continue
		}
		resp.Body.Close()
		if resp.StatusCode == http.StatusOK {
			contentType := resp.Header.Get("Content-Type")
			// Accept .ico by extension too, since servers often serve it
			// without an image/* content type.
			if strings.HasPrefix(contentType, "image/") || strings.HasSuffix(faviconURL, ".ico") {
				return faviconURL
			}
		}
	}

	// Fallback to Google's favicon service (reliable, returns PNG)
	return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
}
|
|
|
|
// ImageUploadResult contains the uploaded blob and image dimensions.
type ImageUploadResult struct {
	Blob   *BlobRef // blob reference returned by the PDS upload
	Width  int      // pixel width (1 when the image could not be decoded)
	Height int      // pixel height (1 when the image could not be decoded)
}
|
|
|
|
// fetchAndUploadImage downloads an image and uploads it to the PDS,
// returning only the blob reference (dimensions are discarded).
// Returns nil on any fetch/decode/upload failure.
func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
	result := p.fetchAndUploadImageWithDimensions(session, imageURL)
	if result == nil {
		return nil
	}
	return result.Blob
}
|
|
|
|
// upgradeImageURL rewrites known CDN thumbnail URLs to request a larger
// rendition. Only BBC's image CDN is handled today: 240px and 480px
// "standard" variants are bumped to 800px. Unknown URLs pass through
// unchanged.
func upgradeImageURL(imageURL string) string {
	if !strings.Contains(imageURL, "ichef.bbci.co.uk") {
		return imageURL
	}
	for _, small := range []string{"/standard/240/", "/standard/480/"} {
		imageURL = strings.Replace(imageURL, small, "/standard/800/", 1)
	}
	return imageURL
}
|
|
|
|
// fetchAndUploadImageWithDimensions downloads an image, resizes it under
// the PDS blob-size limit if necessary, uploads it, and returns the blob
// plus pixel dimensions. Returns nil on any failure (non-image content,
// fetch error, undecodable oversized image, upload error) — callers treat
// nil as "skip this image".
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
	// Upgrade image URL to larger size if possible
	imageURL = upgradeImageURL(imageURL)

	// Fetch the image
	resp, err := p.httpClient.Get(imageURL)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil
	}

	// Check content type, guessing from the URL extension when the server
	// omits the header.
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		// Try to guess from URL
		if strings.HasSuffix(strings.ToLower(imageURL), ".png") {
			contentType = "image/png"
		} else if strings.HasSuffix(strings.ToLower(imageURL), ".gif") {
			contentType = "image/gif"
		} else if strings.HasSuffix(strings.ToLower(imageURL), ".webp") {
			contentType = "image/webp"
		} else {
			contentType = "image/jpeg" // Default
		}
	}

	// Only accept image types
	if !strings.HasPrefix(contentType, "image/") {
		return nil
	}

	// Read image data (limit to 2MB to allow for resize headroom).
	// NOTE(review): a body larger than 2MB is silently truncated here,
	// which will usually fail the decode below — confirm that is acceptable.
	data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil || len(data) == 0 {
		return nil
	}

	// Decode only the header to get dimensions cheaply.
	imgConfig, _, err := image.DecodeConfig(bytes.NewReader(data))
	width, height := 1, 1 // Default if decode fails
	if err == nil {
		width, height = imgConfig.Width, imgConfig.Height
	}

	// Bluesky blob limit is ~976KB, use 900KB as safe threshold
	const maxBlobSize = 900 * 1024

	// If image is too large, resize it and re-encode as JPEG.
	// NOTE(review): JPEG re-encoding discards any alpha channel from
	// PNG/GIF/WebP sources — confirm that is acceptable for embeds.
	if len(data) > maxBlobSize {
		// Decode the full image for resizing
		img, _, err := image.Decode(bytes.NewReader(data))
		if err != nil {
			return nil // Can't decode, can't resize
		}

		// Scale down iteratively until under limit
		scaleFactor := 0.9 // Start with 90% and iterate if needed

		for attempt := 0; attempt < 5; attempt++ {
			newWidth := int(float64(width) * scaleFactor)
			newHeight := int(float64(height) * scaleFactor)

			// Minimum dimensions
			if newWidth < 100 {
				newWidth = 100
			}
			if newHeight < 100 {
				newHeight = 100
			}

			// Create resized image
			resized := image.NewRGBA(image.Rect(0, 0, newWidth, newHeight))
			draw.CatmullRom.Scale(resized, resized.Bounds(), img, img.Bounds(), draw.Over, nil)

			// Encode as JPEG
			var buf bytes.Buffer
			if err := jpeg.Encode(&buf, resized, &jpeg.Options{Quality: 85}); err != nil {
				return nil
			}

			if buf.Len() <= maxBlobSize {
				data = buf.Bytes()
				width = newWidth
				height = newHeight
				contentType = "image/jpeg"
				break
			}

			// Still too large, reduce scale further
			scaleFactor *= 0.8
		}

		// If still too large after 5 attempts, give up
		// (data is only replaced on a successful small-enough encode).
		if len(data) > maxBlobSize {
			return nil
		}
	}

	// Upload to PDS
	blob, err := p.UploadBlob(session, data, contentType)
	if err != nil {
		return nil
	}

	return &ImageUploadResult{
		Blob:   blob,
		Width:  width,
		Height: height,
	}
}
|
|
|
|
// truncate shortens s to at most maxLen bytes, appending "..." when
// content is dropped. The cut is backed up to a rune boundary so the
// result is always valid UTF-8, and maxLen values too small to hold the
// ellipsis (or negative) return a plain prefix instead of panicking
// (the previous version sliced s[:maxLen-3] unconditionally).
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen < 0 {
		maxLen = 0
	}
	cut := maxLen
	ellipsis := ""
	if maxLen > 3 {
		cut = maxLen - 3
		ellipsis = "..."
	}
	// Back up so we never split a multi-byte character.
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + ellipsis
}
|
|
|
|
var (
	// htmlTagPattern strips anything that looks like an HTML/XML tag.
	// Compiled once at package scope (was compiled on every call).
	htmlTagPattern = regexp.MustCompile(`<[^>]*>`)
	// whitespacePattern collapses runs of whitespace into single spaces.
	whitespacePattern = regexp.MustCompile(`\s+`)
)

// stripHTML converts an HTML fragment to plain text: tags are removed,
// HTML entities (&amp;, &lt;, &#39;, &nbsp;, ...) are decoded, and all
// whitespace is collapsed to single spaces and trimmed.
//
// The previous hand-rolled ReplaceAll chain had been corrupted into
// no-ops (the entity names had themselves been entity-decoded); the
// standard library's html.UnescapeString handles the full entity table.
func stripHTML(s string) string {
	s = htmlTagPattern.ReplaceAllString(s, "")
	s = html.UnescapeString(s)
	// UnescapeString turns &nbsp; into U+00A0, which Go's \s does not
	// match — normalize it to a plain space before collapsing.
	s = strings.ReplaceAll(s, "\u00a0", " ")
	s = whitespacePattern.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}
|
|
|
|
// DeriveHandleFromFeed generates an AT Protocol handle from a feed URL.
// Format: {domain}-{category}.1440.news
// AT Protocol allows up to 63 characters per label, but the PDS
// restricts the first segment to 18 characters for local handles.
// Examples:
//
//	feeds.bbci.co.uk/news/technology/rss.xml → bbc-technology.1440.news
//	news.ycombinator.com/rss → ycombinator.1440.news
//
// Returns "" only when the URL cannot be parsed.
func DeriveHandleFromFeed(feedURL string) string {
	const maxSubdomainLen = 18 // PDS limit for first segment

	// Ensure we have a scheme for parsing
	if !strings.Contains(feedURL, "://") {
		feedURL = "https://" + feedURL
	}

	u, err := url.Parse(feedURL)
	if err != nil {
		return ""
	}

	hostname := strings.ToLower(u.Hostname())
	path := strings.ToLower(u.Path)

	// Remove common feed suffixes/extensions
	suffixesToRemove := []string{".xml", ".rss", ".atom", ".json", "/rss", "/feed", "/atom", "/index"}
	for _, suffix := range suffixesToRemove {
		path = strings.TrimSuffix(path, suffix)
	}

	// Split path into segments and filter noise words that carry no
	// category meaning.
	segments := strings.Split(strings.Trim(path, "/"), "/")
	skipPathWords := map[string]bool{
		"rss": true, "feed": true, "feeds": true, "atom": true,
		"xml": true, "default": true, "index": true, "services": true,
		"nyt": true, "blog": true,
	}

	var pathParts []string
	for _, seg := range segments {
		seg = cleanHandleSegment(seg)
		if seg != "" && !skipPathWords[seg] {
			pathParts = append(pathParts, seg)
		}
	}

	// Split hostname and extract the meaningful domain
	hostParts := strings.Split(hostname, ".")

	// Two-part TLDs to handle specially (stripped as a pair below)
	twoPartTLDs := map[string]bool{
		"co.uk": true, "com.au": true, "co.nz": true, "co.jp": true,
		"com.br": true, "co.in": true, "org.uk": true, "ac.uk": true,
	}

	// Check for two-part TLD; otherwise strip a recognized single TLD.
	if len(hostParts) >= 2 {
		possibleTwoPartTLD := hostParts[len(hostParts)-2] + "." + hostParts[len(hostParts)-1]
		if twoPartTLDs[possibleTwoPartTLD] {
			hostParts = hostParts[:len(hostParts)-2]
		} else {
			// Single TLD - remove it
			singleTLDs := map[string]bool{
				"com": true, "org": true, "net": true, "io": true,
				"edu": true, "gov": true, "uk": true, "de": true, "fr": true,
			}
			if singleTLDs[hostParts[len(hostParts)-1]] {
				hostParts = hostParts[:len(hostParts)-1]
			}
		}
	}

	// Skip noise subdomains
	skipHostWords := map[string]bool{
		"www": true, "feeds": true, "rss": true, "feed": true,
		"api": true, "cdn": true, "static": true, "news": true,
	}

	var meaningfulHostParts []string
	for _, part := range hostParts {
		if !skipHostWords[part] && part != "" {
			meaningfulHostParts = append(meaningfulHostParts, part)
		}
	}

	// Get the main domain (e.g., "bbci", "ycombinator", "nytimes") —
	// the rightmost label that survived TLD and noise stripping.
	var mainDomain string
	if len(meaningfulHostParts) > 0 {
		mainDomain = meaningfulHostParts[len(meaningfulHostParts)-1]
	} else if len(hostParts) > 0 {
		mainDomain = hostParts[len(hostParts)-1]
	}

	// Special case: "bbci" should become "bbc"
	if mainDomain == "bbci" {
		mainDomain = "bbc"
	}

	// Abbreviations for long category names to fit 18-char limit
	categoryAbbrevs := map[string]string{
		"science-and-environment": "sci-env",
		"entertainment-and-arts":  "ent-arts",
		"science-environment":     "sci-env",
		"entertainment-arts":      "ent-arts",
		"technology":              "tech",
		"business":                "biz",
		"international":           "intl",
		"environment":             "env",
		"entertainment":           "ent",
		"politics":                "pol",
	}

	// Build subdomain: domain + category (from path)
	var subdomain string
	if len(pathParts) > 0 {
		// Use last meaningful path part as category (e.g., "technology"
		// from /news/technology/)
		category := pathParts[len(pathParts)-1]
		// Skip generic categories
		if category == "news" && len(pathParts) == 1 {
			subdomain = mainDomain
		} else {
			// Try to abbreviate if the full subdomain would be too long
			fullSubdomain := mainDomain + "-" + category
			if len(fullSubdomain) > maxSubdomainLen {
				if abbrev, ok := categoryAbbrevs[category]; ok {
					category = abbrev
				}
			}
			subdomain = mainDomain + "-" + category
		}
	} else {
		subdomain = mainDomain
	}

	// If still too long, just use main hostname
	if len(subdomain) > maxSubdomainLen {
		subdomain = mainDomain
	}

	// Final safety: truncate if still too long
	if len(subdomain) > maxSubdomainLen {
		subdomain = subdomain[:maxSubdomainLen]
	}

	subdomain = strings.Trim(subdomain, "-")

	// Collapse multiple hyphens
	for strings.Contains(subdomain, "--") {
		subdomain = strings.ReplaceAll(subdomain, "--", "-")
	}

	return subdomain + ".1440.news"
}
|
|
|
|
// nonHandleChars matches anything not allowed in a handle segment.
// Compiled once at package scope (was compiled on every call).
var nonHandleChars = regexp.MustCompile(`[^a-z0-9-]`)

// cleanHandleSegment sanitizes a string for use in an AT Protocol handle
// segment. Handle segments must be lowercase alphanumeric with hyphens
// and no leading/trailing hyphens. A trailing file extension is removed,
// a single leading or trailing feed affix ("rss", "feed", "atom", "xml")
// is stripped, separators become hyphens, and all other disallowed
// characters are dropped. May return "" (callers filter empty segments).
func cleanHandleSegment(s string) string {
	// Remove file extensions
	if idx := strings.LastIndex(s, "."); idx > 0 {
		s = s[:idx]
	}

	// Convert to lowercase
	s = strings.ToLower(s)

	// Strip common feed prefixes/suffixes from the segment itself,
	// e.g. "showrss" → "show", "rssworld" → "world". At most one affix
	// is removed (break after the first match).
	feedAffixes := []string{"rss", "feed", "atom", "xml"}
	for _, affix := range feedAffixes {
		// Strip suffix (e.g., "showrss" → "show")
		if strings.HasSuffix(s, affix) && len(s) > len(affix) {
			s = strings.TrimSuffix(s, affix)
			break
		}
		// Strip prefix (e.g., "rssworld" → "world")
		if strings.HasPrefix(s, affix) && len(s) > len(affix) {
			s = strings.TrimPrefix(s, affix)
			break
		}
	}

	// Replace underscores and other separators with hyphens
	s = strings.ReplaceAll(s, "_", "-")
	s = strings.ReplaceAll(s, " ", "-")

	// Remove any characters that aren't alphanumeric or hyphens
	s = nonHandleChars.ReplaceAllString(s, "")

	// Collapse multiple hyphens
	for strings.Contains(s, "--") {
		s = strings.ReplaceAll(s, "--", "-")
	}

	// Trim leading/trailing hyphens
	s = strings.Trim(s, "-")

	return s
}
|
|
|
|
// SplitHandle extracts the path prefix and hostname from a derived handle
|
|
// Example: show.news.ycombinator.com.1440.news → ("show", "news.ycombinator.com")
|
|
func SplitHandle(handle string) (prefix string, hostname string) {
|
|
// Remove .1440.news suffix
|
|
handle = strings.TrimSuffix(handle, ".1440.news")
|
|
|
|
parts := strings.Split(handle, ".")
|
|
|
|
// Try to find where hostname starts by looking for valid hostname patterns
|
|
if len(parts) >= 2 {
|
|
for i := 0; i < len(parts)-1; i++ {
|
|
remaining := strings.Join(parts[i:], ".")
|
|
if looksLikeHostname(remaining) {
|
|
if i > 0 {
|
|
prefix = strings.Join(parts[:i], ".")
|
|
}
|
|
hostname = remaining
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fallback: no prefix, entire thing is hostname
|
|
hostname = handle
|
|
return "", hostname
|
|
}
|
|
|
|
// isLikelyTLDPart reports whether s is a label commonly seen as the final
// component of a hostname (classic TLDs, country codes, and a few newer
// gTLDs this project encounters).
func isLikelyTLDPart(s string) bool {
	switch s {
	case "com", "org", "net", "edu", "gov",
		"io", "co", "uk", "de", "fr",
		"jp", "au", "ca", "nl", "se",
		"news", "blog", "tech", "dev":
		return true
	}
	return false
}
|
|
|
|
// isTwoPartTLD reports whether the pair (first, second) forms a known
// two-part public suffix such as "co.uk" or "com.au".
func isTwoPartTLD(first, second string) bool {
	switch first + "." + second {
	case "co.uk", "com.au", "co.jp", "co.nz",
		"org.uk", "net.au", "com.br":
		return true
	}
	return false
}
|
|
|
|
func looksLikeHostname(s string) bool {
|
|
// A hostname typically has at least one dot and ends with a TLD-like part
|
|
parts := strings.Split(s, ".")
|
|
if len(parts) < 2 {
|
|
return false
|
|
}
|
|
lastPart := parts[len(parts)-1]
|
|
return isLikelyTLDPart(lastPart)
|
|
}
|
|
|
|
// BlobRef represents a blob reference as returned by uploadBlob and
// embedded in records (profile avatars, post images/thumbnails).
type BlobRef struct {
	Type     string `json:"$type"`    // $type discriminator from the PDS response
	Ref      Link   `json:"ref"`      // reference to the stored blob
	MimeType string `json:"mimeType"` // MIME type reported at upload time
	Size     int64  `json:"size"`     // blob size in bytes
}
|
|
|
|
// Link wraps the {"$link": "..."} reference object used inside BlobRef.
type Link struct {
	Link string `json:"$link"`
}
|
|
|
|
// UploadBlob uploads raw bytes to the PDS (com.atproto.repo.uploadBlob)
// and returns the blob reference from the response. The caller supplies
// the MIME type; the session's access JWT authorizes the request.
// Non-200 responses become errors carrying the status and response body.
func (p *Publisher) UploadBlob(session *PDSSession, data []byte, mimeType string) (*BlobRef, error) {
	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.uploadBlob", bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", mimeType)
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Read once; the body serves both the error message and decoding.
	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("upload blob failed: %s - %s", resp.Status, string(respBody))
	}

	var result struct {
		Blob BlobRef `json:"blob"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, err
	}

	return &result.Blob, nil
}
|
|
|
|
// UpdateProfile updates the app.bsky.actor.profile record for an account.
// It first fetches the existing profile (best-effort) so unrelated fields
// are preserved, then writes back via putRecord. Empty displayName or
// description arguments and a nil avatar leave the corresponding field
// untouched. When an existing record was found, its CID is passed as
// swapRecord so the write fails rather than clobbering a concurrent update.
func (p *Publisher) UpdateProfile(session *PDSSession, displayName, description string, avatar *BlobRef) error {
	// First, get the current profile to preserve any existing fields
	getReq, err := http.NewRequest("GET",
		p.pdsHost+"/xrpc/com.atproto.repo.getRecord?repo="+session.DID+"&collection=app.bsky.actor.profile&rkey=self",
		nil)
	if err != nil {
		return err
	}
	getReq.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	getResp, err := p.httpClient.Do(getReq)

	var existingCID string
	// Start from a minimal record; replaced wholesale if a profile exists.
	profile := map[string]interface{}{
		"$type": "app.bsky.actor.profile",
	}

	if err == nil && getResp.StatusCode == http.StatusOK {
		defer getResp.Body.Close()
		var existing struct {
			CID   string                 `json:"cid"`
			Value map[string]interface{} `json:"value"`
		}
		// Decode failure is tolerated: we fall back to the fresh record.
		if json.NewDecoder(getResp.Body).Decode(&existing) == nil {
			existingCID = existing.CID
			profile = existing.Value
		}
	} else if getResp != nil {
		// Non-200 (e.g. no profile yet): close the body and proceed
		// with the fresh record.
		getResp.Body.Close()
	}

	// Update fields
	if displayName != "" {
		profile["displayName"] = displayName
	}
	if description != "" {
		profile["description"] = description
	}
	if avatar != nil {
		profile["avatar"] = avatar
	}

	// Put the record
	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.actor.profile",
		"rkey":       "self",
		"record":     profile,
	}
	if existingCID != "" {
		payload["swapRecord"] = existingCID
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.putRecord", bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("update profile failed: %s - %s", resp.Status, string(respBody))
	}

	return nil
}
|
|
|
|
// FetchFavicon downloads a site icon, trying common well-known paths in
// order of expected quality (apple-touch-icon variants first, then
// favicon.png/ico). It returns the raw image bytes and a best-effort
// MIME type, or an error when no candidate URL yields a 200 response.
//
// The per-URL work lives in fetchIcon so each response body is closed as
// soon as that attempt finishes; the previous version used
// `defer resp.Body.Close()` inside the loop, which kept every attempted
// connection open until FetchFavicon returned.
func FetchFavicon(siteURL string) ([]byte, string, error) {
	if !strings.HasPrefix(siteURL, "http") {
		siteURL = "https://" + siteURL
	}

	u, err := url.Parse(siteURL)
	if err != nil {
		return nil, "", err
	}

	baseURL := u.Scheme + "://" + u.Host

	// Try apple-touch-icon first (usually higher quality)
	iconURLs := []string{
		baseURL + "/apple-touch-icon.png",
		baseURL + "/apple-touch-icon-precomposed.png",
		baseURL + "/favicon.png",
		baseURL + "/favicon.ico",
	}

	client := &http.Client{Timeout: 10 * time.Second}

	for _, iconURL := range iconURLs {
		data, contentType, err := fetchIcon(client, iconURL)
		if err != nil {
			continue
		}
		return data, contentType, nil
	}

	return nil, "", fmt.Errorf("no favicon found for %s", siteURL)
}

// fetchIcon downloads one icon URL, closing the response body before
// returning. Non-200 statuses and read failures are reported as errors
// so the caller can move on to the next candidate.
func fetchIcon(client *http.Client, iconURL string) ([]byte, string, error) {
	resp, err := client.Get(iconURL)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("unexpected status %s for %s", resp.Status, iconURL)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", err
	}

	// Determine mime type, guessing from the extension when absent.
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		switch {
		case strings.HasSuffix(iconURL, ".png"):
			contentType = "image/png"
		case strings.HasSuffix(iconURL, ".ico"):
			contentType = "image/x-icon"
		default:
			contentType = "image/png" // default
		}
	}

	return data, contentType, nil
}
|