Fix image embeds and rkey collisions

- Add image_urls to GetAllUnpublishedItems query
- Add aspectRatio to image embeds (required by Bluesky)
- Add image decoding to get dimensions (width/height)
- Fix rkey collision by using XOR of multiple hash bytes

The rkey collision was caused by using only 2 hash bytes (10 bits),
which gave a ~0.1% collision rate per pair of items with the same timestamp.
The clock ID now XORs 8 hash bytes for better entropy distribution.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
primal
2026-01-28 21:24:35 -05:00
parent 4e4e8c939a
commit a1f02cd0bc
2 changed files with 67 additions and 12 deletions
+9 -3
View File
@@ -1,6 +1,7 @@
package main
import (
"encoding/json"
"fmt"
"io"
"net/http"
@@ -371,7 +372,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
rows, err := c.db.Query(`
SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
i.author, i.pub_date, i.discovered_at
i.author, i.pub_date, i.discovered_at, i.image_urls
FROM items i
JOIN feeds f ON i.feed_url = f.url
WHERE f.publish_status = 'pass'
@@ -388,11 +389,11 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
var items []Item
for rows.Next() {
var item Item
var guid, title, link, description, content, author *string
var guid, title, link, description, content, author, imageURLsJSON *string
var pubDate, discoveredAt *time.Time
err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
&content, &author, &pubDate, &discoveredAt)
&content, &author, &pubDate, &discoveredAt, &imageURLsJSON)
if err != nil {
continue
}
@@ -406,6 +407,11 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
item.PubDate = TimeValue(pubDate)
item.DiscoveredAt = TimeValue(discoveredAt)
// Parse image URLs from JSON array
if imageURLsJSON != nil && *imageURLsJSON != "" {
json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
}
items = append(items, item)
}
+58 -9
View File
@@ -5,6 +5,10 @@ import (
"crypto/sha256"
"encoding/json"
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"io"
"net/http"
"net/url"
@@ -12,6 +16,8 @@ import (
"strings"
"time"
"unicode/utf8"
_ "golang.org/x/image/webp"
)
// Publisher handles posting items to AT Protocol PDS
@@ -66,8 +72,14 @@ type BskyExternal struct {
}
type BskyImage struct {
Alt string `json:"alt"`
Image *BlobRef `json:"image"`
Alt string `json:"alt"`
Image *BlobRef `json:"image"`
AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
}
// BskyAspectRatio is the width/height pair serialized under the optional
// "aspectRatio" key of a BskyImage.
type BskyAspectRatio struct {
Width int `json:"width"`
Height int `json:"height"`
}
// NewPublisher creates a new Publisher instance
@@ -216,8 +228,12 @@ func GenerateRkey(guid string, timestamp time.Time) string {
micros := uint64(microsInt) & ((1 << 53) - 1)
// Generate deterministic 10-bit clock ID from GUID hash
// Use XOR of multiple hash bytes to reduce collisions
hash := sha256.Sum256([]byte(guid))
clockID := uint64(hash[0])<<2 | uint64(hash[1])>>6
// XOR even-indexed bytes (0, 2, 4, 6) into h1 and odd-indexed bytes (1, 3, 5, 7) into h2, then combine
h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
clockID := (h1 << 2) | (h2 >> 6)
clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023
// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
@@ -460,12 +476,19 @@ func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altTex
}
for i := 0; i < maxImages; i++ {
blob := p.fetchAndUploadImage(session, imageURLs[i])
if blob != nil {
images = append(images, BskyImage{
result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
if result != nil && result.Blob != nil {
img := BskyImage{
Alt: altText,
Image: blob,
})
Image: result.Blob,
}
if result.Width > 0 && result.Height > 0 {
img.AspectRatio = &BskyAspectRatio{
Width: result.Width,
Height: result.Height,
}
}
images = append(images, img)
}
}
@@ -514,7 +537,22 @@ func (p *Publisher) FetchFavicon(siteURL string) string {
return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
}
// ImageUploadResult contains the uploaded blob and image dimensions.
// It is returned by fetchAndUploadImageWithDimensions: Blob is the reference
// returned by UploadBlob, and Width/Height come from image.DecodeConfig,
// falling back to 1x1 when the image cannot be decoded.
type ImageUploadResult struct {
Blob *BlobRef
Width int
Height int
}
// fetchAndUploadImage is a convenience wrapper around
// fetchAndUploadImageWithDimensions for callers that only need the blob
// reference. It returns nil when the underlying call yields no result.
func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
	if uploaded := p.fetchAndUploadImageWithDimensions(session, imageURL); uploaded != nil {
		return uploaded.Blob
	}
	return nil
}
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
// Fetch the image
resp, err := p.httpClient.Get(imageURL)
if err != nil {
@@ -552,13 +590,24 @@ func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *B
return nil
}
// Decode image to get dimensions
img, _, err := image.DecodeConfig(bytes.NewReader(data))
width, height := 1, 1 // Default if decode fails
if err == nil {
width, height = img.Width, img.Height
}
// Upload to PDS
blob, err := p.UploadBlob(session, data, contentType)
if err != nil {
return nil
}
return blob
return &ImageUploadResult{
Blob: blob,
Width: width,
Height: height,
}
}
func truncate(s string, maxLen int) string {