Fix image embeds and rkey collisions
- Add image_urls to GetAllUnpublishedItems query
- Add aspectRatio to image embeds (required by Bluesky)
- Add image decoding to get dimensions (width/height)
- Fix rkey collision by using XOR of multiple hash bytes

The rkey collision was caused by using only 2 hash bytes (10 bits), which had a ~0.1% collision rate per pair of items with the same timestamp. Now XORs 8 hash bytes for better entropy distribution.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+9
-3
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -371,7 +372,7 @@ func (c *Crawler) RefreshAllProfiles(publisher *Publisher, feedPassword string)
|
||||
func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
|
||||
rows, err := c.db.Query(`
|
||||
SELECT i.id, i.feed_url, i.guid, i.title, i.link, i.description, i.content,
|
||||
i.author, i.pub_date, i.discovered_at
|
||||
i.author, i.pub_date, i.discovered_at, i.image_urls
|
||||
FROM items i
|
||||
JOIN feeds f ON i.feed_url = f.url
|
||||
WHERE f.publish_status = 'pass'
|
||||
@@ -388,11 +389,11 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
|
||||
var items []Item
|
||||
for rows.Next() {
|
||||
var item Item
|
||||
var guid, title, link, description, content, author *string
|
||||
var guid, title, link, description, content, author, imageURLsJSON *string
|
||||
var pubDate, discoveredAt *time.Time
|
||||
|
||||
err := rows.Scan(&item.ID, &item.FeedURL, &guid, &title, &link, &description,
|
||||
&content, &author, &pubDate, &discoveredAt)
|
||||
&content, &author, &pubDate, &discoveredAt, &imageURLsJSON)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
@@ -406,6 +407,11 @@ func (c *Crawler) GetAllUnpublishedItems(limit int) ([]Item, error) {
|
||||
item.PubDate = TimeValue(pubDate)
|
||||
item.DiscoveredAt = TimeValue(discoveredAt)
|
||||
|
||||
// Parse image URLs from JSON array
|
||||
if imageURLsJSON != nil && *imageURLsJSON != "" {
|
||||
json.Unmarshal([]byte(*imageURLsJSON), &item.ImageURLs)
|
||||
}
|
||||
|
||||
items = append(items, item)
|
||||
}
|
||||
|
||||
|
||||
+58
-9
@@ -5,6 +5,10 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
_ "image/gif"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -12,6 +16,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
_ "golang.org/x/image/webp"
|
||||
)
|
||||
|
||||
// Publisher handles posting items to AT Protocol PDS
|
||||
@@ -66,8 +72,14 @@ type BskyExternal struct {
|
||||
}
|
||||
|
||||
type BskyImage struct {
|
||||
Alt string `json:"alt"`
|
||||
Image *BlobRef `json:"image"`
|
||||
Alt string `json:"alt"`
|
||||
Image *BlobRef `json:"image"`
|
||||
AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
|
||||
}
|
||||
|
||||
type BskyAspectRatio struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
}
|
||||
|
||||
// NewPublisher creates a new Publisher instance
|
||||
@@ -216,8 +228,12 @@ func GenerateRkey(guid string, timestamp time.Time) string {
|
||||
micros := uint64(microsInt) & ((1 << 53) - 1)
|
||||
|
||||
// Generate deterministic 10-bit clock ID from GUID hash
|
||||
// Use XOR of multiple hash bytes to reduce collisions
|
||||
hash := sha256.Sum256([]byte(guid))
|
||||
clockID := uint64(hash[0])<<2 | uint64(hash[1])>>6
|
||||
// XOR the even-indexed bytes (0, 2, 4, 6) into h1 and the odd-indexed bytes (1, 3, 5, 7) into h2, then combine them so more hash bytes are mixed into the ID
|
||||
h1 := uint64(hash[0]) ^ uint64(hash[2]) ^ uint64(hash[4]) ^ uint64(hash[6])
|
||||
h2 := uint64(hash[1]) ^ uint64(hash[3]) ^ uint64(hash[5]) ^ uint64(hash[7])
|
||||
clockID := (h1 << 2) | (h2 >> 6)
|
||||
clockID = clockID & ((1 << 10) - 1) // 10 bits = 0-1023
|
||||
|
||||
// Combine: top bit 0, 53 bits timestamp, 10 bits clock ID
|
||||
@@ -460,12 +476,19 @@ func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altTex
|
||||
}
|
||||
|
||||
for i := 0; i < maxImages; i++ {
|
||||
blob := p.fetchAndUploadImage(session, imageURLs[i])
|
||||
if blob != nil {
|
||||
images = append(images, BskyImage{
|
||||
result := p.fetchAndUploadImageWithDimensions(session, imageURLs[i])
|
||||
if result != nil && result.Blob != nil {
|
||||
img := BskyImage{
|
||||
Alt: altText,
|
||||
Image: blob,
|
||||
})
|
||||
Image: result.Blob,
|
||||
}
|
||||
if result.Width > 0 && result.Height > 0 {
|
||||
img.AspectRatio = &BskyAspectRatio{
|
||||
Width: result.Width,
|
||||
Height: result.Height,
|
||||
}
|
||||
}
|
||||
images = append(images, img)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -514,7 +537,22 @@ func (p *Publisher) FetchFavicon(siteURL string) string {
|
||||
return fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)
|
||||
}
|
||||
|
||||
// ImageUploadResult contains the uploaded blob and image dimensions
|
||||
type ImageUploadResult struct {
|
||||
Blob *BlobRef
|
||||
Width int
|
||||
Height int
|
||||
}
|
||||
|
||||
func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
|
||||
result := p.fetchAndUploadImageWithDimensions(session, imageURL)
|
||||
if result == nil {
|
||||
return nil
|
||||
}
|
||||
return result.Blob
|
||||
}
|
||||
|
||||
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
|
||||
// Fetch the image
|
||||
resp, err := p.httpClient.Get(imageURL)
|
||||
if err != nil {
|
||||
@@ -552,13 +590,24 @@ func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *B
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode image to get dimensions
|
||||
img, _, err := image.DecodeConfig(bytes.NewReader(data))
|
||||
width, height := 1, 1 // Default if decode fails
|
||||
if err == nil {
|
||||
width, height = img.Width, img.Height
|
||||
}
|
||||
|
||||
// Upload to PDS
|
||||
blob, err := p.UploadBlob(session, data, contentType)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return blob
|
||||
return &ImageUploadResult{
|
||||
Blob: blob,
|
||||
Width: width,
|
||||
Height: height,
|
||||
}
|
||||
}
|
||||
|
||||
func truncate(s string, maxLen int) string {
|
||||
|
||||
Reference in New Issue
Block a user