Files
crawler/image.go
primal 1066f42189 Refactor large Go files into focused modules
Split dashboard.go (3,528 lines) into:
- routes.go: HTTP route registration
- api_domains.go: Domain API handlers
- api_feeds.go: Feed API handlers
- api_publish.go: Publishing API handlers
- api_search.go: Search API handlers
- templates.go: HTML templates
- dashboard.go: Stats functions only (235 lines)

Split publisher.go (1,502 lines) into:
- pds_auth.go: Authentication and account management
- pds_records.go: Record operations (upload, update, delete)
- handle.go: Handle derivation from feed URLs
- image.go: Image processing and favicon fetching
- publisher.go: Core types and PublishItem (439 lines)

Split feed.go (1,137 lines) into:
- item.go: Item struct and DB operations
- feed_check.go: Feed checking and processing
- feed.go: Feed struct and DB operations (565 lines)

Also includes domain import batch size increase (1k -> 100k).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 22:25:02 -05:00

382 lines
9.7 KiB
Go

package main
import (
"bytes"
"fmt"
"image"
_ "image/gif"
"image/jpeg"
_ "image/png"
"io"
"net/http"
"net/url"
"strings"
"time"
"go.deanishe.net/favicon"
"golang.org/x/image/draw"
_ "golang.org/x/image/webp"
)
// ImageUploadResult pairs the blob reference produced by an image upload with
// the dimensions recorded for that image.
type ImageUploadResult struct {
	// Blob is the reference returned by the blob upload.
	Blob *BlobRef
	// Width and Height are the image dimensions as reported by the decoder.
	Width, Height int
}
// uploadImages fetches and uploads the first four entries of imageURLs (fewer
// when the slice is shorter) and returns one BskyImage per successful upload.
// Every returned image carries altText; URLs whose fetch or upload yields no
// blob are skipped. The aspect ratio is attached only when both dimensions
// are positive.
func (p *Publisher) uploadImages(session *PDSSession, imageURLs []string, altText string) []BskyImage {
	const limit = 4

	candidates := imageURLs
	if len(candidates) > limit {
		candidates = candidates[:limit]
	}

	var uploaded []BskyImage
	for _, imageURL := range candidates {
		res := p.fetchAndUploadImageWithDimensions(session, imageURL)
		if res == nil || res.Blob == nil {
			continue
		}

		entry := BskyImage{Alt: altText, Image: res.Blob}
		if res.Width > 0 && res.Height > 0 {
			entry.AspectRatio = &BskyAspectRatio{Width: res.Width, Height: res.Height}
		}
		uploaded = append(uploaded, entry)
	}
	return uploaded
}
// FetchFavicon picks an icon URL for siteURL that is suitable for avatar use.
//
// Icons are discovered with the go.deanishe.net/favicon finder using the
// publisher's HTTP client. The result is, in order of preference:
//
//  1. the highest-scoring icon that survives filtering,
//  2. the first discovered icon whose URL does not contain "og-image" or
//     "og_image",
//  3. a Google favicon-service URL built from the site's host.
//
// An empty string is returned only when siteURL is empty or cannot be parsed.
func (p *Publisher) FetchFavicon(siteURL string) string {
	if siteURL == "" {
		return ""
	}

	// Bare hosts get an https scheme so they can be parsed and fetched.
	if !strings.Contains(siteURL, "://") {
		siteURL = "https://" + siteURL
	}
	parsed, err := url.Parse(siteURL)
	if err != nil {
		return ""
	}
	googleFallback := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", parsed.Host)

	// IgnoreNoSize is deliberately not used here: it filters out valid
	// favicon.ico files that carry no size metadata.
	finder := favicon.New(favicon.WithClient(p.httpClient))
	candidates, findErr := finder.Find(siteURL)
	if findErr != nil || len(candidates) == 0 {
		return googleFallback
	}

	var (
		topURL   string
		topScore int
	)
	for _, c := range candidates {
		// Anything narrower than 32px is likely a tracking pixel.
		if c.Width > 0 && c.Width < 32 {
			continue
		}

		lower := strings.ToLower(c.URL)

		// Link-preview artwork (Open Graph / Twitter cards) is usually a wide
		// banner rather than an icon.
		isPreview := strings.Contains(lower, "og-image") ||
			strings.Contains(lower, "og_image") ||
			strings.Contains(lower, "opengraph") ||
			strings.Contains(lower, "twitter")
		if isPreview {
			continue
		}

		// Reject clearly non-square images when both dimensions are known.
		if c.Width > 0 && c.Height > 0 {
			ratio := float64(c.Width) / float64(c.Height)
			if ratio > 1.5 || ratio < 0.67 {
				continue
			}
		}

		score := 0

		// URLs that look like real favicon paths.
		if strings.Contains(lower, "favicon") || strings.Contains(lower, "icon") ||
			strings.Contains(lower, "apple-touch") {
			score += 100
		}

		// Format preference: PNG, then ICO, then JPEG.
		switch {
		case c.MimeType == "image/png":
			score += 50
		case c.MimeType == "image/x-icon" || strings.HasSuffix(lower, ".ico"):
			score += 40
		case c.MimeType == "image/jpeg":
			score += 10
		}

		// Size preference: 64..512px wide is best; any known width beats none.
		switch {
		case c.Width >= 64 && c.Width <= 512:
			score += 30
		case c.Width > 0:
			score += 10
		}

		// Strictly greater: ties keep the earlier icon, and an icon that
		// scores zero is never chosen here.
		if score > topScore {
			topScore = score
			topURL = c.URL
		}
	}
	if topURL != "" {
		return topURL
	}

	// Nothing scored; take the first icon that is not an Open Graph image.
	for _, c := range candidates {
		lower := strings.ToLower(c.URL)
		if strings.Contains(lower, "og-image") || strings.Contains(lower, "og_image") {
			continue
		}
		return c.URL
	}

	return googleFallback
}
// fetchAndUploadImage fetches and uploads imageURL through
// fetchAndUploadImageWithDimensions and returns only the resulting blob
// reference, or nil when that call produced no result.
func (p *Publisher) fetchAndUploadImage(session *PDSSession, imageURL string) *BlobRef {
	if uploaded := p.fetchAndUploadImageWithDimensions(session, imageURL); uploaded != nil {
		return uploaded.Blob
	}
	return nil
}
// upgradeImageURL rewrites image URLs from known CDNs so they request a larger
// rendition. Currently only BBC ichef URLs are handled: the first
// "/standard/240/" and the first "/standard/480/" path segment are each
// replaced with "/standard/800/". Any other URL is returned unchanged.
func upgradeImageURL(imageURL string) string {
	if !strings.Contains(imageURL, "ichef.bbci.co.uk") {
		return imageURL
	}
	for _, small := range []string{"/standard/240/", "/standard/480/"} {
		imageURL = strings.Replace(imageURL, small, "/standard/800/", 1)
	}
	return imageURL
}
// fetchAndUploadImageWithDimensions downloads imageURL, shrinks it if it is
// too large for a blob, uploads it via UploadBlob, and returns the blob
// together with the image dimensions.
//
// It returns nil when the download fails, the response is not 200 OK, the
// content type is not an image type, the body is empty, an oversized image
// cannot be decoded or shrunk under the limit, or the upload fails.
//
// Dimensions default to 1x1 when the image header cannot be decoded. A
// resized image is always re-encoded as JPEG.
func (p *Publisher) fetchAndUploadImageWithDimensions(session *PDSSession, imageURL string) *ImageUploadResult {
	const (
		// Download cap; larger than the blob limit to leave resize headroom.
		maxDownloadBytes = 2 * 1024 * 1024
		// Bluesky blob limit is ~976KB, use 900KB as safe threshold.
		maxBlobSize = 900 * 1024
	)

	// Ask known CDNs for a larger rendition before fetching.
	imageURL = upgradeImageURL(imageURL)

	resp, err := p.httpClient.Get(imageURL)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil
	}

	// With no Content-Type header, guess from the URL's extension and fall
	// back to JPEG.
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		switch lowered := strings.ToLower(imageURL); {
		case strings.HasSuffix(lowered, ".png"):
			contentType = "image/png"
		case strings.HasSuffix(lowered, ".gif"):
			contentType = "image/gif"
		case strings.HasSuffix(lowered, ".webp"):
			contentType = "image/webp"
		default:
			contentType = "image/jpeg"
		}
	}
	if !strings.HasPrefix(contentType, "image/") {
		return nil
	}

	payload, err := io.ReadAll(io.LimitReader(resp.Body, maxDownloadBytes))
	if err != nil || len(payload) == 0 {
		return nil
	}

	// Read only the header to learn the dimensions; keep 1x1 if that fails.
	width, height := 1, 1
	if cfg, _, cfgErr := image.DecodeConfig(bytes.NewReader(payload)); cfgErr == nil {
		width, height = cfg.Width, cfg.Height
	}

	if len(payload) > maxBlobSize {
		// Too big to upload as-is: decode fully so it can be scaled down.
		src, _, decErr := image.Decode(bytes.NewReader(payload))
		if decErr != nil {
			return nil
		}

		// Up to five attempts, starting at 90% of the original size and
		// multiplying the factor by 0.8 after each attempt that is still
		// too large.
		shrunk := false
		for attempt, scale := 0, 0.9; attempt < 5; attempt, scale = attempt+1, scale*0.8 {
			targetW := int(float64(width) * scale)
			targetH := int(float64(height) * scale)
			// Neither dimension goes below 100px.
			if targetW < 100 {
				targetW = 100
			}
			if targetH < 100 {
				targetH = 100
			}

			canvas := image.NewRGBA(image.Rect(0, 0, targetW, targetH))
			draw.CatmullRom.Scale(canvas, canvas.Bounds(), src, src.Bounds(), draw.Over, nil)

			var encoded bytes.Buffer
			if err := jpeg.Encode(&encoded, canvas, &jpeg.Options{Quality: 85}); err != nil {
				return nil
			}
			if encoded.Len() > maxBlobSize {
				continue
			}

			payload = encoded.Bytes()
			width, height = targetW, targetH
			contentType = "image/jpeg"
			shrunk = true
			break
		}
		if !shrunk {
			// Still over the limit after every attempt.
			return nil
		}
	}

	blob, err := p.UploadBlob(session, payload, contentType)
	if err != nil {
		return nil
	}
	return &ImageUploadResult{Blob: blob, Width: width, Height: height}
}
// FetchFaviconBytes locates an icon for siteURL and downloads it.
//
// It returns the icon's raw bytes and its MIME type. Icons are discovered
// with the go.deanishe.net/favicon finder and tried in the order the finder
// returned them, PNG/JPEG icons first and all other formats after; icons
// with a known width below 32px are skipped unless nothing else was found.
// If discovery fails, finds nothing, or no discovered icon can be downloaded,
// Google's favicon service is used instead.
//
// An error is returned when siteURL is empty or unparseable, or when the
// final Google fallback download fails.
func FetchFaviconBytes(siteURL string) ([]byte, string, error) {
	if siteURL == "" {
		return nil, "", fmt.Errorf("empty site URL")
	}

	// Use the same scheme test as Publisher.FetchFavicon. A bare
	// HasPrefix(siteURL, "http") check would treat hosts such as
	// "httpbin.org" as already having a scheme.
	if !strings.Contains(siteURL, "://") {
		siteURL = "https://" + siteURL
	}
	u, err := url.Parse(siteURL)
	if err != nil {
		return nil, "", err
	}

	client := &http.Client{Timeout: 10 * time.Second}
	googleURL := fmt.Sprintf("https://www.google.com/s2/favicons?domain=%s&sz=128", u.Host)

	// NOTE(review): Publisher.FetchFavicon avoids IgnoreNoSize because it
	// drops favicon.ico entries without size metadata; confirm it is wanted
	// here.
	finder := favicon.New(
		favicon.WithClient(client),
		favicon.IgnoreNoSize,
	)
	icons, err := finder.Find(siteURL)
	if err != nil || len(icons) == 0 {
		return fetchIconBytes(client, googleURL)
	}

	// Keep the finder's order (presumably largest first) within each group.
	// Prepending each PNG/JPEG to a single list would reverse that order and
	// try the smallest preferred icon first.
	var preferred, others []string
	for _, icon := range icons {
		if icon.Width > 0 && icon.Width < 32 {
			continue // Skip tiny icons
		}
		if icon.MimeType == "image/png" || icon.MimeType == "image/jpeg" {
			preferred = append(preferred, icon.URL)
		} else {
			others = append(others, icon.URL)
		}
	}
	iconURLs := append(preferred, others...)

	// Everything was filtered out: fall back to trying every icon.
	if len(iconURLs) == 0 {
		for _, icon := range icons {
			iconURLs = append(iconURLs, icon.URL)
		}
	}

	// The first icon that downloads with a non-empty body wins.
	for _, iconURL := range iconURLs {
		data, mimeType, err := fetchIconBytes(client, iconURL)
		if err == nil && len(data) > 0 {
			return data, mimeType, nil
		}
	}

	// Final fallback to Google.
	return fetchIconBytes(client, googleURL)
}
// fetchIconBytes downloads the icon at iconURL using client and returns its
// bytes and MIME type.
//
// An error is returned when the request fails, the status is not 200 OK, the
// body is larger than 2 MiB, or the response is declared as a non-image type
// and its bytes do not look like an image either.
//
// The MIME type is the response's Content-Type when that is an image type.
// When the header is missing, it is guessed from the URL path: "image/x-icon"
// for ".ico", otherwise "image/png". When the header names a non-image type
// but the bytes sniff as an image, the sniffed type is returned.
func fetchIconBytes(client *http.Client, iconURL string) ([]byte, string, error) {
	// Icons are small; cap the download so a misbehaving server cannot make
	// us buffer an arbitrarily large body.
	const maxIconBytes = 2 * 1024 * 1024

	resp, err := client.Get(iconURL)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Read one byte past the cap so an oversized body is reported as an
	// error instead of being silently truncated.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxIconBytes+1))
	if err != nil {
		return nil, "", err
	}
	if len(data) > maxIconBytes {
		return nil, "", fmt.Errorf("icon larger than %d bytes", maxIconBytes)
	}

	contentType := resp.Header.Get("Content-Type")
	switch {
	case contentType == "":
		// No header: guess from the extension of the URL path, ignoring
		// letter case and any query string. PNG is the default.
		path := iconURL
		if u, parseErr := url.Parse(iconURL); parseErr == nil {
			path = u.Path
		}
		contentType = "image/png"
		if strings.HasSuffix(strings.ToLower(path), ".ico") {
			contentType = "image/x-icon"
		}
	case !strings.HasPrefix(strings.ToLower(contentType), "image/"):
		// Servers sometimes answer 200 with an HTML error page, or serve
		// icons under a generic type. Trust the bytes over the header.
		sniffed := http.DetectContentType(data)
		if !strings.HasPrefix(sniffed, "image/") {
			return nil, "", fmt.Errorf("unexpected content type %q", contentType)
		}
		contentType = sniffed
	}
	return data, contentType, nil
}