Files
primal 3b5c4ddeb2 Add item status (pass/fail) support
- Filter unpublished items by status='pass' in publish loop
- Add DeletePost function to remove posts from PDS
- Add /api/setItemStatus endpoint to mark items pass/fail
- When marking fail, deletes the Bluesky post if it was published

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 15:46:27 -05:00

439 lines
9.8 KiB
Go

package main
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
"github.com/1440news/commons"
)
// Publisher posts feed items to an AT Protocol PDS and deletes them again.
// Construct one with NewPublisher.
type Publisher struct {
// pdsHost is the base URL of the PDS; XRPC paths such as
// "/xrpc/com.atproto.repo.createRecord" are appended to it verbatim.
pdsHost string
// httpClient is the client used to send requests to the PDS.
httpClient *http.Client
}
// PDSSession holds authentication info for a PDS account.
// NOTE(review): the JSON field names look like they mirror a PDS session
// response — confirm against the code that decodes it.
type PDSSession struct {
// DID is sent as the "repo" when creating or deleting records.
DID string `json:"did"`
// Handle is the account handle; it is not read by the code in this file section.
Handle string `json:"handle"`
// AccessJwt is sent as the Bearer token in the Authorization header.
AccessJwt string `json:"accessJwt"`
// RefreshJwt is not read by the code in this file section.
RefreshJwt string `json:"refreshJwt"`
}
// BskyPost represents an app.bsky.feed.post record as it is serialized into
// the "record" field of a createRecord request.
type BskyPost struct {
// Type is the record type; PublishItem sets it to "app.bsky.feed.post".
Type string `json:"$type"`
// Text is the post body; PublishItem fills it with the hashtag line only.
Text string `json:"text"`
// CreatedAt is a timestamp string; PublishItem formats it as RFC 3339.
CreatedAt string `json:"createdAt"`
// Facets annotate byte ranges of Text; omitted from JSON when empty.
Facets []BskyFacet `json:"facets,omitempty"`
// Embed is the optional attachment; omitted from JSON when nil.
Embed *BskyEmbed `json:"embed,omitempty"`
}
// BskyFacet attaches one or more features (for example a hashtag) to a byte
// range of a post's text.
type BskyFacet struct {
// Index is the byte range of the text the features apply to.
Index BskyByteSlice `json:"index"`
// Features lists the annotations for that range.
Features []BskyFeature `json:"features"`
}
// BskyByteSlice is a range of byte offsets into a post's text.
// formatTagsForPost sets ByteStart to the offset of the leading '#' and
// ByteEnd to the offset just past the hashtag.
type BskyByteSlice struct {
ByteStart int `json:"byteStart"`
ByteEnd int `json:"byteEnd"`
}
// BskyFeature is a single facet annotation. Only the field relevant to Type
// is expected to be set; empty URI and Tag values are omitted from JSON.
type BskyFeature struct {
// Type identifies the feature kind; formatTagsForPost uses
// "app.bsky.richtext.facet#tag".
Type string `json:"$type"`
// URI is presumably the target of a link feature — not set by the code in
// this file section; confirm against other callers.
URI string `json:"uri,omitempty"`
// Tag is the hashtag text without the leading '#'.
Tag string `json:"tag,omitempty"`
}
// BskyEmbed is the attachment of a post. PublishItem builds it with Type
// "app.bsky.embed.external" and External populated.
type BskyEmbed struct {
// Type identifies the embed kind.
Type string `json:"$type"`
// External is a link card; omitted from JSON when nil.
External *BskyExternal `json:"external,omitempty"`
// Images is not populated by the code in this file section; omitted from
// JSON when empty.
Images []BskyImage `json:"images,omitempty"`
}
// BskyExternal describes an external link card embedded in a post.
type BskyExternal struct {
// URI is the linked URL.
URI string `json:"uri"`
// Title is the card title; PublishItem uses the item's title.
Title string `json:"title"`
// Description is the card text; PublishItem uses the item's description
// with HTML stripped and truncated to 300 bytes.
Description string `json:"description"`
// Thumb is an optional thumbnail reference; omitted from JSON when nil.
// BlobRef is declared outside this section of the file.
Thumb *BlobRef `json:"thumb,omitempty"`
}
// BskyImage is one entry of BskyEmbed.Images.
type BskyImage struct {
// Alt is the alternative text for the image.
Alt string `json:"alt"`
// Image references the image data; BlobRef is declared outside this section
// of the file.
Image *BlobRef `json:"image"`
// AspectRatio is optional and omitted from JSON when nil.
AspectRatio *BskyAspectRatio `json:"aspectRatio,omitempty"`
}
// BskyAspectRatio carries the width and height attached to a BskyImage.
type BskyAspectRatio struct {
Width int `json:"width"`
Height int `json:"height"`
}
// NewPublisher returns a Publisher that sends its requests to pdsHost.
//
// pdsHost is used as a prefix for XRPC paths, so it should be a base URL
// without a trailing slash. The returned Publisher owns an HTTP client with a
// 30-second overall timeout per request.
func NewPublisher(pdsHost string) *Publisher {
	client := &http.Client{Timeout: 30 * time.Second}

	publisher := new(Publisher)
	publisher.pdsHost = pdsHost
	publisher.httpClient = client
	return publisher
}
// tidAlphabet is the 32-symbol alphabet used to encode a TID five bits at a
// time. Its symbols are in ascending byte order, so encoded strings sort the
// same way as the integers they encode.
const tidAlphabet = "234567abcdefghijklmnopqrstuvwxyz"

// GenerateRkey derives a deterministic 13-character TID-format record key
// from a GUID and a timestamp.
//
// The key encodes a 63-bit integer: the timestamp in microseconds since the
// Unix epoch (low 53 bits kept, pre-epoch values clamped to zero) followed by
// a 10-bit clock ID derived from the SHA-256 digest of guid. The same inputs
// therefore always yield the same key. An empty guid yields "".
func GenerateRkey(guid string, timestamp time.Time) string {
	if guid == "" {
		return ""
	}

	const (
		timestampBits = 53
		clockIDBits   = 10
		bitsPerChar   = 5
		encodedLen    = 13
	)

	// Timestamps before the epoch are treated as the epoch itself.
	var micros uint64
	if us := timestamp.UnixMicro(); us > 0 {
		micros = uint64(us)
	}
	micros &= 1<<timestampBits - 1

	// Fold the first eight digest bytes into two 8-bit values: one from the
	// even-indexed bytes, one from the odd-indexed bytes.
	digest := sha256.Sum256([]byte(guid))
	var evenMix, oddMix uint64
	for i := 0; i < 8; i += 2 {
		evenMix ^= uint64(digest[i])
		oddMix ^= uint64(digest[i+1])
	}
	// Clock ID = all 8 bits of evenMix followed by the top 2 bits of oddMix.
	clockID := (evenMix<<2 | oddMix>>6) & (1<<clockIDBits - 1)

	tid := micros<<clockIDBits | clockID

	// Emit the most significant 5-bit group first.
	var encoded [encodedLen]byte
	for i := range encoded {
		shift := uint(bitsPerChar * (encodedLen - 1 - i))
		encoded[i] = tidAlphabet[(tid>>shift)&0x1f]
	}
	return string(encoded[:])
}
// extractURLsPattern matches an http:// or https:// URL up to the first
// whitespace, angle bracket, quote, or closing parenthesis. It is compiled
// once at package initialization instead of on every extractURLs call.
var extractURLsPattern = regexp.MustCompile(`https?://[^\s<>"'\)]+`)

// extractURLs returns every http(s) URL found in text, in order of
// appearance, with trailing sentence punctuation (".,;:!?") removed.
// It returns nil when text contains no URL.
func extractURLs(text string) []string {
	var urls []string
	for _, match := range extractURLsPattern.FindAllString(text, -1) {
		// Punctuation directly after a URL almost always belongs to the
		// surrounding sentence rather than to the URL.
		if u := strings.TrimRight(match, ".,;:!?"); u != "" {
			urls = append(urls, u)
		}
	}
	return urls
}
// toCamelCaseTag normalizes a free-form tag into hashtag text (without the
// leading '#').
//
// Surrounding whitespace and one leading '#' are removed, then the tag is
// split on spaces, hyphens and underscores. A single word is lower-cased as a
// whole; several words are joined in camelCase (first word lower-case, each
// following word with an upper-case first rune and lower-case remainder).
// A tag with no word characters yields "".
func toCamelCaseTag(tag string) string {
	cleaned := strings.TrimPrefix(strings.TrimSpace(tag), "#")

	isSeparator := func(r rune) bool {
		switch r {
		case ' ', '-', '_':
			return true
		}
		return false
	}
	parts := strings.FieldsFunc(cleaned, isSeparator)

	switch len(parts) {
	case 0:
		return ""
	case 1:
		return strings.ToLower(parts[0])
	}

	var camel strings.Builder
	camel.WriteString(strings.ToLower(parts[0]))
	for _, part := range parts[1:] {
		// FieldsFunc never yields empty fields, so letters[0] always exists.
		letters := []rune(part)
		camel.WriteString(strings.ToUpper(string(letters[0])))
		camel.WriteString(strings.ToLower(string(letters[1:])))
	}
	return camel.String()
}
// formatTagsForPost turns item tags into a single line of space-separated
// hashtags plus the facets that mark each hashtag in the post text.
//
// Tags are normalized with toCamelCaseTag, de-duplicated case-insensitively
// (first occurrence wins) and capped at five. textOffset is the byte offset
// at which the returned line will start inside the final post text; facet
// byte ranges are relative to that final text. When no usable tag remains
// the result is ("", nil).
func formatTagsForPost(tags []string, textOffset int) (string, []BskyFacet) {
	const maxHashtags = 5

	var names []string
	used := map[string]bool{}
	for _, raw := range tags {
		name := toCamelCaseTag(raw)
		if name == "" {
			continue
		}
		key := strings.ToLower(name)
		if used[key] {
			continue
		}
		used[key] = true
		names = append(names, name)
	}

	if len(names) == 0 {
		return "", nil
	}
	if len(names) > maxHashtags {
		names = names[:maxHashtags]
	}

	var text strings.Builder
	var facets []BskyFacet
	for idx, name := range names {
		if idx != 0 {
			text.WriteByte(' ')
		}
		// The bytes written so far, shifted by textOffset, give the position
		// of this hashtag in the final post text.
		start := textOffset + text.Len()
		text.WriteString("#")
		text.WriteString(name)
		end := textOffset + text.Len()

		facets = append(facets, BskyFacet{
			Index: BskyByteSlice{ByteStart: start, ByteEnd: end},
			Features: []BskyFeature{
				{Type: "app.bsky.richtext.facet#tag", Tag: name},
			},
		})
	}
	return text.String(), facets
}
// PublishItem creates an app.bsky.feed.post record for item in the repo of
// session.DID and returns the AT URI reported by the PDS.
//
// The post text consists only of the item's hashtags. When a primary URL is
// available (the item link, else a Hacker News discussion URL found in the
// description, else an audio/video enclosure URL) it is attached as an
// external link card with the item's title, a plain-text description of at
// most 300 bytes and, if the first image can be uploaded, a thumbnail.
//
// The record key is derived deterministically from the item's GUID (or link
// when the GUID is empty) and its PubDate (or DiscoveredAt when PubDate is
// zero), so the same item always maps to the same rkey.
//
// An error is returned when the item has neither GUID nor link, when the
// request cannot be built or sent, when the PDS answers with a status other
// than 200 OK, or when the response cannot be read or decoded.
func (p *Publisher) PublishItem(session *PDSSession, item *commons.Item) (string, error) {
	if item.GUID == "" && item.Link == "" {
		return "", fmt.Errorf("item has no GUID or link, cannot publish")
	}

	// Collect candidate URLs in priority order; only the first one is used
	// for the link card below.
	urlSet := make(map[string]bool)
	var allURLs []string
	if item.Link != "" {
		urlSet[item.Link] = true
		allURLs = append(allURLs, item.Link)
	}

	// At most one Hacker News discussion link from the description.
	for _, u := range extractURLs(item.Description) {
		if strings.Contains(u, "news.ycombinator.com/item") && !urlSet[u] {
			urlSet[u] = true
			allURLs = append(allURLs, u)
			break
		}
	}

	// Audio/video enclosure, only while fewer than two URLs were collected.
	// NOTE(review): the 235-byte budget and the +2 per URL look like a
	// leftover from when URLs were written into the post text — confirm
	// before changing.
	if len(allURLs) < 2 && item.Enclosure != nil && item.Enclosure.URL != "" {
		encType := strings.ToLower(item.Enclosure.Type)
		isMedia := strings.HasPrefix(encType, "audio/") || strings.HasPrefix(encType, "video/")
		if isMedia && !urlSet[item.Enclosure.URL] {
			currentURLLen := 0
			for _, u := range allURLs {
				currentURLLen += len(u) + 2
			}
			enclosureLen := len(item.Enclosure.URL) + 2
			if currentURLLen+enclosureLen < 235 {
				urlSet[item.Enclosure.URL] = true
				allURLs = append(allURLs, item.Enclosure.URL)
			}
		}
	}

	primaryURL := ""
	if len(allURLs) > 0 {
		primaryURL = allURLs[0]
	}

	// The post carries the item's publication date when it has one.
	createdAt := time.Now()
	if !item.PubDate.IsZero() {
		createdAt = item.PubDate
	}

	postText := ""
	var facets []BskyFacet
	if len(item.Tags) > 0 {
		// The hashtag line is the whole post text, hence offset 0.
		postText, facets = formatTagsForPost(item.Tags, 0)
	}

	post := BskyPost{
		Type:      "app.bsky.feed.post",
		Text:      postText,
		CreatedAt: createdAt.Format(time.RFC3339),
		Facets:    facets,
	}

	if primaryURL != "" {
		external := &BskyExternal{
			URI:         primaryURL,
			Title:       item.Title,
			Description: truncate(stripHTML(item.Description), 300),
		}
		if len(item.ImageURLs) > 0 {
			// The thumbnail is optional: a nil result simply leaves the card
			// without an image.
			if thumb := p.fetchAndUploadImage(session, item.ImageURLs[0]); thumb != nil {
				external.Thumb = thumb
			}
		}
		post.Embed = &BskyEmbed{
			Type:     "app.bsky.embed.external",
			External: external,
		}
	}

	guidForRkey := item.GUID
	if guidForRkey == "" {
		guidForRkey = item.Link
	}
	rkeyTime := item.PubDate
	if rkeyTime.IsZero() {
		rkeyTime = item.DiscoveredAt
	}
	rkey := GenerateRkey(guidForRkey, rkeyTime)

	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.feed.post",
		"rkey":       rkey,
		"record":     post,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return "", fmt.Errorf("encoding create record request: %w", err)
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.createRecord", bytes.NewReader(body))
	if err != nil {
		return "", fmt.Errorf("building create record request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("sending create record request: %w", err)
	}
	defer resp.Body.Close()

	respBody, readErr := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		// Best effort: report whatever part of the error body was readable.
		return "", fmt.Errorf("create record failed: %s - %s", resp.Status, string(respBody))
	}
	if readErr != nil {
		// Without this check a truncated body would surface as a confusing
		// JSON syntax error below.
		return "", fmt.Errorf("reading create record response: %w", readErr)
	}

	var result struct {
		URI string `json:"uri"`
		CID string `json:"cid"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", fmt.Errorf("decoding create record response: %w", err)
	}
	return result.URI, nil
}
// DeletePost deletes a post from the PDS by its AT URI.
//
// atURI must have the form at://<did>/<collection>/<rkey>, for example
// at://did:plc:xxxxx/app.bsky.feed.post/rkey. Only the trailing rkey is taken
// from the URI; the record is always deleted from the app.bsky.feed.post
// collection of session.DID.
//
// An error is returned when atURI is malformed (wrong scheme, too few path
// segments, or an empty rkey), when the request cannot be built or sent, or
// when the PDS answers with a status other than 200 OK.
func (p *Publisher) DeletePost(session *PDSSession, atURI string) error {
	const scheme = "at://"

	if !strings.HasPrefix(atURI, scheme) {
		return fmt.Errorf("invalid AT URI format: %s", atURI)
	}
	// After the scheme we expect at least authority/collection/rkey.
	parts := strings.Split(strings.TrimPrefix(atURI, scheme), "/")
	if len(parts) < 3 {
		return fmt.Errorf("invalid AT URI format: %s", atURI)
	}
	rkey := parts[len(parts)-1]
	if rkey == "" {
		// A trailing slash would otherwise send a delete request with an
		// empty record key.
		return fmt.Errorf("invalid AT URI format: %s", atURI)
	}

	payload := map[string]interface{}{
		"repo":       session.DID,
		"collection": "app.bsky.feed.post",
		"rkey":       rkey,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encoding delete record request: %w", err)
	}

	req, err := http.NewRequest("POST", p.pdsHost+"/xrpc/com.atproto.repo.deleteRecord", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("building delete record request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+session.AccessJwt)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("sending delete record request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The body is only used to enrich the error message, so a failed
		// read is deliberately ignored.
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("delete record failed: %s - %s", resp.Status, string(respBody))
	}
	return nil
}
// truncate shortens s to at most maxLen bytes, replacing the removed tail
// with "...".
//
// Strings that already fit are returned unchanged. The cut is moved back to
// the start of a UTF-8 sequence so a multi-byte character is never split,
// which means the result can be a few bytes shorter than maxLen. When maxLen
// leaves no room for text plus the ellipsis, as much of the ellipsis as fits
// is returned; a non-positive maxLen yields "" for any non-fitting input.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}

	const ellipsis = "..."
	if maxLen <= len(ellipsis) {
		if maxLen <= 0 {
			return ""
		}
		return ellipsis[:maxLen]
	}

	cut := maxLen - len(ellipsis)
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
	// until the cut lands on the first byte of a character.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + ellipsis
}
// Patterns and entity table used by stripHTML, built once at package
// initialization instead of on every call.
var (
	// stripHTMLTagPattern matches anything that looks like a tag: '<' up to
	// the next '>'.
	stripHTMLTagPattern = regexp.MustCompile(`<[^>]*>`)
	// stripHTMLSpacePattern matches a run of whitespace.
	stripHTMLSpacePattern = regexp.MustCompile(`\s+`)
	// stripHTMLEntities decodes the handful of supported entities in a
	// single pass, so already-decoded output is never decoded again
	// ("&amp;lt;" becomes "&lt;", not "<").
	stripHTMLEntities = strings.NewReplacer(
		"&amp;", "&",
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", "\"",
		"&#39;", "'",
		"&nbsp;", " ",
	)
)

// stripHTML converts an HTML fragment to plain text: tags are removed, the
// entities &amp; &lt; &gt; &quot; &#39; and &nbsp; are decoded once, runs of
// whitespace collapse to a single space, and the result is trimmed. Other
// entities are left as they are.
func stripHTML(s string) string {
	s = stripHTMLTagPattern.ReplaceAllString(s, "")
	s = stripHTMLEntities.Replace(s)
	s = stripHTMLSpacePattern.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}