crawler/shortener.go
primal 3f277ec165 Remove item ID column references - items now use composite PK (guid, feed_url)
- Remove ID field from Item struct
- Remove ID field from SearchItem struct
- Update all SQL queries to not select id column
- Change MarkItemPublished to use feedURL/guid instead of id
- Update shortener to use item_guid instead of item_id
- Add migration to convert item_id to item_guid in short_urls table (a rough sketch of this conversion follows below)
- Update API endpoints to use feedUrl/guid instead of itemId

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 23:51:44 -05:00
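
The migration mentioned above is not shown on this page. As a rough sketch of the conversion it describes (assuming Postgres, which the $1 placeholders in this file suggest, and assuming the items table exposes both the old id and the new guid columns), it might look like:

package main

import "database/sql"

// migrateShortURLItemGUID is a hypothetical sketch of the item_id -> item_guid
// conversion described in the commit message. The column names
// (short_urls.item_id, items.id, items.guid) are assumptions, not taken from
// the real migration.
func migrateShortURLItemGUID(db *sql.DB) error {
	// Add the new column (Postgres syntax).
	if _, err := db.Exec(`ALTER TABLE short_urls ADD COLUMN IF NOT EXISTS item_guid TEXT`); err != nil {
		return err
	}
	// Backfill item_guid from the items table via the old integer item_id.
	if _, err := db.Exec(`
		UPDATE short_urls s
		SET item_guid = i.guid
		FROM items i
		WHERE s.item_id = i.id AND s.item_guid IS NULL`); err != nil {
		return err
	}
	// Drop the old column once the backfill has run.
	_, err := db.Exec(`ALTER TABLE short_urls DROP COLUMN IF EXISTS item_id`)
	return err
}
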


package main

import (
	"crypto/sha256"
	"encoding/base64"
	"fmt"
	"net/http"
	"strings"
	"time"
)

// ShortURL represents a shortened URL mapping
type ShortURL struct {
	Code        string    `json:"code"`
	OriginalURL string    `json:"original_url"`
	ItemGUID    string    `json:"item_guid,omitempty"`
	FeedURL     string    `json:"feed_url,omitempty"`
	CreatedAt   time.Time `json:"created_at"`
	ClickCount  int       `json:"click_count"`
}

// Click represents a click event on a short URL
type Click struct {
	ID        int64     `json:"id"`
	ShortCode string    `json:"short_code"`
	ClickedAt time.Time `json:"clicked_at"`
	Referrer  string    `json:"referrer,omitempty"`
	UserAgent string    `json:"user_agent,omitempty"`
	IPHash    string    `json:"ip_hash,omitempty"`
	Country   string    `json:"country,omitempty"`
}

// Base62 alphabet for short codes
const base62Alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

// generateShortCode creates a short code from a URL
// Uses first 6 characters of base64-encoded SHA256 hash
func generateShortCode(url string) string {
	hash := sha256.Sum256([]byte(url))
	encoded := base64.URLEncoding.EncodeToString(hash[:])
	// Take the first 6 chars and remap "-" and "_" so the code stays purely alphanumeric
	code := encoded[:6]
	code = strings.ReplaceAll(code, "-", "x")
	code = strings.ReplaceAll(code, "_", "y")
	return code
}

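// Note: truncating the hash to 6 characters means different URLs can collide;
// CreateShortURL resolves collisions by appending a numeric counter to the code.
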
// CreateShortURL creates or retrieves a short URL for the given original URL
func (c *Crawler) CreateShortURL(originalURL string, itemGUID, feedURL string) (*ShortURL, error) {
	// Check if we already have this URL
	var existing ShortURL
	var itemGUIDPtr, feedURLPtr *string
	err := c.db.QueryRow(`
		SELECT code, original_url, item_guid, feed_url, created_at, click_count
		FROM short_urls WHERE original_url = $1
	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &itemGUIDPtr,
		&feedURLPtr, &existing.CreatedAt, &existing.ClickCount)
	if err == nil {
		existing.ItemGUID = StringValue(itemGUIDPtr)
		existing.FeedURL = StringValue(feedURLPtr)
		return &existing, nil
	}

	// Generate new short code
	code := generateShortCode(originalURL)

	// Handle collision by appending counter
	baseCode := code
	for i := 0; i < 100; i++ {
		if i > 0 {
			code = fmt.Sprintf("%s%d", baseCode, i)
		}
		var existingURL string
		err := c.db.QueryRow("SELECT original_url FROM short_urls WHERE code = $1", code).Scan(&existingURL)
		if err != nil {
			// Code doesn't exist, use it
			break
		}
		if existingURL == originalURL {
			// Same URL, return existing
			return c.GetShortURL(code)
		}
		// Collision with different URL, try next
	}

	// Insert new short URL
	now := time.Now()
	_, err = c.db.Exec(`
		INSERT INTO short_urls (code, original_url, item_guid, feed_url, created_at, click_count)
		VALUES ($1, $2, $3, $4, $5, 0)
	`, code, originalURL, NullableString(itemGUID), NullableString(feedURL), now)
	if err != nil {
		return nil, fmt.Errorf("failed to create short URL: %v", err)
	}

	return &ShortURL{
		Code:        code,
		OriginalURL: originalURL,
		ItemGUID:    itemGUID,
		FeedURL:     feedURL,
		CreatedAt:   now,
		ClickCount:  0,
	}, nil
}

// GetShortURL retrieves a short URL by code
func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
	var s ShortURL
	var itemGUID *string
	var feedURL *string
	err := c.db.QueryRow(`
		SELECT code, original_url, item_guid, feed_url, created_at, click_count
		FROM short_urls WHERE code = $1
	`, code).Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount)
	if err != nil {
		return nil, err
	}
	s.ItemGUID = StringValue(itemGUID)
	s.FeedURL = StringValue(feedURL)
	return &s, nil
}

// RecordClick records a click on a short URL
func (c *Crawler) RecordClick(code string, r *http.Request) error {
	// Hash the IP for privacy
	ip := r.RemoteAddr
	if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" {
		ip = strings.Split(forwarded, ",")[0]
	}
	ipHash := fmt.Sprintf("%x", sha256.Sum256([]byte(ip)))[:16]

	referrer := r.Header.Get("Referer")
	userAgent := r.Header.Get("User-Agent")

	// Truncate long values
	if len(referrer) > 500 {
		referrer = referrer[:500]
	}
	if len(userAgent) > 500 {
		userAgent = userAgent[:500]
	}

	// Insert click record
	_, err := c.db.Exec(`
		INSERT INTO clicks (short_code, clicked_at, referrer, user_agent, ip_hash)
		VALUES ($1, $2, $3, $4, $5)
	`, code, time.Now(), NullableString(referrer), NullableString(userAgent), ipHash)
	if err != nil {
		return err
	}

	// Increment click count
	_, err = c.db.Exec(`
		UPDATE short_urls SET click_count = click_count + 1 WHERE code = $1
	`, code)
	return err
}

// GetShortURLForPost returns the short URL string for use in posts
// Format: https://url.1440.news/{code}
func (c *Crawler) GetShortURLForPost(originalURL, itemGUID, feedURL string) (string, error) {
	shortURL, err := c.CreateShortURL(originalURL, itemGUID, feedURL)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("https://url.1440.news/%s", shortURL.Code), nil
}

// GetClickStats returns click statistics for a short URL
func (c *Crawler) GetClickStats(code string) (int, error) {
	var count int
	err := c.db.QueryRow("SELECT click_count FROM short_urls WHERE code = $1", code).Scan(&count)
	return count, err
}

// GetRecentClicks returns recent clicks across all short URLs
func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
	rows, err := c.db.Query(`
		SELECT id, short_code, clicked_at, referrer, user_agent, ip_hash, country
		FROM clicks
		ORDER BY clicked_at DESC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var clicks []Click
	for rows.Next() {
		var click Click
		var referrer, userAgent, ipHash, country *string
		if err := rows.Scan(&click.ID, &click.ShortCode, &click.ClickedAt,
			&referrer, &userAgent, &ipHash, &country); err != nil {
			// Skip rows that fail to scan
			continue
		}
		click.Referrer = StringValue(referrer)
		click.UserAgent = StringValue(userAgent)
		click.IPHash = StringValue(ipHash)
		click.Country = StringValue(country)
		clicks = append(clicks, click)
	}
	// Surface any error encountered during iteration
	return clicks, rows.Err()
}

// GetTopShortURLs returns the most clicked short URLs
func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
	rows, err := c.db.Query(`
		SELECT code, original_url, item_guid, feed_url, created_at, click_count
		FROM short_urls
		ORDER BY click_count DESC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var urls []ShortURL
	for rows.Next() {
		var s ShortURL
		var itemGUID *string
		var feedURL *string
		if err := rows.Scan(&s.Code, &s.OriginalURL, &itemGUID, &feedURL, &s.CreatedAt, &s.ClickCount); err != nil {
			// Skip rows that fail to scan
			continue
		}
		s.ItemGUID = StringValue(itemGUID)
		s.FeedURL = StringValue(feedURL)
		urls = append(urls, s)
	}
	// Surface any error encountered during iteration
	return urls, rows.Err()
}
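
For context, here is a minimal sketch of the redirect handler that would sit in front of these helpers. It is not part of shortener.go; the handler name and the route shape (the code as the whole path under url.1440.news) are assumptions.

// handleShortURLRedirect is a hypothetical sketch, not part of this file.
// It resolves the code from the request path, records the click, and redirects.
func (c *Crawler) handleShortURLRedirect(w http.ResponseWriter, r *http.Request) {
	code := strings.TrimPrefix(r.URL.Path, "/")
	shortURL, err := c.GetShortURL(code)
	if err != nil {
		http.NotFound(w, r)
		return
	}
	// Best-effort click tracking; a failed insert should not block the redirect.
	_ = c.RecordClick(code, r)
	http.Redirect(w, r, shortURL.OriginalURL, http.StatusFound)
}

Since RecordClick already hashes the caller's IP before storing it, the handler itself does not need to do any anonymization of its own.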