diff --git a/crawler.go b/crawler.go
index a0876f4..f0338b8 100644
--- a/crawler.go
+++ b/crawler.go
@@ -246,8 +246,25 @@ func (c *Crawler) StartPublishLoop() {
 			sessions[account] = session
 		}
 
+		// Shorten URLs before publishing. Work on a copy so the original
+		// item (and its Enclosure pointer) is never mutated.
+		itemToPublish := item
+		if item.Link != "" {
+			if shortURL, err := c.GetShortURLForPost(item.Link, &item.ID, item.FeedURL); err == nil {
+				itemToPublish.Link = shortURL
+			}
+		}
+		if item.Enclosure != nil && item.Enclosure.URL != "" {
+			if shortURL, err := c.GetShortURLForPost(item.Enclosure.URL, &item.ID, item.FeedURL); err == nil {
+				itemToPublish.Enclosure = &Enclosure{
+					URL:    shortURL,
+					Type:   item.Enclosure.Type,
+					Length: item.Enclosure.Length,
+				}
+			}
+		}
+
 		// Publish the item
-		uri, err := publisher.PublishItem(session, &item)
+		uri, err := publisher.PublishItem(session, &itemToPublish)
 		if err != nil {
 			fmt.Printf("Publish: failed item %d: %v\n", item.ID, err)
 			// Clear session cache on auth errors
diff --git a/dashboard.go b/dashboard.go
index d382fef..c8f48ae 100644
--- a/dashboard.go
+++ b/dashboard.go
@@ -5,6 +5,8 @@ import (
+	"context"
 	"fmt"
 	"html/template"
 	"net/http"
+	"strings"
 	"time"
 
 	"github.com/jackc/pgx/v5"
@@ -238,6 +240,12 @@ func (c *Crawler) StartDashboard(addr string) error {
 	http.HandleFunc("/dashboard", func(w http.ResponseWriter, r *http.Request) {
 		c.handleDashboard(w, r)
 	})
+
+	// URL shortener redirect handler
+	http.HandleFunc("/r/", func(w http.ResponseWriter, r *http.Request) {
+		c.handleRedirect(w, r)
+	})
+
 	http.HandleFunc("/api/stats", func(w http.ResponseWriter, r *http.Request) {
 		c.handleAPIStats(w, r)
 	})
@@ -2227,4 +2235,37 @@ func (c *Crawler) handleAPIStats(w http.ResponseWriter, r *http.Request) {
 	json.NewEncoder(w).Encode(stats)
 }
 
+// handleRedirect handles short URL redirects
+func (c *Crawler) handleRedirect(w http.ResponseWriter, r *http.Request) {
+	// Extract code from path: /r/{code}
+	path := r.URL.Path
+	if !strings.HasPrefix(path, "/r/") {
+		http.NotFound(w, r)
+		return
+	}
+
+	code := strings.TrimPrefix(path, "/r/")
+	if code == "" {
+		http.NotFound(w, r)
+		return
+	}
+
+	// Look up the short URL
+	shortURL, err := c.GetShortURL(code)
+	if err != nil {
+		http.NotFound(w, r)
+		return
+	}
+
+	// Record the click asynchronously. The request must not be used after
+	// the handler returns, so hand the goroutine an independent clone.
+	go func(r *http.Request) {
+		if err := c.RecordClick(code, r); err != nil {
+			fmt.Printf("Failed to record click for %s: %v\n", code, err)
+		}
+	}(r.Clone(context.Background()))
+
+	// Redirect to original URL
+	http.Redirect(w, r, shortURL.OriginalURL, http.StatusFound)
+}
+
 const dashboardHTML = `
diff --git a/db.go b/db.go
index db3d740..25ad93b 100644
--- a/db.go
+++ b/db.go
@@ -129,6 +129,33 @@
 CREATE INDEX IF NOT EXISTS idx_items_feed_url_pub_date ON items(feed_url, pub_date DESC);
 CREATE INDEX IF NOT EXISTS idx_items_unpublished ON items(feed_url, published_at) WHERE published_at IS NULL;
 CREATE INDEX IF NOT EXISTS idx_items_search ON items USING GIN(search_vector);
 
+-- URL Shortener tables
+CREATE TABLE IF NOT EXISTS short_urls (
+    code TEXT PRIMARY KEY,
+    original_url TEXT NOT NULL,
+    item_id BIGINT REFERENCES items(id),
+    feed_url TEXT,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    click_count INTEGER DEFAULT 0
+);
+
+-- UNIQUE: CreateShortURL assumes at most one row per original URL.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_short_urls_original ON short_urls(original_url);
+CREATE INDEX IF NOT EXISTS idx_short_urls_item_id ON short_urls(item_id);
+CREATE INDEX IF NOT EXISTS idx_short_urls_feed_url ON short_urls(feed_url);
+
+CREATE TABLE IF NOT EXISTS clicks (
+    id BIGSERIAL PRIMARY KEY,
+    short_code TEXT NOT NULL REFERENCES short_urls(code),
+    clicked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    referrer TEXT,
+    user_agent TEXT,
+    ip_hash TEXT,
+    country TEXT
+);
+
+CREATE INDEX IF NOT EXISTS idx_clicks_short_code ON clicks(short_code);
+CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON clicks(clicked_at DESC);
+
 -- Trigger to normalize feed URLs on insert/update (strips https://, http://, www.)
 CREATE OR REPLACE FUNCTION normalize_feed_url() RETURNS TRIGGER AS $$
diff --git a/shortener.go b/shortener.go
new file mode 100644
index 0000000..cf8a596
--- /dev/null
+++ b/shortener.go
@@ -0,0 +1,246 @@
+package main
+
+import (
+	"crypto/sha256"
+	"encoding/base64"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// ShortURL represents a shortened URL mapping
+type ShortURL struct {
+	Code        string    `json:"code"`
+	OriginalURL string    `json:"original_url"`
+	ItemID      *int64    `json:"item_id,omitempty"`
+	FeedURL     string    `json:"feed_url,omitempty"`
+	CreatedAt   time.Time `json:"created_at"`
+	ClickCount  int       `json:"click_count"`
+}
+
+// Click represents a click event on a short URL
+type Click struct {
+	ID        int64     `json:"id"`
+	ShortCode string    `json:"short_code"`
+	ClickedAt time.Time `json:"clicked_at"`
+	Referrer  string    `json:"referrer,omitempty"`
+	UserAgent string    `json:"user_agent,omitempty"`
+	IPHash    string    `json:"ip_hash,omitempty"`
+	Country   string    `json:"country,omitempty"`
+}
+
+// Base62 alphabet for short codes
+const base62Alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+// generateShortCode creates a short code from a URL
+// Uses first 6 characters of base64-encoded SHA256 hash
+func generateShortCode(url string) string {
+	hash := sha256.Sum256([]byte(url))
+	encoded := base64.URLEncoding.EncodeToString(hash[:])
+	// Take first 6 chars, replace URL-unsafe chars
+	code := encoded[:6]
+	code = strings.ReplaceAll(code, "-", "x")
+	code = strings.ReplaceAll(code, "_", "y")
+	return code
+}
+
+// CreateShortURL creates or retrieves a short URL for the given original URL
+func (c *Crawler) CreateShortURL(originalURL string, itemID *int64, feedURL string) (*ShortURL, error) {
+	// Check if we already have this URL. COALESCE: feed_url is nullable
+	// and existing.FeedURL is a plain string, so a NULL would fail the Scan.
+	var existing ShortURL
+	err := c.db.QueryRow(`
+		SELECT code, original_url, item_id, COALESCE(feed_url, ''), created_at, click_count
+		FROM short_urls WHERE original_url = $1
+	`, originalURL).Scan(&existing.Code, &existing.OriginalURL, &existing.ItemID,
+		&existing.FeedURL, &existing.CreatedAt, &existing.ClickCount)
+
+	if err == nil {
+		return &existing, nil
+	}
+
+	// Generate new short code
+	code := generateShortCode(originalURL)
+
+	// Handle collision by appending counter
+	baseCode := code
+	for i := 0; i < 100; i++ {
+		if i > 0 {
+			code = fmt.Sprintf("%s%d", baseCode, i)
+		}
+
+		var existingURL string
+		err := c.db.QueryRow("SELECT original_url FROM short_urls WHERE code = $1", code).Scan(&existingURL)
+		if err != nil {
+			// Code doesn't exist, use it
+			break
+		}
+		if existingURL == originalURL {
+			// Same URL, return existing
+			return c.GetShortURL(code)
+		}
+		// Collision with different URL, try next
+	}
+
+	// Insert new short URL
+	now := time.Now()
+	_, err = c.db.Exec(`
+		INSERT INTO short_urls (code, original_url, item_id, feed_url, created_at, click_count)
+		VALUES ($1, $2, $3, $4, $5, 0)
+	`, code, originalURL, itemID, NullableString(feedURL), now)
+
+	if err != nil {
+		return nil, fmt.Errorf("failed to create short URL: %v", err)
+	}
+
+	return &ShortURL{
+		Code:        code,
+		OriginalURL: originalURL,
+		ItemID:      itemID,
+		FeedURL:     feedURL,
+		CreatedAt:   now,
+		ClickCount:  0,
+	}, nil
+}
+
+// GetShortURL retrieves a short URL by code
+func (c *Crawler) GetShortURL(code string) (*ShortURL, error) {
+	var s ShortURL
+	var itemID *int64
+	var feedURL *string
+
+	err := c.db.QueryRow(`
+		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		FROM short_urls WHERE code = $1
+	`, code).Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+
+	if err != nil {
+		return nil, err
+	}
+
+	s.ItemID = itemID
+	s.FeedURL = StringValue(feedURL)
+	return &s, nil
+}
+
+// RecordClick records a click on a short URL
+func (c *Crawler) RecordClick(code string, r *http.Request) error {
+	// Hash the IP for privacy. X-Forwarded-For entries are comma+space
+	// separated, so trim the first element before hashing.
+	ip := r.RemoteAddr
+	if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" {
+		ip = strings.TrimSpace(strings.Split(forwarded, ",")[0])
+	}
+	ipHash := fmt.Sprintf("%x", sha256.Sum256([]byte(ip)))[:16]
+
+	referrer := r.Header.Get("Referer")
+	userAgent := r.Header.Get("User-Agent")
+
+	// Truncate long values
+	if len(referrer) > 500 {
+		referrer = referrer[:500]
+	}
+	if len(userAgent) > 500 {
+		userAgent = userAgent[:500]
+	}
+
+	// Insert click record
+	_, err := c.db.Exec(`
+		INSERT INTO clicks (short_code, clicked_at, referrer, user_agent, ip_hash)
+		VALUES ($1, $2, $3, $4, $5)
+	`, code, time.Now(), NullableString(referrer), NullableString(userAgent), ipHash)
+
+	if err != nil {
+		return err
+	}
+
+	// Increment click count
+	_, err = c.db.Exec(`
+		UPDATE short_urls SET click_count = click_count + 1 WHERE code = $1
+	`, code)
+
+	return err
+}
+
+// GetShortURLForPost returns the short URL string for use in posts
+// Format: https://1440.news/r/{code}
+func (c *Crawler) GetShortURLForPost(originalURL string, itemID *int64, feedURL string) (string, error) {
+	shortURL, err := c.CreateShortURL(originalURL, itemID, feedURL)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("https://1440.news/r/%s", shortURL.Code), nil
+}
+
+// GetClickStats returns click statistics for a short URL
+func (c *Crawler) GetClickStats(code string) (int, error) {
+	var count int
+	err := c.db.QueryRow("SELECT click_count FROM short_urls WHERE code = $1", code).Scan(&count)
+	return count, err
+}
+
+// GetRecentClicks returns recent clicks across all short URLs
+func (c *Crawler) GetRecentClicks(limit int) ([]Click, error) {
+	rows, err := c.db.Query(`
+		SELECT id, short_code, clicked_at, referrer, user_agent, ip_hash, country
+		FROM clicks
+		ORDER BY clicked_at DESC
+		LIMIT $1
+	`, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var clicks []Click
+	for rows.Next() {
+		var click Click
+		var referrer, userAgent, ipHash, country *string
+
+		err := rows.Scan(&click.ID, &click.ShortCode, &click.ClickedAt,
+			&referrer, &userAgent, &ipHash, &country)
+		if err != nil {
+			continue
+		}
+
+		click.Referrer = StringValue(referrer)
+		click.UserAgent = StringValue(userAgent)
+		click.IPHash = StringValue(ipHash)
+		click.Country = StringValue(country)
+
+		clicks = append(clicks, click)
+	}
+
+	return clicks, nil
+}
+
+// GetTopShortURLs returns the most clicked short URLs
+func (c *Crawler) GetTopShortURLs(limit int) ([]ShortURL, error) {
+	rows, err := c.db.Query(`
+		SELECT code, original_url, item_id, feed_url, created_at, click_count
+		FROM short_urls
+		ORDER BY click_count DESC
+		LIMIT $1
+	`, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var urls []ShortURL
+	for rows.Next() {
+		var s ShortURL
+		var itemID *int64
+		var feedURL *string
+
+		err := rows.Scan(&s.Code, &s.OriginalURL, &itemID, &feedURL, &s.CreatedAt, &s.ClickCount)
+		if err != nil {
+			continue
+		}
+
+		s.ItemID = itemID
+		s.FeedURL = StringValue(feedURL)
+		urls = append(urls, s)
+	}
+
+	return urls, nil
+}