Initial commit: shared Go module for 1440.news services

Contains:
- db.go: Database connection wrapper with helper methods
- models.go: Domain, Feed, Item, ShortURL, Click structs
- util.go: URL normalization, TLD functions, search helpers
- handle.go: AT Protocol handle derivation from feed URLs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: primal
Date: 2026-02-02 11:19:04 -05:00
Commit: 1ff14ee957

6 changed files with 880 additions and 0 deletions
db.go (+156)
@@ -0,0 +1,156 @@
package shared
import (
"context"
"fmt"
"net/url"
"os"
"strings"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
)
// DB wraps pgxpool.Pool with helper methods
type DB struct {
*pgxpool.Pool
}
// OpenDatabase connects to PostgreSQL using environment variables or connection string
func OpenDatabase(connString string) (*DB, error) {
fmt.Printf("Connecting to database...\n")
// If connection string not provided, try environment variables
if connString == "" {
connString = os.Getenv("DATABASE_URL")
}
if connString == "" {
// Build from individual env vars
host := GetEnvOrDefault("DB_HOST", "infra-postgres")
port := GetEnvOrDefault("DB_PORT", "5432")
user := GetEnvOrDefault("DB_USER", "dba_1440_news")
dbname := GetEnvOrDefault("DB_NAME", "db_1440_news")
// Support Docker secrets (password file) or direct password
password := os.Getenv("DB_PASSWORD")
if password == "" {
if passwordFile := os.Getenv("DB_PASSWORD_FILE"); passwordFile != "" {
data, err := os.ReadFile(passwordFile)
if err != nil {
return nil, fmt.Errorf("failed to read password file: %v", err)
}
password = strings.TrimSpace(string(data))
}
}
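// Resulting DSN shape (credentials here are hypothetical):
// postgres://dba_1440_news:s3cret@infra-postgres:5432/db_1440_news?sslmode=disable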
connString = fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable",
user, url.QueryEscape(password), host, port, dbname)
}
config, err := pgxpool.ParseConfig(connString)
if err != nil {
return nil, fmt.Errorf("failed to parse connection string: %v", err)
}
// Connection pool settings
config.MaxConns = 10
config.MinConns = 0 // Don't pre-create connections to avoid schema race conditions
config.MaxConnLifetime = 5 * time.Minute
config.MaxConnIdleTime = 1 * time.Minute
ctx := context.Background()
pool, err := pgxpool.NewWithConfig(ctx, config)
if err != nil {
return nil, fmt.Errorf("failed to connect to database: %v", err)
}
// Verify connection
if err := pool.Ping(ctx); err != nil {
pool.Close()
return nil, fmt.Errorf("failed to ping database: %v", err)
}
fmt.Println(" Connected to PostgreSQL")
return &DB{pool}, nil
}
// GetEnvOrDefault returns environment variable value or default
func GetEnvOrDefault(key, defaultVal string) string {
if val := os.Getenv(key); val != "" {
return val
}
return defaultVal
}
// QueryRow wraps pool.QueryRow for compatibility
func (db *DB) QueryRow(query string, args ...any) pgx.Row {
return db.Pool.QueryRow(context.Background(), query, args...)
}
// Query wraps pool.Query for compatibility
func (db *DB) Query(query string, args ...any) (pgx.Rows, error) {
return db.Pool.Query(context.Background(), query, args...)
}
// Exec wraps pool.Exec for compatibility
func (db *DB) Exec(query string, args ...any) (int64, error) {
result, err := db.Pool.Exec(context.Background(), query, args...)
if err != nil {
return 0, err
}
return result.RowsAffected(), nil
}
// Begin starts a transaction
func (db *DB) Begin() (pgx.Tx, error) {
return db.Pool.Begin(context.Background())
}
// Close closes the connection pool
func (db *DB) Close() error {
db.Pool.Close()
return nil
}
// NullableString returns nil for empty strings, otherwise the string pointer
func NullableString(s string) *string {
if s == "" {
return nil
}
return &s
}
// NullableTime returns nil for zero times, otherwise the time pointer
func NullableTime(t time.Time) *time.Time {
if t.IsZero() {
return nil
}
return &t
}
// StringValue returns empty string for nil, otherwise the dereferenced value
func StringValue(s *string) string {
if s == nil {
return ""
}
return *s
}
// TimeValue returns zero time for nil, otherwise the dereferenced value
func TimeValue(t *time.Time) time.Time {
if t == nil {
return time.Time{}
}
return *t
}
// ToSearchQuery converts a user query to PostgreSQL tsquery format
func ToSearchQuery(query string) string {
// Simple conversion: split on spaces and join with &
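// e.g. "climate change policy" → "climate & change & policy"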
words := strings.Fields(query)
if len(words) == 0 {
return ""
}
return strings.Join(words, " & ")
}
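
For illustration, a minimal sketch of a service consuming these helpers; the DSN, the feeds table, and the query are hypothetical, not part of this commit:

package main

import (
	"fmt"
	"log"

	"github.com/1440news/shared"
)

func main() {
	// An empty string falls back to DATABASE_URL or the DB_* variables;
	// the DSN here is a placeholder.
	db, err := shared.OpenDatabase("postgres://user:pass@localhost:5432/db_1440_news?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Hypothetical table, showing the context-free wrapper methods.
	var n int
	if err := db.QueryRow("SELECT count(*) FROM feeds").Scan(&n); err != nil {
		log.Fatal(err)
	}
	fmt.Println("feeds:", n, "tsquery:", shared.ToSearchQuery("climate change"))
}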
go.mod (+14)
@@ -0,0 +1,14 @@
module github.com/1440news/shared
go 1.24.0
require github.com/jackc/pgx/v5 v5.7.5
require (
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
golang.org/x/crypto v0.47.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/text v0.33.0 // indirect
)
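
Until the module is published, a consuming service would pin it with a replace directive; the local checkout path below is hypothetical:

// in a consuming service's go.mod
require github.com/1440news/shared v0.0.0
replace github.com/1440news/shared => ../shared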
go.sum (+28)
@@ -0,0 +1,28 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.7.5 h1:JHGfMnQY+IEtGM63d+NGMjoRpysB2JBwDr5fsngwmJs=
github.com/jackc/pgx/v5 v5.7.5/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8=
golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
handle.go (+262)
@@ -0,0 +1,262 @@
package shared
import (
"net/url"
"regexp"
"strings"
)
// DeriveHandleFromFeed generates an AT Protocol handle from a feed URL
// Format: {domain}-{category}.1440.news
// AT Protocol allows up to 63 characters per label, but the PDS
// restricts the first segment to 18 characters for local handles.
// Examples:
//
// feeds.bbci.co.uk/news/technology/rss.xml → bbc-technology.1440.news
// news.ycombinator.com/rss → ycombinator.1440.news
func DeriveHandleFromFeed(feedURL string) string {
const maxSubdomainLen = 18 // PDS limit for first segment
// Ensure we have a scheme for parsing
if !strings.Contains(feedURL, "://") {
feedURL = "https://" + feedURL
}
u, err := url.Parse(feedURL)
if err != nil {
return ""
}
hostname := strings.ToLower(u.Hostname())
path := strings.ToLower(u.Path)
// Remove common feed suffixes/extensions
suffixesToRemove := []string{".xml", ".rss", ".atom", ".json", "/rss", "/feed", "/atom", "/index"}
for _, suffix := range suffixesToRemove {
path = strings.TrimSuffix(path, suffix)
}
// Split path into segments and filter noise
segments := strings.Split(strings.Trim(path, "/"), "/")
skipPathWords := map[string]bool{
"rss": true, "feed": true, "feeds": true, "atom": true,
"xml": true, "default": true, "index": true, "services": true,
"nyt": true,
}
var pathParts []string
for _, seg := range segments {
seg = cleanHandleSegment(seg)
if seg != "" && !skipPathWords[seg] {
pathParts = append(pathParts, seg)
}
}
// Split hostname and extract the meaningful domain
hostParts := strings.Split(hostname, ".")
// Two-part TLDs to handle specially
twoPartTLDs := map[string]bool{
"co.uk": true, "com.au": true, "co.nz": true, "co.jp": true,
"com.br": true, "co.in": true, "org.uk": true, "ac.uk": true,
}
// Check for two-part TLD
if len(hostParts) >= 2 {
possibleTwoPartTLD := hostParts[len(hostParts)-2] + "." + hostParts[len(hostParts)-1]
if twoPartTLDs[possibleTwoPartTLD] {
hostParts = hostParts[:len(hostParts)-2]
} else {
// Single TLD - remove it
singleTLDs := map[string]bool{
"com": true, "org": true, "net": true, "io": true,
"edu": true, "gov": true, "uk": true, "de": true, "fr": true,
}
if singleTLDs[hostParts[len(hostParts)-1]] {
hostParts = hostParts[:len(hostParts)-1]
}
}
}
// Skip noise subdomains
skipHostWords := map[string]bool{
"www": true, "feeds": true, "rss": true, "feed": true,
"api": true, "cdn": true, "static": true, "news": true,
}
var meaningfulHostParts []string
for _, part := range hostParts {
if !skipHostWords[part] && part != "" {
meaningfulHostParts = append(meaningfulHostParts, part)
}
}
// Get the main domain (e.g., "bbci", "ycombinator", "nytimes")
var mainDomain string
if len(meaningfulHostParts) > 0 {
mainDomain = meaningfulHostParts[len(meaningfulHostParts)-1]
} else if len(hostParts) > 0 {
mainDomain = hostParts[len(hostParts)-1]
}
// Special case: "bbci" should become "bbc"
if mainDomain == "bbci" {
mainDomain = "bbc"
}
// Abbreviations for long category names to fit 18-char limit
categoryAbbrevs := map[string]string{
"science-and-environment": "sci-env",
"entertainment-and-arts": "ent-arts",
"science-environment": "sci-env",
"entertainment-arts": "ent-arts",
"technology": "tech",
"business": "biz",
"international": "intl",
"environment": "env",
"entertainment": "ent",
"politics": "pol",
}
// Build subdomain: domain + category (from path)
var subdomain string
if len(pathParts) > 0 {
// Use last meaningful path part as category (e.g., "technology" from /news/technology/)
category := pathParts[len(pathParts)-1]
// Skip generic categories
if category == "news" && len(pathParts) == 1 {
subdomain = mainDomain
} else {
// Try to abbreviate if the full subdomain would be too long
fullSubdomain := mainDomain + "-" + category
if len(fullSubdomain) > maxSubdomainLen {
if abbrev, ok := categoryAbbrevs[category]; ok {
category = abbrev
}
}
subdomain = mainDomain + "-" + category
}
} else {
subdomain = mainDomain
}
// If still too long, just use main hostname
if len(subdomain) > maxSubdomainLen {
subdomain = mainDomain
}
// Final safety: truncate if still too long
if len(subdomain) > maxSubdomainLen {
subdomain = subdomain[:maxSubdomainLen]
}
subdomain = strings.Trim(subdomain, "-")
// Collapse multiple hyphens
for strings.Contains(subdomain, "--") {
subdomain = strings.ReplaceAll(subdomain, "--", "-")
}
return subdomain + ".1440.news"
}
// cleanHandleSegment sanitizes a string for use in an AT Protocol handle segment
// Handle segments must be alphanumeric with hyphens, no leading/trailing hyphens
func cleanHandleSegment(s string) string {
// Remove file extensions
if idx := strings.LastIndex(s, "."); idx > 0 {
s = s[:idx]
}
// Convert to lowercase
s = strings.ToLower(s)
// Strip common feed prefixes/suffixes from the segment itself
// e.g., "showrss" → "show", "rssworld" → "world"
feedAffixes := []string{"rss", "feed", "atom", "xml"}
for _, affix := range feedAffixes {
// Strip suffix (e.g., "showrss" → "show")
if strings.HasSuffix(s, affix) && len(s) > len(affix) {
s = strings.TrimSuffix(s, affix)
break
}
// Strip prefix (e.g., "rssworld" → "world")
if strings.HasPrefix(s, affix) && len(s) > len(affix) {
s = strings.TrimPrefix(s, affix)
break
}
}
// Replace underscores and other separators with hyphens
s = strings.ReplaceAll(s, "_", "-")
s = strings.ReplaceAll(s, " ", "-")
// Remove any characters that aren't alphanumeric or hyphens
reg := regexp.MustCompile(`[^a-z0-9-]`)
s = reg.ReplaceAllString(s, "")
// Collapse multiple hyphens
for strings.Contains(s, "--") {
s = strings.ReplaceAll(s, "--", "-")
}
// Trim leading/trailing hyphens
s = strings.Trim(s, "-")
return s
}
// SplitHandle extracts the path prefix and hostname from a derived handle
// Example: show.news.ycombinator.com.1440.news → ("show", "news.ycombinator.com")
func SplitHandle(handle string) (prefix string, hostname string) {
// Remove .1440.news suffix
handle = strings.TrimSuffix(handle, ".1440.news")
parts := strings.Split(handle, ".")
// Try to find where hostname starts by looking for valid hostname patterns
if len(parts) >= 2 {
for i := 0; i < len(parts)-1; i++ {
remaining := strings.Join(parts[i:], ".")
if looksLikeHostname(remaining) {
if i > 0 {
prefix = strings.Join(parts[:i], ".")
}
hostname = remaining
return
}
}
}
// Fallback: no prefix, entire thing is hostname
hostname = handle
return "", hostname
}
func isLikelyTLDPart(s string) bool {
tlds := map[string]bool{
"com": true, "org": true, "net": true, "edu": true, "gov": true,
"io": true, "co": true, "uk": true, "de": true, "fr": true,
"jp": true, "au": true, "ca": true, "nl": true, "se": true,
"news": true, "blog": true, "tech": true, "dev": true,
}
return tlds[s]
}
func isTwoPartTLD(first, second string) bool {
twoPartTLDs := map[string]bool{
"co.uk": true, "com.au": true, "co.jp": true, "co.nz": true,
"org.uk": true, "net.au": true, "com.br": true,
}
return twoPartTLDs[first+"."+second]
}
func looksLikeHostname(s string) bool {
// A hostname typically has at least one dot and ends with a TLD-like part
parts := strings.Split(s, ".")
if len(parts) < 2 {
return false
}
lastPart := parts[len(parts)-1]
return isLikelyTLDPart(lastPart)
}
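
A small driver (illustrative, not part of this commit) exercising the derivation against the doc-comment examples:

package main

import (
	"fmt"

	"github.com/1440news/shared"
)

func main() {
	for _, feed := range []string{
		"feeds.bbci.co.uk/news/technology/rss.xml", // → bbc-technology.1440.news
		"news.ycombinator.com/rss",                 // → ycombinator.1440.news
	} {
		fmt.Println(feed, "→", shared.DeriveHandleFromFeed(feed))
	}
}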
models.go (+178)
@@ -0,0 +1,178 @@
package shared
import (
"time"
)
// Domain represents a host to process for feeds
// Status: hold (pending review), pass (approved), skip (not processing), dead (retired TLD)
// CrawledAt: zero time = needs domain_check, +1 sec = needs feed_crawl, real time = done
type Domain struct {
Host string `json:"host"`
Status string `json:"status"`
CrawledAt time.Time `json:"crawled_at"`
FeedsFound int `json:"feeds_found,omitempty"`
LastError string `json:"last_error,omitempty"`
TLD string `json:"tld,omitempty"`
MissCount int `json:"miss_count,omitempty"`
}
// MissCountThreshold is the number of consecutive errors before setting status to hold
const MissCountThreshold = 100
// Sentinel values for domain processing state
var (
DomainStateUnchecked = time.Time{} // 0001-01-01 00:00:00 - needs domain_check
DomainStateChecked = time.Time{}.Add(time.Second) // 0001-01-01 00:00:01 - needs feed_crawl
)
// FullHost returns the complete hostname (host + tld)
func (d *Domain) FullHost() string {
return FullHost(d.Host, d.TLD)
}
// Feed represents a discovered RSS/Atom feed with metadata
type Feed struct {
URL string `json:"url"`
Type string `json:"type"` // "rss", "atom", "json", or "unknown"
Category string `json:"category"` // "main", "comments", "category", "author", "article", "podcast"
Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"`
Language string `json:"language,omitempty"`
SiteURL string `json:"site_url,omitempty"` // The website the feed belongs to
// Timing
DiscoveredAt time.Time `json:"discovered_at"`
LastCheckedAt time.Time `json:"last_checked_at,omitempty"` // feed_check: when last checked
NextCheckAt time.Time `json:"next_check_at,omitempty"` // feed_check: when to next check
LastBuildDate time.Time `json:"last_build_date,omitempty"` // From feed's lastBuildDate/updated
// Cache headers for conditional requests
ETag string `json:"etag,omitempty"`
LastModified string `json:"last_modified,omitempty"`
// Health tracking
Status string `json:"status"` // "pass", "hold", "skip"
LastError string `json:"last_error,omitempty"`
LastErrorAt time.Time `json:"last_error_at,omitempty"`
// Discovery source
SourceURL string `json:"source_url,omitempty"`
DomainHost string `json:"domain_host,omitempty"`
DomainTLD string `json:"domain_tld,omitempty"`
// Content stats
ItemCount int `json:"item_count,omitempty"` // Number of items in last feed_check
OldestItemDate time.Time `json:"oldest_item_date,omitempty"`
NewestItemDate time.Time `json:"newest_item_date,omitempty"`
// Adaptive check interval
NoUpdate int `json:"no_update"` // Consecutive checks with no change
// Publishing to PDS
PublishStatus string `json:"publish_status"` // "hold", "pass", "skip"
PublishAccount string `json:"publish_account,omitempty"` // e.g., "news.ycombinator.com.1440.news"
}
// Enclosure represents a media attachment (audio, video, image)
type Enclosure struct {
URL string `json:"url"`
Type string `json:"type"` // MIME type (audio/mpeg, image/jpeg, etc.)
Length int64 `json:"length"` // Size in bytes
}
// Item represents an individual entry/article from a feed
type Item struct {
FeedURL string `json:"feed_url"`
GUID string `json:"guid,omitempty"`
Title string `json:"title,omitempty"`
Link string `json:"link,omitempty"`
Description string `json:"description,omitempty"`
Content string `json:"content,omitempty"`
Author string `json:"author,omitempty"`
PubDate time.Time `json:"pub_date,omitempty"`
DiscoveredAt time.Time `json:"discovered_at"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
// Media attachments
Enclosure *Enclosure `json:"enclosure,omitempty"` // Primary enclosure (podcast audio, etc.)
ImageURLs []string `json:"image_urls,omitempty"` // Image URLs extracted from content
Tags []string `json:"tags,omitempty"` // Category/tag strings from feed
// Publishing to PDS
PublishedAt time.Time `json:"published_at,omitempty"`
PublishedUri string `json:"published_uri,omitempty"`
}
// ShortURL represents a shortened URL mapping
type ShortURL struct {
Code string `json:"code"`
OriginalURL string `json:"original_url"`
ItemGUID string `json:"item_guid,omitempty"`
FeedURL string `json:"feed_url,omitempty"`
CreatedAt time.Time `json:"created_at"`
ClickCount int `json:"click_count"`
}
// Click represents a click event on a short URL
type Click struct {
ID int64 `json:"id"`
ShortCode string `json:"short_code"`
ClickedAt time.Time `json:"clicked_at"`
Referrer string `json:"referrer,omitempty"`
UserAgent string `json:"user_agent,omitempty"`
IPHash string `json:"ip_hash,omitempty"`
Country string `json:"country,omitempty"`
}
// DashboardStats holds all statistics for the dashboard
type DashboardStats struct {
// Domain stats
TotalDomains int `json:"total_domains"`
HoldDomains int `json:"hold_domains"`
PassDomains int `json:"pass_domains"`
SkipDomains int `json:"skip_domains"`
DeadDomains int `json:"dead_domains"`
// Feed stats
TotalFeeds int `json:"total_feeds"`
AliveFeeds int `json:"alive_feeds"` // status='pass' (healthy feeds)
PublishFeeds int `json:"publish_feeds"` // publish_status='pass' (approved for publishing)
SkipFeeds int `json:"skip_feeds"`
HoldFeeds int `json:"hold_feeds"`
DeadFeeds int `json:"dead_feeds"`
EmptyFeeds int `json:"empty_feeds"`
RSSFeeds int `json:"rss_feeds"`
AtomFeeds int `json:"atom_feeds"`
JSONFeeds int `json:"json_feeds"`
UnknownFeeds int `json:"unknown_feeds"`
// Processing rates (per minute)
DomainsCrawled int32 `json:"domains_crawled"` // feed_crawl count
DomainCheckRate int `json:"domain_check_rate"` // domain_check per minute
FeedCrawlRate int `json:"feed_crawl_rate"` // feed_crawl per minute
FeedCheckRate int `json:"feed_check_rate"` // feed_check per minute
// Timing
UpdatedAt time.Time `json:"updated_at"`
}
// TLDStat holds TLD statistics
type TLDStat struct {
TLD string `json:"tld"`
Count int `json:"count"`
}
// DomainStat holds domain statistics
type DomainStat struct {
Host string `json:"host"`
FeedsFound int `json:"feeds_found"`
}
// FeedInfo holds basic feed metadata for profile setup
type FeedInfo struct {
Title string
Description string
SiteURL string
SourceHost string
}
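
A sketch of the CrawledAt sentinel convention in use; the crawlState helper is illustrative only:

package main

import (
	"fmt"
	"time"

	"github.com/1440news/shared"
)

// crawlState maps the documented CrawledAt sentinels to processing states.
func crawlState(d shared.Domain) string {
	switch {
	case d.CrawledAt.Equal(shared.DomainStateUnchecked):
		return "needs domain_check"
	case d.CrawledAt.Equal(shared.DomainStateChecked):
		return "needs feed_crawl"
	default:
		return "done"
	}
}

func main() {
	d := shared.Domain{Host: "npr", TLD: "org", Status: "pass"}
	fmt.Println(d.FullHost(), "→", crawlState(d)) // npr.org → needs domain_check

	d.CrawledAt = shared.DomainStateChecked
	fmt.Println(crawlState(d)) // needs feed_crawl

	d.CrawledAt = time.Now()
	fmt.Println(crawlState(d)) // done
}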
util.go (+242)
@@ -0,0 +1,242 @@
package shared
import (
"net/url"
"strings"
)
// NormalizeURL strips scheme (http/https) and www. prefix to save storage space.
// The normalized URL can be reconstructed with https:// for fetching.
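// e.g. "https://www.example.com/feeds/all.xml" → "example.com/feeds/all.xml"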
func NormalizeURL(rawURL string) string {
// Remove scheme
u := rawURL
if strings.HasPrefix(u, "https://") {
u = u[8:]
} else if strings.HasPrefix(u, "http://") {
u = u[7:]
}
// Remove www. prefix
if strings.HasPrefix(u, "www.") {
u = u[4:]
}
return u
}
// NormalizeHost strips www. prefix from a hostname for canonical storage
func NormalizeHost(host string) string {
if strings.HasPrefix(host, "www.") {
return host[4:]
}
return host
}
// ReverseHost converts a reverse domain notation back to normal
// e.g., "com.example.www" -> "www.example.com"
func ReverseHost(reverseHost string) string {
parts := strings.Split(reverseHost, ".")
for i, j := 0, len(parts)-1; i < j; i, j = i+1, j-1 {
parts[i], parts[j] = parts[j], parts[i]
}
return strings.Join(parts, ".")
}
// GetTLD extracts the TLD from a hostname
func GetTLD(host string) string {
parts := strings.Split(host, ".")
if len(parts) > 0 {
return parts[len(parts)-1]
}
return ""
}
// StripTLD removes the TLD suffix from a hostname
// e.g., "example.com" -> "example", "sub.example.com" -> "sub.example"
func StripTLD(host string) string {
idx := strings.LastIndex(host, ".")
if idx > 0 {
return host[:idx]
}
return host
}
// GetDomainHost extracts the host part from a full domain (without TLD)
// e.g., "npr.org" -> "npr", "bbc.co.uk" -> "bbc.co"
func GetDomainHost(domain string) string {
return StripTLD(domain)
}
// FullHost reconstructs the full hostname from host and tld
// e.g., ("example", "com") -> "example.com"
func FullHost(host, tld string) string {
if tld == "" {
return host
}
return host + "." + tld
}
// MakeAbsoluteURL resolves a relative URL against a base URL
func MakeAbsoluteURL(href, baseURL string) string {
base, err := url.Parse(baseURL)
if err != nil {
return href
}
link, err := url.Parse(href)
if err != nil {
return href
}
return base.ResolveReference(link).String()
}
// SearchQuery represents a parsed search with optional type prefix
type SearchQuery struct {
Type string // "all", "domain", "url", "title", "description", "item"
Pattern string // the search pattern (without prefix)
ExactMatch bool // for domain searches: true if TLD was specified (d:npr.org matches exactly)
// For "all" type searches that look like domains, these are populated for additional exact matching
DomainHost string // e.g., "npr" from "npr.org"
DomainTLD string // e.g., "org" from "npr.org"
}
// ParseSearchPrefix parses search prefixes like "a:", "d:", "f:", "t:", "s:", "i:"
// Returns SearchQuery with Type and Pattern
// Types: "all" (default or a: prefix), "domain" (d:, extracts TLD from pattern),
//
// "url" (f:), "title" (t:), "description" (s:), "item" (i:)
func ParseSearchPrefix(query string) SearchQuery {
query = strings.TrimSpace(query)
if query == "" {
return SearchQuery{Type: "all", Pattern: ""}
}
// Check for prefixes (case-insensitive)
lower := strings.ToLower(query)
if strings.HasPrefix(lower, "a:") {
return SearchQuery{Type: "all", Pattern: strings.TrimSpace(query[2:])}
}
if strings.HasPrefix(lower, "d:") {
return SearchQuery{Type: "domain", Pattern: strings.TrimSpace(query[2:])}
}
if strings.HasPrefix(lower, "f:") {
return SearchQuery{Type: "url", Pattern: strings.TrimSpace(query[2:])}
}
if strings.HasPrefix(lower, "t:") {
return SearchQuery{Type: "title", Pattern: strings.TrimSpace(query[2:])}
}
if strings.HasPrefix(lower, "s:") {
return SearchQuery{Type: "description", Pattern: strings.TrimSpace(query[2:])}
}
if strings.HasPrefix(lower, "i:") {
return SearchQuery{Type: "item", Pattern: strings.TrimSpace(query[2:])}
}
// For "all" type, check if pattern looks like a domain and extract host/tld
result := SearchQuery{Type: "all", Pattern: query}
if LooksLikeDomain(query) {
host, tld := ParseSearchTerm(query)
if tld != "" {
result.DomainHost = host
result.DomainTLD = tld
}
}
return result
}
// LooksLikeDomain checks if a query looks like a domain name
func LooksLikeDomain(query string) bool {
if query == "" || strings.Contains(query, " ") {
return false
}
// Must have at least one dot
lastDot := strings.LastIndex(query, ".")
if lastDot == -1 || lastDot == 0 || lastDot == len(query)-1 {
return false
}
// TLD must be 2-6 letters (upper- or lowercase)
tld := query[lastDot+1:]
if len(tld) < 2 || len(tld) > 6 {
return false
}
for _, c := range tld {
if c < 'a' || c > 'z' {
if c < 'A' || c > 'Z' {
return false
}
}
}
return true
}
// ParseSearchTerm analyzes a search query and extracts host pattern and optional TLD filter.
// If the search ends with what looks like a TLD (e.g., "example.com"), it splits them.
// Returns (hostPattern, tldFilter) where tldFilter may be empty.
func ParseSearchTerm(search string) (hostPattern, tldFilter string) {
search = strings.TrimSpace(search)
if search == "" {
return "", ""
}
// Check if search contains a dot
lastDot := strings.LastIndex(search, ".")
if lastDot == -1 || lastDot == len(search)-1 {
// No dot or ends with dot - treat as host-only search
return search, ""
}
// Extract potential TLD (part after last dot)
potentialTLD := strings.ToLower(search[lastDot+1:])
hostPart := search[:lastDot]
// Validate TLD: must be 2-24 lowercase letters (covers all IANA TLDs)
if len(potentialTLD) < 2 || len(potentialTLD) > 24 {
return search, ""
}
for _, c := range potentialTLD {
if c < 'a' || c > 'z' {
// Contains non-letter, not a TLD
return search, ""
}
}
// Looks like a valid TLD pattern
return hostPart, potentialTLD
}
// ShouldCrawl checks if a link should be crawled (same host as base)
func ShouldCrawl(link, baseURL string) bool {
linkURL, err := url.Parse(link)
if err != nil {
return false
}
baseURLParsed, err := url.Parse(baseURL)
if err != nil {
return false
}
return linkURL.Host == baseURLParsed.Host
}
// ShouldAutoSkipDomain checks if a domain should be auto-skipped based on patterns
func ShouldAutoSkipDomain(host string) bool {
// Never skip our own domain
if strings.HasSuffix(host, "1440.news") || host == "1440.news" {
return false
}
// Skip bare TLDs (no dot means it's just "com", "net", etc.)
if !strings.Contains(host, ".") {
return true
}
// Skip domains starting with a digit (spam pattern)
if len(host) > 0 && host[0] >= '0' && host[0] <= '9' {
return true
}
// Skip domains starting with letter-dash (spam pattern, e.g., "a-example.com")
if len(host) > 1 && ((host[0] >= 'a' && host[0] <= 'z') || (host[0] >= 'A' && host[0] <= 'Z')) && host[1] == '-' {
return true
}
return false
}
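
Finally, a short sketch (illustrative driver only) of the URL and search helpers end to end:

package main

import (
	"fmt"

	"github.com/1440news/shared"
)

func main() {
	// Bare domains in an "all" search also get an exact host/TLD split.
	q := shared.ParseSearchPrefix("npr.org")
	fmt.Println(q.Type, q.Pattern, q.DomainHost, q.DomainTLD) // all npr.org npr org

	host, tld := shared.ParseSearchTerm("bbc.co.uk")
	fmt.Println(host, tld) // bbc.co uk

	fmt.Println(shared.NormalizeURL("https://www.npr.org/rss/rss.php")) // npr.org/rss/rss.php
	fmt.Println(shared.ShouldAutoSkipDomain("7days.example"))           // true: leading digit
}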