Files
athens/pkg/download/protocol.go
Nicholas Wiersma d932d50232 chore: lint code with golangci-lint (#1828)
* feat: add golangci-lint linting

* chore: fix linter issues

* feat: add linting into the workflow

* docs: update lint docs

* fix: cr suggestions

* fix: remove old formatting and vetting scripts

* fix: add docker make target

* fix: action go caching

* fix: depreciated actions checkout version

* fix: cr suggestion

* fix: cr suggestions

---------

Co-authored-by: Manu Gupta <manugupt1@gmail.com>
2023-02-24 20:39:17 -08:00

305 lines
9.5 KiB
Go

package download
import (
"context"
"regexp"
"strings"
"sync"
"time"
"github.com/gomods/athens/pkg/download/mode"
"github.com/gomods/athens/pkg/errors"
"github.com/gomods/athens/pkg/log"
"github.com/gomods/athens/pkg/module"
"github.com/gomods/athens/pkg/observ"
"github.com/gomods/athens/pkg/requestid"
"github.com/gomods/athens/pkg/stash"
"github.com/gomods/athens/pkg/storage"
)
// Protocol is the download protocol which mirrors
// the http requests that cmd/go makes to the proxy.
// Every method takes the module path as mod; the version-specific
// endpoints additionally take the requested version as ver.
type Protocol interface {
// List implements GET /{module}/@v/list
// and returns the versions available for mod.
List(ctx context.Context, mod string) ([]string, error)
// Info implements GET /{module}/@v/{version}.info
// and returns the .info content as bytes.
Info(ctx context.Context, mod, ver string) ([]byte, error)
// Latest implements GET /{module}/@latest
// and returns the revision info of the latest version of mod.
Latest(ctx context.Context, mod string) (*storage.RevInfo, error)
// GoMod implements GET /{module}/@v/{version}.mod
// and returns the go.mod content as bytes.
GoMod(ctx context.Context, mod, ver string) ([]byte, error)
// Zip implements GET /{module}/@v/{version}.zip
// and returns the module archive as a storage.SizeReadCloser.
// NOTE(review): presumably the caller must close the returned value — confirm.
Zip(ctx context.Context, mod, ver string) (storage.SizeReadCloser, error)
}
// Wrapper helps extend the main protocol's functionality with addons.
// It receives a Protocol and returns the Protocol that should be used in its
// place; New applies the wrappers it is given one after another, each one
// wrapping the result of the previous one.
type Wrapper func(Protocol) Protocol
// Opts specifies download protocol options to avoid long func signature.
type Opts struct {
// Storage is the backend that List, Info, GoMod and Zip read from.
Storage storage.Backend
// Stasher is invoked to fetch a module version that Storage reported
// as not found.
Stasher stash.Stasher
// Lister is queried for upstream versions by List and Latest, unless
// NetworkMode is Offline.
Lister module.UpstreamLister
// DownloadFile selects, per module, how a version missing from Storage
// is handled. When nil, New replaces it with one whose Mode is mode.Sync.
DownloadFile *mode.DownloadFile
// NetworkMode is compared against the Strict, Offline and Fallback
// constants declared in this package.
NetworkMode string
}
// NetworkMode constants.
const (
// Strict makes List fail when the upstream lister returns an error other
// than repo-not-found.
Strict = "strict"
// Offline makes List return only what storage holds, without consulting
// the upstream lister, and makes Latest return a not-found error.
Offline = "offline"
// Fallback makes List return the versions held in storage when the
// upstream lister returns an error other than repo-not-found.
Fallback = "fallback"
)
// New builds the download.Protocol implementation used by the proxy from
// opts and then layers the given wrappers on top of it (see addons package).
//
// When opts.DownloadFile is nil it is set, on opts itself, to a DownloadFile
// whose Mode is mode.Sync.
//
// Wrappers are applied in the order they are passed, each one wrapping the
// result of the previous one, so the last wrapper is the Protocol that gets
// hit first.
func New(opts *Opts, wrappers ...Wrapper) Protocol {
	if opts.DownloadFile == nil {
		opts.DownloadFile = &mode.DownloadFile{Mode: mode.Sync}
	}

	var wrapped Protocol = &protocol{
		df:          opts.DownloadFile,
		storage:     opts.Storage,
		stasher:     opts.Stasher,
		lister:      opts.Lister,
		networkMode: opts.NetworkMode,
	}
	for i := range wrappers {
		wrapped = wrappers[i](wrapped)
	}
	return wrapped
}
// protocol is the Protocol implementation returned by New before any
// wrappers are applied. Its fields are populated from the matching Opts fields.
type protocol struct {
// df decides, per module, how processDownload handles a version that is
// missing from storage.
df *mode.DownloadFile
// storage is read first by List, Info, GoMod and Zip.
storage storage.Backend
// stasher fetches a missing module version on behalf of processDownload.
stasher stash.Stasher
// lister provides upstream versions to List and Latest.
lister module.UpstreamLister
// networkMode is compared against the Strict, Offline and Fallback constants.
networkMode string
}
// List returns the versions available for mod by combining what storage
// already holds with what the upstream lister reports.
//
// Storage and (unless the network mode is Offline) the upstream lister are
// queried concurrently. A storage error always fails the call. How an
// upstream error is treated depends on the network mode and on whether the
// error is a repo-not-found error; see the comments in the body.
func (p *protocol) List(ctx context.Context, mod string) ([]string, error) {
	const op errors.Op = "protocol.List"
	ctx, span := observ.StartSpan(ctx, op.String())
	defer span.End()

	var stored, upstream []string
	var storageErr, upstreamErr error
	var pending sync.WaitGroup

	/*
		TODO: potential refactor:
		Storage Lister: just return stuff from storage, or error otherwise.
		FallbackVCS lister: list from VCS, return empty list if error.
		StrictVCS Lister: list from VCS, error if it doesn't succeed.
		UnionLister(listers ...Lister): combines any number of listers.
	*/

	// Each goroutine writes only its own result variables, and they are read
	// only after Wait returns.
	pending.Add(1)
	go func() {
		defer pending.Done()
		stored, storageErr = p.storage.List(ctx, mod)
	}()
	if p.networkMode != Offline {
		pending.Add(1)
		go func() {
			defer pending.Done()
			_, upstream, upstreamErr = p.lister.List(ctx, mod)
		}()
	}
	pending.Wait()

	// An unexpected storage error means we cannot guarantee that the end
	// result contains all versions: a tag or repo could have been deleted.
	if storageErr != nil {
		return nil, errors.E(op, storageErr)
	}

	// In offline mode, just return what came from storage.
	if p.networkMode == Offline {
		return stored, nil
	}

	repoNotFound := false
	if upstreamErr != nil {
		repoNotFound = errors.IsRepoNotFoundErr(upstreamErr)
		if !repoNotFound {
			switch p.networkMode {
			case Strict:
				// If e.g. GitHub is unavailable we should fail as well so
				// that the behavior of the proxy is stable; otherwise we
				// would get different results the next time, once it is up
				// again.
				return nil, errors.E(op, upstreamErr)
			case Fallback:
				// VCS is down: return what we have in storage and do not
				// remove any pseudo versions.
				return stored, nil
			}
		}
	}

	// Storage has no versions and the repo was deleted/not found: there is
	// nothing Athens can serve, so return an error.
	if repoNotFound && len(stored) == 0 {
		return nil, errors.E(op, errors.M(mod), errors.KindNotFound, upstreamErr)
	}

	storedSemVers := removePseudoVersions(stored)

	// The repo does not exist but Athens already saved some versions: return
	// those so that running go get github.com/my/mod gives the newest saved
	// version. Only do that if exclusively pseudo-versions have been saved,
	// otherwise @latest would not return the latest stable version but the
	// latest commit.
	if repoNotFound && len(storedSemVers) == 0 {
		return stored, nil
	}

	// If the repo exists we have to filter out pseudo versions to prevent the
	// following scenario:
	//   - the user runs go get github.com/my/mod
	//   - there is no sem-ver, so the /list endpoint returns nothing and
	//     /latest gets hit
	//   - Athens saves the pseudo version x1
	//   - from now on, every go get github.com/my/mod yields pseudo version
	//     x1 even if a newer version x2 exists, because /list returns a
	//     non-empty list of versions (x1) and so /latest won't get hit
	return union(upstream, storedSemVers), nil
}
// pseudoVersionRE matches the textual shape of a Go pseudo-version, with an
// optional "+incompatible" suffix.
var pseudoVersionRE = regexp.MustCompile(`^v[0-9]+\.(0\.0-|\d+\.\d+-([^+]*\.)?0\.)\d{14}-[A-Za-z0-9]+(\+incompatible)?$`)

// removePseudoVersions returns the entries of allVersions that are not
// pseudo-versions, in their original order. The result is nil when no entry
// is kept.
func removePseudoVersions(allVersions []string) []string {
	var kept []string
	for i := range allVersions {
		candidate := allVersions[i]
		// Same check as the go command:
		// https://github.com/golang/go/blob/master/src/cmd/go/internal/modfetch/pseudo.go#L93
		if strings.Count(candidate, "-") >= 2 && pseudoVersionRE.MatchString(candidate) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
// Latest returns the revision info that the upstream lister reports as the
// latest for mod. In offline mode the lister is not consulted and a
// not-found error is returned instead.
func (p *protocol) Latest(ctx context.Context, mod string) (*storage.RevInfo, error) {
	const op errors.Op = "protocol.Latest"
	ctx, span := observ.StartSpan(ctx, op.String())
	defer span.End()

	if p.networkMode == Offline {
		// Go never pings the /@latest endpoint _first_. It always tries
		// /list, and only if that endpoint returns an empty list does it
		// fall back to calling /@latest.
		return nil, errors.E(op, "Athens is in offline mode, use /list endpoint", errors.KindNotFound)
	}

	latest, _, listErr := p.lister.List(ctx, mod)
	if listErr == nil {
		return latest, nil
	}
	return nil, errors.E(op, listErr)
}
// Info returns the .info content for mod at ver.
//
// Storage is consulted first. Only when it reports not-found is
// processDownload invoked; if that stashes the module, storage is read again
// using the version handed back by the stasher.
func (p *protocol) Info(ctx context.Context, mod, ver string) ([]byte, error) {
	const op errors.Op = "protocol.Info"
	ctx, span := observ.StartSpan(ctx, op.String())
	defer span.End()

	info, err := p.storage.Info(ctx, mod, ver)
	if errors.IsNotFoundErr(err) {
		// Re-read from storage once the module has been stashed.
		reload := func(stashedVer string) error {
			var reloadErr error
			info, reloadErr = p.storage.Info(ctx, mod, stashedVer)
			return reloadErr
		}
		err = p.processDownload(ctx, mod, ver, reload)
	}
	if err == nil {
		return info, nil
	}
	return nil, errors.E(op, err)
}
// GoMod returns the go.mod content for mod at ver.
//
// Storage is consulted first. Only when it reports not-found is
// processDownload invoked; if that stashes the module, storage is read again
// using the version handed back by the stasher.
func (p *protocol) GoMod(ctx context.Context, mod, ver string) ([]byte, error) {
	const op errors.Op = "protocol.GoMod"
	ctx, span := observ.StartSpan(ctx, op.String())
	defer span.End()

	goMod, err := p.storage.GoMod(ctx, mod, ver)
	if errors.IsNotFoundErr(err) {
		// Re-read from storage once the module has been stashed.
		reload := func(stashedVer string) error {
			var reloadErr error
			goMod, reloadErr = p.storage.GoMod(ctx, mod, stashedVer)
			return reloadErr
		}
		err = p.processDownload(ctx, mod, ver, reload)
	}
	if err == nil {
		return goMod, nil
	}
	return nil, errors.E(op, err)
}
// Zip returns the module archive for mod at ver.
//
// Storage is consulted first. Only when it reports not-found is
// processDownload invoked; if that stashes the module, storage is read again
// using the version handed back by the stasher.
func (p *protocol) Zip(ctx context.Context, mod, ver string) (storage.SizeReadCloser, error) {
	const op errors.Op = "protocol.Zip"
	ctx, span := observ.StartSpan(ctx, op.String())
	defer span.End()

	zip, err := p.storage.Zip(ctx, mod, ver)
	if errors.IsNotFoundErr(err) {
		// Re-read from storage once the module has been stashed.
		reload := func(stashedVer string) error {
			var reloadErr error
			zip, reloadErr = p.storage.Zip(ctx, mod, stashedVer)
			return reloadErr
		}
		err = p.processDownload(ctx, mod, ver, reload)
	}
	if err == nil {
		return zip, nil
	}
	return nil, errors.E(op, err)
}
// processDownload handles a module version that storage reported as missing,
// according to the download mode that p.df selects for mod:
//
//   - mode.Sync: stash the module now and, on success, call f with the version
//     returned by the stasher; f's result is returned.
//   - mode.Async: start stashing in the background and return a not-found error.
//   - mode.Redirect: return a redirect error without stashing.
//   - mode.AsyncRedirect: start stashing in the background and return a
//     redirect error.
//   - mode.None: return a not-found error without stashing.
//
// Any other mode returns nil without calling f.
//
// Stashing never uses the caller's context directly. It runs on a detached
// copy with its own 15 minute timeout, so that background stashing can
// continue after the HTTP request completes (which cancels the caller's
// context).
func (p *protocol) processDownload(ctx context.Context, mod, ver string, f func(newVer string) error) error {
	const op errors.Op = "protocol.processDownload"
	const stashTimeout = 15 * time.Minute

	// stashInBackground starts a stash that outlives this call. The detached
	// context is released by the goroutine itself once Stash returns;
	// cancelling it when processDownload returns would abort the stash
	// before it had a chance to run.
	stashInBackground := func() {
		bgCtx, cancel := copyContextWithCustomTimeout(ctx, stashTimeout)
		go func() {
			defer cancel()
			// Best effort: the request has already been answered, so there
			// is no caller left to report the outcome to.
			_, _ = p.stasher.Stash(bgCtx, mod, ver)
		}()
	}

	switch p.df.Match(mod) {
	case mode.Sync:
		stashCtx, cancel := copyContextWithCustomTimeout(ctx, stashTimeout)
		defer cancel()
		newVer, err := p.stasher.Stash(stashCtx, mod, ver)
		if err != nil {
			return errors.E(op, err)
		}
		return f(newVer)
	case mode.Async:
		stashInBackground()
		return errors.E(op, "async: module not found", errors.KindNotFound)
	case mode.Redirect:
		return errors.E(op, "redirect", errors.KindRedirect)
	case mode.AsyncRedirect:
		stashInBackground()
		return errors.E(op, "async_redirect: module not found", errors.KindRedirect)
	case mode.None:
		return errors.E(op, "none", errors.KindNotFound)
	}
	return nil
}
// union returns the distinct entries of list1 followed by the distinct
// entries of list2 that are not already present, preserving first-seen order.
//
// Neither input is modified: the lists are walked in turn rather than
// appended to one another, so spare capacity in list1's backing array is
// never written to. The result is always non-nil, even when both inputs are
// nil or empty.
func union(list1, list2 []string) []string {
	total := len(list1) + len(list2)
	unique := make([]string, 0, total)
	seen := make(map[string]struct{}, total)
	for _, list := range [][]string{list1, list2} {
		for _, v := range list {
			if _, ok := seen[v]; ok {
				continue
			}
			seen[v] = struct{}{}
			unique = append(unique, v)
		}
	}
	return unique
}
// copyContextWithCustomTimeout returns a context that is detached from ctx's
// cancellation and deadline: it is derived from context.Background with the
// given timeout. The request ID and log entry found in ctx are carried over.
//
// Contexts are immutable, so the context returned by each Set*InContext call
// must be kept; discarding it would drop the value being attached.
// NOTE(review): assumes requestid.SetInContext and log.SetEntryInContext
// return the derived context.Context — confirm against their signatures.
//
// The caller must invoke the returned CancelFunc once the work using the
// context has finished.
func copyContextWithCustomTimeout(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
	ctxCopy, cancel := context.WithTimeout(context.Background(), timeout)
	ctxCopy = requestid.SetInContext(ctxCopy, requestid.FromContext(ctx))
	ctxCopy = log.SetEntryInContext(ctxCopy, log.EntryFromContext(ctx))
	return ctxCopy, cancel
}