diff --git a/pkg/download/list_merge_test.go b/pkg/download/list_merge_test.go index 63b2c581..a71e002d 100644 --- a/pkg/download/list_merge_test.go +++ b/pkg/download/list_merge_test.go @@ -127,7 +127,7 @@ func TestListMerge(t *testing.T) { t.Fatal(err) } for _, v := range tc.strVersions { - s.Save(ctx, testModName, v, bts, io.NopCloser(bytes.NewReader(bts)), bts) + s.Save(ctx, testModName, v, bts, io.NopCloser(bytes.NewReader(bts)), nil, bts) } defer clearStorage(s, testModName, tc.strVersions) dp := New(&Opts{s, nil, &listerMock{versions: tc.goVersions, err: tc.goErr}, nil, Strict}) diff --git a/pkg/download/protocol_test.go b/pkg/download/protocol_test.go index d7070e01..1f2bd5f2 100644 --- a/pkg/download/protocol_test.go +++ b/pkg/download/protocol_test.go @@ -165,7 +165,7 @@ func TestListMode(t *testing.T) { networkMode: tc.networkmode, } for _, tag := range tc.storageTags { - err := strg.Save(ctx, tc.path, tag, []byte("mod"), bytes.NewReader([]byte("zip")), []byte("info")) + err := strg.Save(ctx, tc.path, tag, []byte("mod"), bytes.NewReader([]byte("zip")), nil, []byte("info")) require.NoError(t, err) } t.Run(tc.name, func(t *testing.T) { @@ -429,7 +429,7 @@ func TestDownloadProtocolWhenFetchFails(t *testing.T) { } fakeMod := testMod{"github.com/athens-artifacts/samplelib", "v1.0.0"} bts := []byte(fakeMod.mod + "@" + fakeMod.ver) - err = s.Save(context.Background(), fakeMod.mod, fakeMod.ver, bts, io.NopCloser(bytes.NewReader(bts)), bts) + err = s.Save(context.Background(), fakeMod.mod, fakeMod.ver, bts, io.NopCloser(bytes.NewReader(bts)), nil, bts) if err != nil { t.Fatal(err) } @@ -472,7 +472,7 @@ type mockStasher struct { } func (ms *mockStasher) Stash(ctx context.Context, mod string, ver string) (string, error) { - err := ms.s.Save(ctx, mod, ver, []byte("mod"), strings.NewReader("zip"), []byte("info")) + err := ms.s.Save(ctx, mod, ver, []byte("mod"), strings.NewReader("zip"), nil, []byte("info")) ms.ch <- true // signal async stashing is done return ver, err } diff --git a/pkg/module/go_get_fetcher.go b/pkg/module/go_get_fetcher.go index 88ea4c4d..4b2de490 100644 --- a/pkg/module/go_get_fetcher.go +++ b/pkg/module/go_get_fetcher.go @@ -3,8 +3,10 @@ package module import ( "bytes" "context" + "crypto/md5" //nolint:gosec "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -96,6 +98,26 @@ func (g *goGetFetcher) Fetch(ctx context.Context, mod, ver string) (*storage.Ver } storageVer.Mod = gomod + zipMD5, err := func() ([]byte, error) { + // Perform in a separate function to ensure file is closed + zipForChecksum, err := g.fs.Open(m.Zip) + if err != nil { + return nil, errors.E(op, err) + } + defer zipForChecksum.Close() + + //nolint:gosec + hash := md5.New() + if _, err := io.Copy(hash, zipForChecksum); err != nil { + return nil, errors.E(op, err) + } + + return hash.Sum(nil), nil + }() + if err != nil { + return nil, err + } + zip, err := g.fs.Open(m.Zip) if err != nil { return nil, errors.E(op, err) @@ -105,6 +127,7 @@ func (g *goGetFetcher) Fetch(ctx context.Context, mod, ver string) (*storage.Ver // if we close, then the caller will panic, and the alternative to make this work is // that we read into memory and return an io.ReadCloser that reads out of memory storageVer.Zip = &zipReadCloser{zip, g.fs, goPathRoot} + storageVer.ZipMD5 = zipMD5 return &storageVer, nil } diff --git a/pkg/stash/stasher.go b/pkg/stash/stasher.go index b7380f17..1c337be2 100644 --- a/pkg/stash/stasher.go +++ b/pkg/stash/stasher.go @@ -71,7 +71,7 @@ func (s *stasher) Stash(ctx context.Context, mod, ver string) (string, error) { return v.Semver, nil } } - err = s.storage.Save(ctx, mod, v.Semver, v.Mod, v.Zip, v.Info) + err = s.storage.Save(ctx, mod, v.Semver, v.Mod, v.Zip, v.ZipMD5, v.Info) if err != nil { return "", errors.E(op, err) } diff --git a/pkg/stash/stasher_test.go b/pkg/stash/stasher_test.go index 70291577..75bb66b5 100644 --- a/pkg/stash/stasher_test.go +++ b/pkg/stash/stasher_test.go @@ -84,7 +84,7 @@ type mockStorage struct { existsResponse bool } -func (ms *mockStorage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (ms *mockStorage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5 []byte, info []byte) error { ms.saveCalled = true ms.givenVersion = version return nil diff --git a/pkg/stash/with_azureblob_test.go b/pkg/stash/with_azureblob_test.go index 124ede39..9acbbfc0 100644 --- a/pkg/stash/with_azureblob_test.go +++ b/pkg/stash/with_azureblob_test.go @@ -73,6 +73,7 @@ func (ms *mockAzureBlobStasher) Stash(ctx context.Context, mod, ver string) (str ver, []byte("mod file"), strings.NewReader("zip file"), + nil, []byte("info file"), ) if err != nil { diff --git a/pkg/stash/with_gcs_test.go b/pkg/stash/with_gcs_test.go index d738a26d..0446e9c1 100644 --- a/pkg/stash/with_gcs_test.go +++ b/pkg/stash/with_gcs_test.go @@ -119,7 +119,7 @@ func TestWithGCSPartialFailure(t *testing.T) { } s := gs(ms) // We simulate a failure by manually passing an io.Reader that will fail. - err = ms.strg.Save(ctx, "stashmod", "v1.0.0", []byte(ms.content), fr, []byte(ms.content)) + err = ms.strg.Save(ctx, "stashmod", "v1.0.0", []byte(ms.content), fr, nil, []byte(ms.content)) if err == nil { // We *want* to fail. t.Fatal(err) @@ -172,6 +172,7 @@ func (ms *mockGCPStasher) Stash(ctx context.Context, mod, ver string) (string, e ver, []byte(ms.content), strings.NewReader(ms.content), + nil, []byte(ms.content), ) return "", err diff --git a/pkg/stash/with_redis_test.go b/pkg/stash/with_redis_test.go index 862ca012..ca28c9db 100644 --- a/pkg/stash/with_redis_test.go +++ b/pkg/stash/with_redis_test.go @@ -210,6 +210,7 @@ func (ms *mockRedisStasher) Stash(ctx context.Context, mod, ver string) (string, ver, []byte("mod file"), strings.NewReader("zip file"), + nil, []byte("info file"), ) if err != nil { diff --git a/pkg/storage/azureblob/saver.go b/pkg/storage/azureblob/saver.go index 98939942..e1736b12 100644 --- a/pkg/storage/azureblob/saver.go +++ b/pkg/storage/azureblob/saver.go @@ -11,7 +11,7 @@ import ( ) // Save implements the (./pkg/storage).Saver interface. -func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "azureblob.Save" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() diff --git a/pkg/storage/compliance/benchmarks.go b/pkg/storage/compliance/benchmarks.go index 390dada7..b0d89f09 100644 --- a/pkg/storage/compliance/benchmarks.go +++ b/pkg/storage/compliance/benchmarks.go @@ -33,6 +33,7 @@ func benchList(b *testing.B, s storage.Backend, reset func() error) { version, mock.Mod, mock.Zip, + mock.ZipMD5, mock.Info, ) require.NoError(b, err, "save for storage failed") @@ -65,6 +66,7 @@ func benchSave(b *testing.B, s storage.Backend, reset func() error) { version, mock.Mod, bytes.NewReader(zipBts), + mock.ZipMD5, mock.Info, ) require.NoError(b, err) @@ -87,7 +89,7 @@ func benchDelete(b *testing.B, s storage.Backend, reset func() error) { b.Run("delete", func(b *testing.B) { for i := 0; i < b.N; i++ { name := fmt.Sprintf("del-%s-%d", module, i) - err := s.Save(ctx, name, version, mock.Mod, bytes.NewReader(zipBts), mock.Info) + err := s.Save(ctx, name, version, mock.Mod, bytes.NewReader(zipBts), mock.ZipMD5, mock.Info) require.NoError(b, err, "saving %s for storage failed", name) err = s.Delete(ctx, name, version) require.NoError(b, err, "delete failed: %s", name) @@ -104,7 +106,7 @@ func benchExists(b *testing.B, s storage.Backend, reset func() error) { mock := getMockModule() ctx := context.Background() - err := s.Save(ctx, module, version, mock.Mod, mock.Zip, mock.Info) + err := s.Save(ctx, module, version, mock.Mod, mock.Zip, mock.ZipMD5, mock.Info) require.NoError(b, err) b.Run("exists", func(b *testing.B) { diff --git a/pkg/storage/compliance/tests.go b/pkg/storage/compliance/tests.go index 32f60113..51985368 100644 --- a/pkg/storage/compliance/tests.go +++ b/pkg/storage/compliance/tests.go @@ -3,6 +3,7 @@ package compliance import ( "bytes" "context" + "crypto/md5" "fmt" "io" "math/rand" @@ -76,6 +77,7 @@ func testListSuffix(t *testing.T, b storage.Backend) { version, mock.Mod, mock.Zip, + mock.ZipMD5, mock.Info, ) require.NoError(t, err, "Save for storage failed") @@ -114,6 +116,7 @@ func testList(t *testing.T, b storage.Backend) { version, mock.Mod, mock.Zip, + mock.ZipMD5, mock.Info, ) require.NoError(t, err, "Save for storage failed") @@ -135,7 +138,7 @@ func testGet(t *testing.T, b storage.Backend) { ver := "v1.2.3" mock := getMockModule() zipBts, _ := io.ReadAll(mock.Zip) - b.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.Info) + b.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.ZipMD5, mock.Info) defer b.Delete(ctx, modname, ver) info, err := b.Info(ctx, modname, ver) @@ -160,7 +163,7 @@ func testExists(t *testing.T, b storage.Backend) { ver := "v1.2.3" mock := getMockModule() zipBts, _ := io.ReadAll(mock.Zip) - b.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.Info) + b.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.ZipMD5, mock.Info) defer b.Delete(ctx, modname, ver) checker := storage.WithChecker(b) exists, err := checker.Exists(ctx, modname, ver) @@ -174,7 +177,7 @@ func testShouldNotExist(t *testing.T, b storage.Backend) { ver := "v1.2.3-pre.1" mock := getMockModule() zipBts, _ := io.ReadAll(mock.Zip) - err := b.Save(ctx, mod, ver, mock.Mod, bytes.NewReader(zipBts), mock.Info) + err := b.Save(ctx, mod, ver, mock.Mod, bytes.NewReader(zipBts), mock.ZipMD5, mock.Info) require.NoError(t, err, "should successfully safe a mock module") defer b.Delete(ctx, mod, ver) @@ -196,7 +199,7 @@ func testDelete(t *testing.T, b storage.Backend) { version := fmt.Sprintf("%s%d", "delete", rand.Int()) mock := getMockModule() - err := b.Save(ctx, modname, version, mock.Mod, mock.Zip, mock.Info) + err := b.Save(ctx, modname, version, mock.Mod, mock.Zip, mock.ZipMD5, mock.Info) require.NoError(t, err) err = b.Delete(ctx, modname, version) @@ -209,8 +212,9 @@ func testDelete(t *testing.T, b storage.Backend) { func getMockModule() *storage.Version { return &storage.Version{ - Info: []byte("123"), - Mod: []byte("456"), - Zip: io.NopCloser(bytes.NewReader([]byte("789"))), + Info: []byte("123"), + Mod: []byte("456"), + Zip: io.NopCloser(bytes.NewReader([]byte("789"))), + ZipMD5: md5.New().Sum([]byte("789")), } } diff --git a/pkg/storage/external/client.go b/pkg/storage/external/client.go index b33e5d21..f70076b3 100644 --- a/pkg/storage/external/client.go +++ b/pkg/storage/external/client.go @@ -81,7 +81,7 @@ func (s *service) Zip(ctx context.Context, mod, ver string) (storage.SizeReadClo return storage.NewSizer(body, size), nil } -func (s *service) Save(ctx context.Context, mod, ver string, modFile []byte, zip io.Reader, info []byte) error { +func (s *service) Save(ctx context.Context, mod, ver string, modFile []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "external.Save" var err error mod, err = module.EscapePath(mod) diff --git a/pkg/storage/external/server.go b/pkg/storage/external/server.go index b4a4b8ca..cb8adad5 100644 --- a/pkg/storage/external/server.go +++ b/pkg/storage/external/server.go @@ -109,7 +109,7 @@ func NewServer(strg storage.Backend) http.Handler { return } defer func() { _ = modZ.Close() }() - err = strg.Save(r.Context(), params.Module, params.Version, modFile, modZ, info) + err = strg.Save(r.Context(), params.Module, params.Version, modFile, modZ, nil, info) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return diff --git a/pkg/storage/fs/saver.go b/pkg/storage/fs/saver.go index bf826eee..3feb3846 100644 --- a/pkg/storage/fs/saver.go +++ b/pkg/storage/fs/saver.go @@ -11,7 +11,7 @@ import ( "github.com/spf13/afero" ) -func (s *storageImpl) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *storageImpl) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "fs.Save" _, span := observ.StartSpan(ctx, op.String()) defer span.End() diff --git a/pkg/storage/gcp/saver.go b/pkg/storage/gcp/saver.go index 24a4ed79..55d73031 100644 --- a/pkg/storage/gcp/saver.go +++ b/pkg/storage/gcp/saver.go @@ -20,11 +20,11 @@ import ( // // Uploaded files are publicly accessible in the storage bucket as per // an ACL rule. -func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "gcp.save" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() - err := s.save(ctx, module, version, mod, zip, info) + err := s.save(ctx, module, version, mod, zip, zipMD5, info) if err != nil { return errors.E(op, err) } @@ -37,26 +37,26 @@ func (s *Storage) SetStaleThreshold(threshold time.Duration) { s.staleThreshold = threshold } -func (s *Storage) save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *Storage) save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "gcp.save" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() gomodPath := config.PackageVersionedName(module, version, "mod") - err := s.upload(ctx, gomodPath, bytes.NewReader(mod), false) + err := s.upload(ctx, gomodPath, bytes.NewReader(mod), nil, false) // KindAlreadyExists means the file is uploaded (somewhere else) successfully. if err != nil && !errors.Is(err, errors.KindAlreadyExists) { return errors.E(op, err) } zipPath := config.PackageVersionedName(module, version, "zip") - err = s.upload(ctx, zipPath, zip, true) + err = s.upload(ctx, zipPath, zip, zipMD5, true) if err != nil && !errors.Is(err, errors.KindAlreadyExists) { return errors.E(op, err) } infoPath := config.PackageVersionedName(module, version, "info") - err = s.upload(ctx, infoPath, bytes.NewReader(info), false) + err = s.upload(ctx, infoPath, bytes.NewReader(info), nil, false) if err != nil && !errors.Is(err, errors.KindAlreadyExists) { return errors.E(op, err) } @@ -64,7 +64,7 @@ func (s *Storage) save(ctx context.Context, module, version string, mod []byte, return nil } -func (s *Storage) upload(ctx context.Context, path string, stream io.Reader, checkBefore bool) error { +func (s *Storage) upload(ctx context.Context, path string, stream io.Reader, md5 []byte, checkBefore bool) error { const op errors.Op = "gcp.upload" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() @@ -90,6 +90,10 @@ func (s *Storage) upload(ctx context.Context, path string, stream io.Reader, che DoesNotExist: true, }).NewWriter(cancelCtx) + if len(md5) > 0 { + wc.MD5 = md5 + } + // NOTE: content type is auto detected on GCP side and ACL defaults to public // Once we support private storage buckets this may need refactoring // unless there is a way to set the default perms in the project. diff --git a/pkg/storage/minio/saver.go b/pkg/storage/minio/saver.go index 864b4c18..73225b72 100644 --- a/pkg/storage/minio/saver.go +++ b/pkg/storage/minio/saver.go @@ -13,7 +13,7 @@ import ( minio "github.com/minio/minio-go/v6" ) -func (s *storageImpl) Save(ctx context.Context, module, vsn string, mod []byte, zip io.Reader, info []byte) error { +func (s *storageImpl) Save(ctx context.Context, module, vsn string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "storage.minio.Save" _, span := observ.StartSpan(ctx, op.String()) defer span.End() diff --git a/pkg/storage/mongo/mongo_test.go b/pkg/storage/mongo/mongo_test.go index 3f72ab0a..7639f00c 100644 --- a/pkg/storage/mongo/mongo_test.go +++ b/pkg/storage/mongo/mongo_test.go @@ -59,7 +59,7 @@ func TestQueryModuleVersionExists(t *testing.T) { backend := getStorage(t) zipBts, _ := io.ReadAll(mock.Zip) - backend.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.Info) + backend.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.ZipMD5, mock.Info) defer backend.Delete(ctx, modname, ver) info, err := query(ctx, backend, modname, ver) @@ -80,7 +80,7 @@ func TestQueryKindNotFoundErrorCases(t *testing.T) { backend := getStorage(t) zipBts, _ := io.ReadAll(mock.Zip) - backend.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), mock.Info) + backend.Save(ctx, modname, ver, mock.Mod, bytes.NewReader(zipBts), nil, mock.Info) defer backend.Delete(ctx, modname, ver) testCases := []struct { diff --git a/pkg/storage/mongo/saver.go b/pkg/storage/mongo/saver.go index 131c9097..370e3d3b 100644 --- a/pkg/storage/mongo/saver.go +++ b/pkg/storage/mongo/saver.go @@ -13,7 +13,7 @@ import ( ) // Save stores a module in mongo storage. -func (s *ModuleStore) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *ModuleStore) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "mongo.Save" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() diff --git a/pkg/storage/s3/saver.go b/pkg/storage/s3/saver.go index cbf6b5d8..4f7c8c13 100644 --- a/pkg/storage/s3/saver.go +++ b/pkg/storage/s3/saver.go @@ -13,7 +13,7 @@ import ( ) // Save implements the (github.com/gomods/athens/pkg/storage).Saver interface. -func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error { +func (s *Storage) Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error { const op errors.Op = "s3.Save" ctx, span := observ.StartSpan(ctx, op.String()) defer span.End() diff --git a/pkg/storage/saver.go b/pkg/storage/saver.go index 45eb73a9..75b3d8fb 100644 --- a/pkg/storage/saver.go +++ b/pkg/storage/saver.go @@ -7,5 +7,9 @@ import ( // Saver saves module metadata and its source to underlying storage. type Saver interface { - Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, info []byte) error + // Save saves the module metadata and its source to the storage. + // + // The caller MAY call zipMD5 with a nil value if the checksum is not available. + // The storage implementation MAY use the zipMD5 to verify the integrity of the zip file. + Save(ctx context.Context, module, version string, mod []byte, zip io.Reader, zipMD5, info []byte) error } diff --git a/pkg/storage/version.go b/pkg/storage/version.go index 4947c4bf..908e469d 100644 --- a/pkg/storage/version.go +++ b/pkg/storage/version.go @@ -6,6 +6,7 @@ import "io" type Version struct { Mod []byte Zip io.ReadCloser + ZipMD5 []byte Info []byte Semver string }