From 514ac4965992b165cd6a63fde71a2ba29c309d24 Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Mon, 9 Jul 2018 18:58:51 +0200 Subject: [PATCH] Support for exclude include (#212) * almost there * loading file * fixed after real test * fixed includes" * configurable filter name * custom error --- cmd/proxy/actions/app.go | 5 +- cmd/proxy/actions/app_proxy.go | 9 +- cmd/proxy/actions/cache_miss_fetcher.go | 19 ++- cmd/proxy/actions/cache_miss_handler.go | 19 ++- cmd/proxy/actions/cache_miss_reporter.go | 13 +- cmd/proxy/main.go | 7 +- pkg/config/env/filter.go | 11 ++ pkg/module/errors.go | 18 +++ pkg/module/filter.go | 193 +++++++++++++++++++++++ pkg/module/filterRule.go | 13 ++ pkg/module/filter_rule.go | 6 + pkg/module/filter_test.go | 56 +++++++ 12 files changed, 344 insertions(+), 25 deletions(-) create mode 100644 pkg/config/env/filter.go create mode 100644 pkg/module/errors.go create mode 100644 pkg/module/filter.go create mode 100644 pkg/module/filterRule.go create mode 100644 pkg/module/filter_rule.go create mode 100644 pkg/module/filter_test.go diff --git a/cmd/proxy/actions/app.go b/cmd/proxy/actions/app.go index 17ba46b7..dda12257 100644 --- a/cmd/proxy/actions/app.go +++ b/cmd/proxy/actions/app.go @@ -14,6 +14,7 @@ import ( "github.com/gobuffalo/gocraft-work-adapter" "github.com/gobuffalo/packr" "github.com/gomods/athens/pkg/config/env" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/user" "github.com/rs/cors" "github.com/unrolled/secure" @@ -105,7 +106,9 @@ func App() (*buffalo.App, error) { err = fmt.Errorf("error connecting to storage (%s)", err) return nil, err } - if err := addProxyRoutes(app, store); err != nil { + + mf := module.NewFilter() + if err := addProxyRoutes(app, store, mf); err != nil { err = fmt.Errorf("error adding proxy routes (%s)", err) return nil, err } diff --git a/cmd/proxy/actions/app_proxy.go b/cmd/proxy/actions/app_proxy.go index 616ffdb0..0cc6c07f 100644 --- a/cmd/proxy/actions/app_proxy.go +++ b/cmd/proxy/actions/app_proxy.go @@ -2,15 +2,16 @@ package actions import ( "github.com/gobuffalo/buffalo" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/storage" ) -func addProxyRoutes(app *buffalo.App, storage storage.Backend) error { +func addProxyRoutes(app *buffalo.App, storage storage.Backend, mf *module.Filter) error { app.GET("/", proxyHomeHandler) app.GET("/{module:.+}/@v/list", listHandler(storage)) - app.GET("/{module:.+}/@v/{version}.info", cacheMissHandler(versionInfoHandler(storage), app.Worker)) - app.GET("/{module:.+}/@v/{version}.mod", cacheMissHandler(versionModuleHandler(storage), app.Worker)) - app.GET("/{module:.+}/@v/{version}.zip", cacheMissHandler(versionZipHandler(storage), app.Worker)) + app.GET("/{module:.+}/@v/{version}.info", cacheMissHandler(versionInfoHandler(storage), app.Worker, mf)) + app.GET("/{module:.+}/@v/{version}.mod", cacheMissHandler(versionModuleHandler(storage), app.Worker, mf)) + app.GET("/{module:.+}/@v/{version}.zip", cacheMissHandler(versionZipHandler(storage), app.Worker, mf)) app.POST("/admin/upload/{module:[a-zA-Z./]+}/{version}", uploadHandler(storage)) app.POST("/admin/fetch/{module:[a-zA-Z./]+}/{owner}/{repo}/{ref}/{version}", fetchHandler(storage)) return nil diff --git a/cmd/proxy/actions/cache_miss_fetcher.go b/cmd/proxy/actions/cache_miss_fetcher.go index 01cbd4a7..bc3c69e6 100644 --- a/cmd/proxy/actions/cache_miss_fetcher.go +++ b/cmd/proxy/actions/cache_miss_fetcher.go @@ -7,6 +7,7 @@ import ( "github.com/gobuffalo/buffalo/worker" "github.com/gomods/athens/pkg/config/env" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/storage" olympusStore "github.com/gomods/athens/pkg/storage/olympus" ) @@ -19,27 +20,31 @@ const ( ) // GetProcessCacheMissJob processes queue of cache misses and downloads sources from active Olympus -func GetProcessCacheMissJob(s storage.Backend, w worker.Worker) worker.Handler { +func GetProcessCacheMissJob(s storage.Backend, w worker.Worker, mf *module.Filter) worker.Handler { return func(args worker.Args) (err error) { - module, version, err := parseArgs(args) + mod, version, err := parseArgs(args) if err != nil { return err } - if s.Exists(module, version) { + if !mf.ShouldProcess(mod) { + return module.NewErrModuleExcluded(mod) + } + + if s.Exists(mod, version) { return nil } // get module info - v, err := getModuleInfo(module, version) + v, err := getModuleInfo(mod, version) if err != nil { - process(module, version, args, w) + process(mod, version, args, w) return err } defer v.Zip.Close() - if err := s.Save(context.Background(), module, version, v.Mod, v.Zip, v.Info); err != nil { - process(module, version, args, w) + if err := s.Save(context.Background(), mod, version, v.Mod, v.Zip, v.Info); err != nil { + process(mod, version, args, w) } return err diff --git a/cmd/proxy/actions/cache_miss_handler.go b/cmd/proxy/actions/cache_miss_handler.go index aa2b78c1..dc7f9f99 100644 --- a/cmd/proxy/actions/cache_miss_handler.go +++ b/cmd/proxy/actions/cache_miss_handler.go @@ -6,20 +6,25 @@ import ( "github.com/gobuffalo/buffalo" "github.com/gobuffalo/buffalo/worker" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/paths" "github.com/gomods/athens/pkg/storage" ) -func cacheMissHandler(next buffalo.Handler, w worker.Worker) buffalo.Handler { +func cacheMissHandler(next buffalo.Handler, w worker.Worker, mf *module.Filter) buffalo.Handler { return func(c buffalo.Context) error { + params, err := paths.GetAllParams(c) + if err != nil { + log.Println(err) + return err + } + + if !mf.ShouldProcess(params.Module) { + return module.NewErrModuleExcluded(params.Module) + } + nextErr := next(c) if isModuleNotFoundErr(nextErr) { - params, err := paths.GetAllParams(c) - if err != nil { - log.Println(err) - return nextErr - } - // TODO: add separate worker instead of inline reports if err := queueCacheMissReportJob(params.Module, params.Version, app.Worker); err != nil { log.Println(err) diff --git a/cmd/proxy/actions/cache_miss_reporter.go b/cmd/proxy/actions/cache_miss_reporter.go index 81afa8ad..176fccf8 100644 --- a/cmd/proxy/actions/cache_miss_reporter.go +++ b/cmd/proxy/actions/cache_miss_reporter.go @@ -9,22 +9,27 @@ import ( "time" "github.com/gobuffalo/buffalo/worker" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/payloads" ) // GetCacheMissReporterJob porcesses queue of cache misses and reports them to Olympus -func GetCacheMissReporterJob(w worker.Worker) worker.Handler { +func GetCacheMissReporterJob(w worker.Worker, mf *module.Filter) worker.Handler { return func(args worker.Args) (err error) { - module, version, err := parseArgs(args) + mod, version, err := parseArgs(args) if err != nil { return err } - if err := reportCacheMiss(module, version); err != nil { + if !mf.ShouldProcess(mod) { + return module.NewErrModuleExcluded(mod) + } + + if err := reportCacheMiss(mod, version); err != nil { return err } - return queueCacheMissFetch(module, version, w) + return queueCacheMissFetch(mod, version, w) } } diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go index cf31eab4..09b4306d 100644 --- a/cmd/proxy/main.go +++ b/cmd/proxy/main.go @@ -5,6 +5,7 @@ import ( "log" "github.com/gomods/athens/cmd/proxy/actions" + "github.com/gomods/athens/pkg/module" "github.com/gomods/athens/pkg/storage" ) @@ -20,10 +21,12 @@ func main() { } w := app.Worker - if err := w.Register(actions.FetcherWorkerName, actions.GetProcessCacheMissJob(s, w)); err != nil { + mf := module.NewFilter() + if err := w.Register(actions.FetcherWorkerName, actions.GetProcessCacheMissJob(s, w, mf)); err != nil { log.Fatal(err) } - if err := w.Register(actions.ReporterWorkerName, actions.GetCacheMissReporterJob(w)); err != nil { + + if err := w.Register(actions.ReporterWorkerName, actions.GetCacheMissReporterJob(w, mf)); err != nil { log.Fatal(err) } diff --git a/pkg/config/env/filter.go b/pkg/config/env/filter.go new file mode 100644 index 00000000..3776400b --- /dev/null +++ b/pkg/config/env/filter.go @@ -0,0 +1,11 @@ +package env + +import "github.com/gobuffalo/envy" + +const defaultConfigurationFileName = "filter.conf" + +// IncludeExcludeFileName specifies file name for include exclude filter +// If no filename is specified it fallbacks to 'filter.conf' +func IncludeExcludeFileName() string { + return envy.Get("ATHENS_FILTER_FILENAME", defaultConfigurationFileName) +} diff --git a/pkg/module/errors.go b/pkg/module/errors.go new file mode 100644 index 00000000..45cb1ee2 --- /dev/null +++ b/pkg/module/errors.go @@ -0,0 +1,18 @@ +package module + +import "fmt" + +// ErrModuleExcluded is error returned when processing of error is skipped +// due to filtering rules +type ErrModuleExcluded struct { + module string +} + +func (e *ErrModuleExcluded) Error() string { + return fmt.Sprintf("Module %s is excluded", e.module) +} + +// NewErrModuleExcluded creates new ErrModuleExcluded +func NewErrModuleExcluded(module string) error { + return &ErrModuleExcluded{module: module} +} diff --git a/pkg/module/filter.go b/pkg/module/filter.go new file mode 100644 index 00000000..fa43ef05 --- /dev/null +++ b/pkg/module/filter.go @@ -0,0 +1,193 @@ +package module + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/gomods/athens/pkg/config/env" +) + +var ( + pathSeparator = "/" +) + +// Filter is a filter of modules +type Filter struct { + root ruleNode +} + +// NewFilter creates new filter based on rules defined in a configuration file +// WARNING: this is not concurrency safe +// Configuration consists of two operations + for include and - for exclude +// e.g. +// - github.com/a +// + github.com/a/b +// will communicate all modules except github.com/a and its children, but github.com/a/b will be communicated +// example 2: +// - +// + github.com/a +// will exclude all items from communication except github.com/a +func NewFilter() *Filter { + rn := newRule(Default) + modFilter := Filter{} + modFilter.root = rn + + modFilter.initFromConfig() + + return &modFilter +} + +// AddRule adds rule for specified path +func (f *Filter) AddRule(path string, rule FilterRule) { + f.ensurePath(path) + + segments := getPathSegments(path) + + if len(segments) == 0 { + f.root.rule = rule + return + } + + // look for latest node in a path + latest := f.root + for _, p := range segments[:len(segments)-1] { + latest = latest.next[p] + } + + // replace with updated node + last := segments[len(segments)-1] + rn := latest.next[last] + rn.rule = rule + latest.next[last] = rn +} + +// ShouldProcess evaluates path and determines if module should be communicated or not +func (f *Filter) ShouldProcess(path string) bool { + segs := getPathSegments(path) + rule := f.shouldProcess(segs...) + + // process everything unless it's excluded + return rule != Exclude +} + +func (f *Filter) ensurePath(path string) { + latest := f.root.next + pathSegments := getPathSegments(path) + + for _, p := range pathSegments { + if _, ok := latest[p]; !ok { + latest[p] = newRule(Default) + } + latest = latest[p].next + } +} + +func (f *Filter) shouldProcess(path ...string) FilterRule { + if len(path) == 0 { + return f.root.rule + } + + rules := make([]FilterRule, 0, len(path)) + rn := f.root + for _, p := range path { + if _, ok := rn.next[p]; !ok { + break + } + rn = rn.next[p] + rules = append(rules, rn.rule) + } + + if len(rules) == 0 { + return f.root.rule + } + + for i := len(rules) - 1; i >= 0; i-- { + if rules[i] != Default { + return rules[i] + } + } + + return f.root.rule +} + +func (f *Filter) initFromConfig() { + lines, err := getConfigLines() + + if err != nil || len(lines) == 0 { + return + } + + for _, line := range lines { + split := strings.Split(strings.TrimSpace(line), " ") + if len(split) > 2 { + continue + } + + fmt.Printf("SPLIT %v %#v\n", len(split), split) + ruleSign := strings.TrimSpace(split[0]) + rule := Default + switch ruleSign { + case "+": + rule = Include + case "-": + rule = Exclude + default: + continue + } + + // is root config + if len(split) == 1 { + f.AddRule("", rule) + continue + } + + path := strings.TrimSpace(split[1]) + f.AddRule(path, rule) + } +} + +func getPathSegments(path string) []string { + path = strings.TrimSpace(path) + path = strings.Trim(path, pathSeparator) + + if path == "" { + return []string{} + } + + return strings.Split(path, pathSeparator) +} + +func newRule(r FilterRule) ruleNode { + rn := ruleNode{} + rn.next = make(map[string]ruleNode) + rn.rule = r + + return rn +} + +func getConfigLines() ([]string, error) { + configName := env.IncludeExcludeFileName() + + f, err := os.Open(configName) + if err != nil { + return nil, err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + + var lines []string + for scanner.Scan() { + line := scanner.Text() + line = strings.TrimSpace(line) + if len(line) == 0 { + continue + } + + lines = append(lines, line) + } + + return lines, nil +} diff --git a/pkg/module/filterRule.go b/pkg/module/filterRule.go new file mode 100644 index 00000000..b52637d7 --- /dev/null +++ b/pkg/module/filterRule.go @@ -0,0 +1,13 @@ +package module + +// FilterRule defines behavior of module communication +type FilterRule int + +const ( + // Default filter rule does not alter default behavior + Default FilterRule = iota + // Include filter rule includes package and its children from communication + Include + // Exclude filter rule excludes package and its children from communication + Exclude +) diff --git a/pkg/module/filter_rule.go b/pkg/module/filter_rule.go new file mode 100644 index 00000000..952086eb --- /dev/null +++ b/pkg/module/filter_rule.go @@ -0,0 +1,6 @@ +package module + +type ruleNode struct { + next map[string]ruleNode + rule FilterRule +} diff --git a/pkg/module/filter_test.go b/pkg/module/filter_test.go new file mode 100644 index 00000000..15d24234 --- /dev/null +++ b/pkg/module/filter_test.go @@ -0,0 +1,56 @@ +package module + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +type FilterTests struct { + suite.Suite +} + +func Test_Filter(t *testing.T) { + suite.Run(t, new(FilterTests)) +} + +func (t *FilterTests) Test_IgnoreSimple() { + r := t.Require() + + f := NewFilter() + f.AddRule("github.com/a/b", Exclude) + + r.Equal(true, f.ShouldProcess("github.com/a")) + r.Equal(false, f.ShouldProcess("github.com/a/b")) + r.Equal(false, f.ShouldProcess("github.com/a/b/c")) + r.Equal(true, f.ShouldProcess("github.com/d")) + r.Equal(true, f.ShouldProcess("bitbucket.com/a/b")) +} + +func (t *FilterTests) Test_IgnoreParentAllowChildren() { + r := t.Require() + + f := NewFilter() + f.AddRule("github.com/a/b", Exclude) + f.AddRule("github.com/a/b/c", Include) + + r.Equal(true, f.ShouldProcess("github.com/a")) + r.Equal(false, f.ShouldProcess("github.com/a/b")) + r.Equal(true, f.ShouldProcess("github.com/a/b/c")) + r.Equal(true, f.ShouldProcess("github.com/d")) + r.Equal(true, f.ShouldProcess("bitbucket.com/a/b")) +} + +func (t *FilterTests) Test_OnlyAllowed() { + r := t.Require() + + f := NewFilter() + f.AddRule("github.com/a/b", Include) + f.AddRule("", Exclude) + + r.Equal(false, f.ShouldProcess("github.com/a")) + r.Equal(true, f.ShouldProcess("github.com/a/b")) + r.Equal(true, f.ShouldProcess("github.com/a/b/c")) + r.Equal(false, f.ShouldProcess("github.com/d")) + r.Equal(false, f.ShouldProcess("bitbucket.com/a/b")) +}