From 385ee7f8dacecc4f3117d81032b2b491ee4ef6ca Mon Sep 17 00:00:00 2001 From: Red Daly Date: Sun, 3 Jul 2022 16:51:35 -0700 Subject: [PATCH] Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943) * Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp. This makes the memfs implementation consistent with the filesystem implementation of List(). The docstring for filesystem.Interface.List does not specify how the pattern should be interpretted. It says: "List expands a pattern to a list of filenames." Perhaps that docstring should be updated to be more specific. * Use "filepath" instead of "path" in memfs; add test cases. Also, accept input without a "memfs://" prefix. --- .../go/pkg/beam/io/filesystem/memfs/memory.go | 15 ++-- .../beam/io/filesystem/memfs/memory_test.go | 88 +++++++++++++++++-- 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go index 951605a24010..782ae4c16945 100644 --- a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go +++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go @@ -19,10 +19,11 @@ package memfs import ( "bytes" "context" + "fmt" "io" "io/ioutil" "os" - "regexp" + "path/filepath" "sort" "strings" "sync" @@ -54,14 +55,16 @@ func (f *fs) List(_ context.Context, glob string) ([]string, error) { f.mu.Lock() defer f.mu.Unlock() - pattern, err := regexp.Compile(glob) - if err != nil { - return nil, err - } + // As with other functions, the memfs:// prefix is optional. + globNoScheme := strings.TrimPrefix(glob, "memfs://") var ret []string for k := range f.m { - if pattern.MatchString(k) { + matched, err := filepath.Match(globNoScheme, strings.TrimPrefix(k, "memfs://")) + if err != nil { + return nil, fmt.Errorf("invalid glob pattern: %w", err) + } + if matched { ret = append(ret, k) } } diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go index b2b39b232e7a..3944ba724560 100644 --- a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go +++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go @@ -18,6 +18,7 @@ package memfs import ( "context" "os" + "path/filepath" "testing" "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem" @@ -105,7 +106,7 @@ func TestList(t *testing.T) { t.Fatalf("Write(%q) error = %v", name, err) } } - glob := "memfs://foo.*" + glob := "memfs://foo*" got, err := fs.List(ctx, glob) if err != nil { t.Errorf("error List(%q) = %v", glob, err) @@ -117,6 +118,81 @@ func TestList(t *testing.T) { } } +func TestListTable(t *testing.T) { + ctx := context.Background() + for _, tt := range []struct { + name string + files []string + pattern string + want []string + wantErr bool + }{ + { + name: "foo-star", + files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"}, + pattern: "memfs://foo*", + want: []string{"memfs://foo", "memfs://foobar"}, + }, + { + name: "foo-star-missing-memfs-prefix", + files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"}, + pattern: "foo*", + want: []string{"memfs://foo", "memfs://foobar"}, + }, + { + name: "bad-pattern", + files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"}, + pattern: "foo[", + wantErr: true, // invalid glob syntax + }, + { + name: "foo", + files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"}, + pattern: "memfs://foo", + want: []string{"memfs://foo"}, + }, + { + name: "dirs", + files: []string{ + "fizzbuzz", + filepath.Join("xyz", "12"), + filepath.Join("xyz", "1234"), + filepath.Join("xyz", "1235"), + "foobar", + "baz", + "bazfoo", + }, + pattern: "memfs://xyz/123*", + want: []string{ + "memfs://" + filepath.Join("xyz", "1234"), + "memfs://" + filepath.Join("xyz", "1235"), + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + fs := &fs{m: make(map[string][]byte)} + + for _, name := range tt.files { + if err := filesystem.Write(ctx, fs, name, []byte("contents")); err != nil { + t.Fatalf("Write(%q) error = %v", name, err) + } + } + got, err := fs.List(ctx, tt.pattern) + if gotErr := err != nil; gotErr != tt.wantErr { + t.Errorf("List(%q) got error %v, wantErr = %v", tt.pattern, err, tt.wantErr) + } + if err != nil { + return + } + + want := tt.want + if d := cmp.Diff(want, got); d != "" { + t.Errorf("List(%q) resulted in unexpected diff (-want, +got):\n %s", tt.pattern, d) + } + }) + } +} + func TestRemove(t *testing.T) { ctx := context.Background() fs := &fs{m: make(map[string][]byte)} @@ -133,14 +209,14 @@ func TestRemove(t *testing.T) { t.Errorf("error Remove(%q) = %v", toremove, err) } - got, err := fs.List(ctx, ".*") + got, err := fs.List(ctx, "memfs://*") if err != nil { - t.Errorf("error List(\".*\") = %v", err) + t.Errorf("error List(\"*\") = %v", err) } want := []string{"memfs://bazfoo", "memfs://fizzbuzz"} if d := cmp.Diff(want, got); d != "" { - t.Errorf("After Remove fs.List(\".*\") = %v, want %v", got, want) + t.Errorf("After Remove fs.List(\"*\") = %v, want %v", got, want) } } @@ -156,7 +232,7 @@ func TestCopy(t *testing.T) { if err := filesystem.Copy(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil { t.Fatalf("Copy() error = %v", err) } - glob := "memfs://fizz.*" + glob := "memfs://fizz*" got, err := fs.List(ctx, glob) if err != nil { t.Errorf("error List(%q) = %v", glob, err) @@ -188,7 +264,7 @@ func TestRename(t *testing.T) { if err := filesystem.Rename(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil { t.Fatalf("Rename() error = %v", err) } - glob := "memfs://fizz.*" + glob := "memfs://fizz*" got, err := fs.List(ctx, glob) if err != nil { t.Errorf("error List(%q) = %v", glob, err)