diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go index 1b67fda85c6..d459ba8792c 100644 --- a/libcontainer/seccomp/patchbpf/enosys_linux.go +++ b/libcontainer/seccomp/patchbpf/enosys_linux.go @@ -224,16 +224,30 @@ type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSy // representation, but SCMP_ARCH_X32 means we have to track cases where the // same architecture has different largest syscalls based on the mode. func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) { - lastSyscalls := make(lastSyscallMap) - // Only loop over architectures which are present in the filter. Any other - // architectures will get the libseccomp bad architecture action anyway. + scmpArchs := make(map[libseccomp.ScmpArch]struct{}) for _, ociArch := range config.Architectures { arch, err := libseccomp.GetArchFromString(ociArch) if err != nil { return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err) } + scmpArchs[arch] = struct{}{} + } + // On architectures like ppc64le, Docker inexplicably doesn't include the + // native architecture in the architecture list which results in no + // architectures being present in the list at all (rendering the ENOSYS + // stub a no-op). So, always include the native architecture. + if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil { + return nil, fmt.Errorf("unable to get native arch: %w", err) + } else if _, ok := scmpArchs[nativeScmpArch]; !ok { + logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch) + scmpArchs[nativeScmpArch] = struct{}{} + } + logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs) - // Figure out native architecture representation of the architecture. + // Only loop over architectures which are present in the filter. Any other + // architectures will get the libseccomp bad architecture action anyway. + lastSyscalls := make(lastSyscallMap) + for arch := range scmpArchs { auditArch, err := scmpArchToAuditArch(arch) if err != nil { return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err) diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go index bdfeff68adb..3d442e1daa6 100644 --- a/libcontainer/seccomp/patchbpf/enosys_linux_test.go +++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go @@ -12,6 +12,7 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" libseccomp "github.com/seccomp/libseccomp-golang" + "github.com/sirupsen/logrus" "golang.org/x/net/bpf" ) @@ -105,6 +106,18 @@ var testArches = []string{ "ppc64le", "s390", "s390x", + // Dummy value to indicate a configuration with no architecture specified. + "native", +} + +var nativeArch string + +func init() { + scmpNativeArch, err := libseccomp.GetNativeArch() + if err != nil { + logrus.Panicf("get native arch: %v", err) + } + nativeArch = scmpNativeArch.String() } func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) { @@ -155,6 +168,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) expected uint32 } + if arch == "native" { + arch = nativeArch + } scmpArch, err := libseccomp.GetArchFromString(arch) if err != nil { t.Fatalf("unknown libseccomp architecture %q: %v", arch, err) @@ -228,8 +244,15 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) // Test syscalls in the explicit list. for _, test := range syscallTests { - // Override the expected value in the two special cases. - if !archSet[arch] || isAllowAction(defaultAction) { + // Override the expected value in the two special cases: + // 1. If the default action is allow, the filter won't have + // the stub prepended so we expect a fallthrough. + // 2. If the executing architecture is not in the architecture + // set, then the architecture is not handled by the stub -- + // *except* in the case of the native architecture (which + // is always included in the stub). + if isAllowAction(defaultAction) || + (!archSet[arch] && arch != nativeArch) { test.expected = retFallthrough } @@ -263,7 +286,14 @@ var testActions = map[string]configs.Action{ func TestEnosysStub_SingleArch(t *testing.T) { for _, arch := range testArches { - arches := []string{arch} + var arches []string + // "native" indicates a blank architecture field for seccomp, to test + // the case where the running architecture was not included in the + // architecture. Docker doesn't always set the architecture for some + // reason (namely for ppc64le). + if arch != "native" { + arches = append(arches, arch) + } t.Run("arch="+arch, func(t *testing.T) { for name, action := range testActions { t.Run("action="+name, func(t *testing.T) { @@ -277,7 +307,16 @@ func TestEnosysStub_SingleArch(t *testing.T) { func TestEnosysStub_MultiArch(t *testing.T) { for end := 0; end < len(testArches); end++ { for start := 0; start < end; start++ { - arches := testArches[start:end] + var arches []string + for _, arch := range testArches[start:end] { + // "native" indicates a blank architecture field for seccomp, to test + // the case where the running architecture was not included in the + // architecture. Docker doesn't always set the architecture for some + // reason (namely for ppc64le). + if arch != "native" { + arches = append(arches, arch) + } + } if len(arches) <= 1 { continue }