From 167cbbda8fc890aeeb899b44fca5c016cc10b107 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 5 Mar 2018 04:46:52 -0600 Subject: [PATCH] vendor: Re-vendor virtcontainers Bring the CPU constraints changes. 12f35ce qemu: enable CPU hotplug 925a70c pod: hot add/remove vCPUs before starting containers 4a84438 qemu: Add support for CPU hot add/remove c17c48d vendor: Constraint containerd vendoring c9996f3 filesystem: set correct access mode for pod dir tree 4f9cb18 kata_agent: Add nouuid option for xfs filesystem 085848b kata_agent: Provide hotplugged device the right rootfs path 9ef18a6 kata_agent: Update host and guest paths for Kata 2c88b8e kata_agent: Wait for rootfs if it is a hotplugged device 15a4d17 kata_agent: Support block device passthrough fd5b27f vendor: Update Kata agent protocol 0519e89 check: lint: ineffassign in qemu.go ae661c5 network: Fix lint errors 5c73774 shim: Start shims inside PID namespace 6d7f6e5 shim: Add ability to enter any set of namespaces f318b77 shim: Add the ability to spawn the shim in new namespaces fefb087 pkg: nsenter: Introduce a generic nsenter package 43c05c2 utils: setup: Add some more prereq checks to the setup script. 1a25bad docs: developers: Add some developer docs Fixes #219 Signed-off-by: Gabriela Cervantes --- Gopkg.lock | 4 +- Gopkg.toml | 2 +- .../containers/virtcontainers/Gopkg.lock | 19 ++- .../containers/virtcontainers/Gopkg.toml | 6 +- .../containers/virtcontainers/README.md | 6 + .../containers/virtcontainers/api_test.go | 2 + .../containers/virtcontainers/container.go | 55 +++++++++ .../virtcontainers/container_test.go | 51 ++++++++ .../containers/virtcontainers/filesystem.go | 6 +- .../virtcontainers/filesystem_test.go | 15 ++- .../virtcontainers/hyperstart_agent.go | 34 ++++- .../containers/virtcontainers/hypervisor.go | 13 ++ .../virtcontainers/hypervisor_test.go | 1 + .../containers/virtcontainers/kata_agent.go | 82 ++++++++++--- .../containers/virtcontainers/network.go | 4 +- .../virtcontainers/pkg/hyperstart/types.go | 11 ++ .../containers/virtcontainers/qemu.go | 116 +++++++++++++++++- .../containers/virtcontainers/qemu_amd64.go | 5 + .../virtcontainers/qemu_arch_base.go | 1 + .../containers/virtcontainers/qemu_arm64.go | 11 +- .../containers/virtcontainers/qemu_test.go | 2 + .../containers/virtcontainers/shim.go | 42 ++++--- .../containers/virtcontainers/utils.go | 15 +++ .../containers/virtcontainers/utils_test.go | 17 +++ 24 files changed, 461 insertions(+), 59 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 682052d..dc1b20e 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -7,7 +7,7 @@ "pkg/hyperstart", "pkg/hyperstart/mock" ] - revision = "6bccdf63e8c47e294c9e320aaa1c7b30e35350ca" + revision = "c88890875e8d4ce31a0664d6a7e7ca637aae9045" [[projects]] name = "github.com/davecgh/go-spew" @@ -53,6 +53,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "7b7e510e5315b97f4d052809fa25d56f1bd9b457f2fed92f1ef7e24a88a76b48" + inputs-digest = "2deb5e3eeb8c9668a8452bc2ab1b2db2e28d1e49dce4890e74ecac915e8687af" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index b3451f4..f5bc279 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -23,7 +23,7 @@ [[constraint]] name = "github.com/containers/virtcontainers" - revision = "6bccdf63e8c47e294c9e320aaa1c7b30e35350ca" + revision = "c88890875e8d4ce31a0664d6a7e7ca637aae9045" [[constraint]] name = "github.com/sirupsen/logrus" diff --git a/vendor/github.com/containers/virtcontainers/Gopkg.lock b/vendor/github.com/containers/virtcontainers/Gopkg.lock index d337a3d..0f42121 100644 --- a/vendor/github.com/containers/virtcontainers/Gopkg.lock +++ b/vendor/github.com/containers/virtcontainers/Gopkg.lock @@ -10,10 +10,9 @@ revision = "1d2a6a3ea132a86abd0731408b7dc34f2fc17d55" [[projects]] - branch = "master" name = "github.com/containerd/cri-containerd" packages = ["pkg/annotations"] - revision = "ca3b73899a159b17e49156f756d1c77e3b60bce4" + revision = "3d382e2f5dabe3bae62ceb9ded56bdee847008ee" [[projects]] name = "github.com/containernetworking/cni" @@ -55,7 +54,6 @@ revision = "342cbe0a04158f6dcb03ca0079991a51a4248c02" [[projects]] - branch = "master" name = "github.com/golang/protobuf" packages = [ "proto", @@ -64,7 +62,8 @@ "ptypes/duration", "ptypes/timestamp" ] - revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845" + revision = "925541529c1fa6821df4e44ce2723319eb2be768" + version = "v1.0.0" [[projects]] name = "github.com/intel/govmm" @@ -77,7 +76,7 @@ "protocols/client", "protocols/grpc" ] - revision = "11f8b0ad0a3bb5771b88cbccd0a9c7dca5fd8e56" + revision = "33eecb2a445f906811a5bc9713d2dafd10768d18" [[projects]] name = "github.com/kubernetes-incubator/cri-o" @@ -142,7 +141,7 @@ branch = "master" name = "golang.org/x/crypto" packages = ["ssh/terminal"] - revision = "0fcca4842a8d74bfddc2c96a073bd2a4d2a7a2e8" + revision = "91a49db82a88618983a78a06c1cbd4e00ab749ab" [[projects]] name = "golang.org/x/net" @@ -166,7 +165,6 @@ revision = "1d2aa6dbdea45adaaebb9905d0666e4537563829" [[projects]] - branch = "master" name = "golang.org/x/text" packages = [ "collate", @@ -184,13 +182,14 @@ "unicode/norm", "unicode/rangetable" ] - revision = "e19ae1496984b1c655b8044a65c0300a3c878dd3" + revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" + version = "v0.3.0" [[projects]] branch = "master" name = "google.golang.org/genproto" packages = ["googleapis/rpc/status"] - revision = "a8101f21cf983e773d0c1133ebc5424792003214" + revision = "2c5e7ac708aaa719366570dd82bda44541ca2a63" [[projects]] name = "google.golang.org/grpc" @@ -222,6 +221,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "40c24a43bb5ecd05c62d6b80cbd752cf6dc6291cae217936fa288e1dbb39c09f" + inputs-digest = "e967c53b1d55018feac5c3db0e1c142cb63042311aac76206a47ce7c0cd8316c" solver-name = "gps-cdcl" solver-version = 1 diff --git a/vendor/github.com/containers/virtcontainers/Gopkg.toml b/vendor/github.com/containers/virtcontainers/Gopkg.toml index 682f159..ca0638c 100644 --- a/vendor/github.com/containers/virtcontainers/Gopkg.toml +++ b/vendor/github.com/containers/virtcontainers/Gopkg.toml @@ -60,7 +60,11 @@ [[constraint]] name = "github.com/kata-containers/agent" - revision = "11f8b0ad0a3bb5771b88cbccd0a9c7dca5fd8e56" + revision = "33eecb2a445f906811a5bc9713d2dafd10768d18" + +[[constraint]] + name = "github.com/containerd/cri-containerd" + revision = "3d382e2f5dabe3bae62ceb9ded56bdee847008ee" [prune] non-go = true diff --git a/vendor/github.com/containers/virtcontainers/README.md b/vendor/github.com/containers/virtcontainers/README.md index e27d0cb..284c2e9 100644 --- a/vendor/github.com/containers/virtcontainers/README.md +++ b/vendor/github.com/containers/virtcontainers/README.md @@ -29,6 +29,7 @@ Table of Contents * [How to check if container uses devicemapper block device as its rootfs](#how-to-check-if-container-uses-devicemapper-block-device-as-its-rootfs) * [Devices](#devices) * [How to pass a device using VFIO-passthrough](#how-to-pass-a-device-using-vfio-passthrough) + * [Developers](#developers) # What is it ? @@ -346,3 +347,8 @@ PCI devices. The driver for the device needs to be present within the Clear Containers kernel. If the driver is missing, you can add it to your custom container kernel using the [osbuilder](https://github.com/clearcontainers/osbuilder) tooling. + +# Developers + +For information on how to build, develop and test `virtcontainers`, see the +[developer documentation](documentation/Developers.md). diff --git a/vendor/github.com/containers/virtcontainers/api_test.go b/vendor/github.com/containers/virtcontainers/api_test.go index 92a20f9..14c952d 100644 --- a/vendor/github.com/containers/virtcontainers/api_test.go +++ b/vendor/github.com/containers/virtcontainers/api_test.go @@ -883,6 +883,7 @@ func TestStatusPodSuccessfulStateReady(t *testing.T) { DefaultMemSz: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxQemuVCPUs, } expectedStatus := PodStatus{ @@ -938,6 +939,7 @@ func TestStatusPodSuccessfulStateRunning(t *testing.T) { DefaultMemSz: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxQemuVCPUs, } expectedStatus := PodStatus{ diff --git a/vendor/github.com/containers/virtcontainers/container.go b/vendor/github.com/containers/virtcontainers/container.go index a1d302e..ebd277b 100644 --- a/vendor/github.com/containers/virtcontainers/container.go +++ b/vendor/github.com/containers/virtcontainers/container.go @@ -58,6 +58,16 @@ type ContainerStatus struct { Annotations map[string]string } +// ContainerResources describes container resources +type ContainerResources struct { + // CPUQuota specifies the total amount of time in microseconds + // The number of microseconds per CPUPeriod that the container is guaranteed CPU access + CPUQuota int64 + + // CPUPeriod specifies the CPU CFS scheduler period of time in microseconds + CPUPeriod uint64 +} + // ContainerConfig describes one container runtime configuration. type ContainerConfig struct { ID string @@ -80,6 +90,9 @@ type ContainerConfig struct { // Device configuration for devices that must be available within the container. DeviceInfos []DeviceInfo + + // Resources container resources + Resources ContainerResources } // valid checks that the container configuration is valid. @@ -436,6 +449,10 @@ func createContainer(pod *Pod, contConfig ContainerConfig) (*Container, error) { return nil, err } + if err := c.addResources(); err != nil { + return nil, err + } + // Deduce additional system mount info that should be handled by the agent // inside the VM c.getSystemMountInfo() @@ -591,6 +608,10 @@ func (c *Container) stop() error { return err } + if err := c.removeResources(); err != nil { + return err + } + if err := c.detachDevices(); err != nil { return err } @@ -754,3 +775,37 @@ func (c *Container) detachDevices() error { return nil } + +func (c *Container) addResources() error { + //TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578 + if c.config == nil { + return nil + } + + vCPUs := ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + if vCPUs != 0 { + virtLog.Debugf("hot adding %d vCPUs", vCPUs) + if err := c.pod.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil { + return err + } + } + + return nil +} + +func (c *Container) removeResources() error { + //TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578 + if c.config == nil { + return nil + } + + vCPUs := ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + if vCPUs != 0 { + virtLog.Debugf("hot removing %d vCPUs", vCPUs) + if err := c.pod.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil { + return err + } + } + + return nil +} diff --git a/vendor/github.com/containers/virtcontainers/container_test.go b/vendor/github.com/containers/virtcontainers/container_test.go index 4aef3ec..a2e7a06 100644 --- a/vendor/github.com/containers/virtcontainers/container_test.go +++ b/vendor/github.com/containers/virtcontainers/container_test.go @@ -26,6 +26,7 @@ import ( "syscall" "testing" + vcAnnotations "github.com/containers/virtcontainers/pkg/annotations" "github.com/stretchr/testify/assert" ) @@ -281,3 +282,53 @@ func TestCheckPodRunningSuccessful(t *testing.T) { err := c.checkPodRunning("test_cmd") assert.Nil(t, err, "%v", err) } + +func TestContainerAddResources(t *testing.T) { + assert := assert.New(t) + + c := &Container{} + err := c.addResources() + assert.Nil(err) + + c.config = &ContainerConfig{Annotations: make(map[string]string)} + c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodSandbox) + err = c.addResources() + assert.Nil(err) + + c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodContainer) + err = c.addResources() + assert.Nil(err) + + c.config.Resources = ContainerResources{ + CPUQuota: 5000, + CPUPeriod: 1000, + } + c.pod = &Pod{hypervisor: &mockHypervisor{}} + err = c.addResources() + assert.Nil(err) +} + +func TestContainerRemoveResources(t *testing.T) { + assert := assert.New(t) + + c := &Container{} + err := c.addResources() + assert.Nil(err) + + c.config = &ContainerConfig{Annotations: make(map[string]string)} + c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodSandbox) + err = c.removeResources() + assert.Nil(err) + + c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodContainer) + err = c.removeResources() + assert.Nil(err) + + c.config.Resources = ContainerResources{ + CPUQuota: 5000, + CPUPeriod: 1000, + } + c.pod = &Pod{hypervisor: &mockHypervisor{}} + err = c.removeResources() + assert.Nil(err) +} diff --git a/vendor/github.com/containers/virtcontainers/filesystem.go b/vendor/github.com/containers/virtcontainers/filesystem.go index 61db4a5..69e9adf 100644 --- a/vendor/github.com/containers/virtcontainers/filesystem.go +++ b/vendor/github.com/containers/virtcontainers/filesystem.go @@ -88,7 +88,7 @@ const mountsFile = "mounts.json" const devicesFile = "devices.json" // dirMode is the permission bits used for creating a directory -const dirMode = os.FileMode(0750) +const dirMode = os.FileMode(0750) | os.ModeDir // storagePathSuffix is the suffix used for all storage paths const storagePathSuffix = "/virtcontainers/pods" @@ -154,7 +154,7 @@ func (fs *filesystem) Logger() *logrus.Entry { func (fs *filesystem) createAllResources(pod Pod) (err error) { for _, resource := range []podResource{stateFileType, configFileType} { _, path, _ := fs.podURI(pod.id, resource) - err = os.MkdirAll(path, os.ModeDir) + err = os.MkdirAll(path, dirMode) if err != nil { return err } @@ -163,7 +163,7 @@ func (fs *filesystem) createAllResources(pod Pod) (err error) { for _, container := range pod.containers { for _, resource := range []podResource{stateFileType, configFileType} { _, path, _ := fs.containerURI(pod.id, container.id, resource) - err = os.MkdirAll(path, os.ModeDir) + err = os.MkdirAll(path, dirMode) if err != nil { fs.deletePodResources(pod.id, nil) return err diff --git a/vendor/github.com/containers/virtcontainers/filesystem_test.go b/vendor/github.com/containers/virtcontainers/filesystem_test.go index ab8a679..5b00461 100644 --- a/vendor/github.com/containers/virtcontainers/filesystem_test.go +++ b/vendor/github.com/containers/virtcontainers/filesystem_test.go @@ -80,16 +80,27 @@ func TestFilesystemCreateAllResourcesSuccessful(t *testing.T) { for _, container := range contConfigs { configPath := filepath.Join(configStoragePath, testPodID, container.ID) - _, err = os.Stat(configPath) + s, err := os.Stat(configPath) if err != nil { t.Fatal(err) } + // Check we created the dirs with the correct mode + if s.Mode() != dirMode { + t.Fatal(fmt.Errorf("dirmode [%v] != expected [%v]", s.Mode(), dirMode)) + } + runPath := filepath.Join(runStoragePath, testPodID, container.ID) - _, err = os.Stat(runPath) + s, err = os.Stat(runPath) if err != nil { t.Fatal(err) } + + // Check we created the dirs with the correct mode + if s.Mode() != dirMode { + t.Fatal(fmt.Errorf("dirmode [%v] != expected [%v]", s.Mode(), dirMode)) + } + } } diff --git a/vendor/github.com/containers/virtcontainers/hyperstart_agent.go b/vendor/github.com/containers/virtcontainers/hyperstart_agent.go index ae3f908..29a73e7 100644 --- a/vendor/github.com/containers/virtcontainers/hyperstart_agent.go +++ b/vendor/github.com/containers/virtcontainers/hyperstart_agent.go @@ -27,6 +27,7 @@ import ( proxyClient "github.com/clearcontainers/proxy/client" "github.com/containers/virtcontainers/pkg/hyperstart" + ns "github.com/containers/virtcontainers/pkg/nsenter" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" ) @@ -314,7 +315,19 @@ func (h *hyper) exec(pod *Pod, c Container, cmd Cmd) (*Process, error) { Process: *hyperProcess, } - process, err := prepareAndStartShim(pod, h.shim, c.id, token, h.state.URL, cmd) + enterNSList := []ns.Namespace{ + { + PID: c.process.Pid, + Type: ns.NSTypeNet, + }, + { + PID: c.process.Pid, + Type: ns.NSTypePID, + }, + } + + process, err := prepareAndStartShim(pod, h.shim, c.id, + token, h.state.URL, cmd, []ns.NSType{}, enterNSList) if err != nil { return nil, err } @@ -411,6 +424,13 @@ func (h *hyper) startOneContainer(pod Pod, c *Container) error { Process: process, } + if c.config.Resources.CPUQuota != 0 && c.config.Resources.CPUPeriod != 0 { + container.Constraints = hyperstart.Constraints{ + CPUQuota: c.config.Resources.CPUQuota, + CPUPeriod: c.config.Resources.CPUPeriod, + } + } + container.SystemMountsInfo.BindMountDev = c.systemMountsInfo.BindMountDev if c.state.Fstype != "" { @@ -488,7 +508,17 @@ func (h *hyper) createContainer(pod *Pod, c *Container) (*Process, error) { return nil, err } - return prepareAndStartShim(pod, h.shim, c.id, token, h.state.URL, c.config.Cmd) + createNSList := []ns.NSType{ns.NSTypePID} + + enterNSList := []ns.Namespace{ + { + Path: pod.networkNS.NetNsPath, + Type: ns.NSTypeNet, + }, + } + + return prepareAndStartShim(pod, h.shim, c.id, token, + h.state.URL, c.config.Cmd, createNSList, enterNSList) } // startContainer is the agent Container starting implementation for hyperstart. diff --git a/vendor/github.com/containers/virtcontainers/hypervisor.go b/vendor/github.com/containers/virtcontainers/hypervisor.go index a39fcba..c9e32cd 100644 --- a/vendor/github.com/containers/virtcontainers/hypervisor.go +++ b/vendor/github.com/containers/virtcontainers/hypervisor.go @@ -51,6 +51,9 @@ const ( defaultBlockDriver = VirtioSCSI ) +// In some architectures the maximum number of vCPUs depends on the number of physical cores. +var defaultMaxQemuVCPUs = maxQemuVCPUs() + // deviceType describes a virtualized device type. type deviceType int @@ -81,6 +84,9 @@ const ( // vhostuserDev is a Vhost-user device type vhostuserDev + + // CPUDevice is CPU device type + cpuDev ) // Set sets an hypervisor type based on the input string. @@ -179,6 +185,9 @@ type HypervisorConfig struct { // Pod configuration VMConfig.VCPUs overwrites this. DefaultVCPUs uint32 + //DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM. + DefaultMaxVCPUs uint32 + // DefaultMem specifies default memory size in MiB for the VM. // Pod configuration VMConfig.Memory overwrites this. DefaultMemSz uint32 @@ -237,6 +246,10 @@ func (conf *HypervisorConfig) valid() (bool, error) { conf.BlockDeviceDriver = defaultBlockDriver } + if conf.DefaultMaxVCPUs == 0 { + conf.DefaultMaxVCPUs = defaultMaxQemuVCPUs + } + return true, nil } diff --git a/vendor/github.com/containers/virtcontainers/hypervisor_test.go b/vendor/github.com/containers/virtcontainers/hypervisor_test.go index c6619f9..a14892b 100644 --- a/vendor/github.com/containers/virtcontainers/hypervisor_test.go +++ b/vendor/github.com/containers/virtcontainers/hypervisor_test.go @@ -181,6 +181,7 @@ func TestHypervisorConfigDefaults(t *testing.T) { DefaultMemSz: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxQemuVCPUs, } if reflect.DeepEqual(hypervisorConfig, hypervisorConfigDefaultsExpected) == false { t.Fatal() diff --git a/vendor/github.com/containers/virtcontainers/kata_agent.go b/vendor/github.com/containers/virtcontainers/kata_agent.go index eca261e..ebc3dc3 100644 --- a/vendor/github.com/containers/virtcontainers/kata_agent.go +++ b/vendor/github.com/containers/virtcontainers/kata_agent.go @@ -28,6 +28,7 @@ import ( "syscall" vcAnnotations "github.com/containers/virtcontainers/pkg/annotations" + ns "github.com/containers/virtcontainers/pkg/nsenter" "github.com/containers/virtcontainers/pkg/uuid" kataclient "github.com/kata-containers/agent/protocols/client" "github.com/kata-containers/agent/protocols/grpc" @@ -42,12 +43,13 @@ var ( defaultKataID = "charch0" errorMissingProxy = errors.New("Missing proxy pointer") errorMissingOCISpec = errors.New("Missing OCI specification") - kataHostSharedDir = "/tmp/kata-containers/shared/pods/" - kataGuestSharedDir = "/tmp/kata-containers/shared/pods/" + kataHostSharedDir = "/run/kata-containers/shared/pods/" + kataGuestSharedDir = "/run/kata-containers/shared/containers/" mountGuest9pTag = "kataShared" type9pFs = "9p" devPath = "/dev" vsockSocketScheme = "vsock" + kataBlkDevType = "blk" ) // KataAgentConfig is a structure storing information needed @@ -297,7 +299,19 @@ func (k *kataAgent) exec(pod *Pod, c Container, cmd Cmd) (*Process, error) { return nil, err } - return prepareAndStartShim(pod, k.shim, c.id, req.ExecId, k.state.URL, cmd) + enterNSList := []ns.Namespace{ + { + PID: c.process.Pid, + Type: ns.NSTypeNet, + }, + { + PID: c.process.Pid, + Type: ns.NSTypePID, + }, + } + + return prepareAndStartShim(pod, k.shim, c.id, req.ExecId, + k.state.URL, cmd, []ns.NSType{}, enterNSList) } func (k *kataAgent) generateInterfacesAndRoutes(networkNS NetworkNamespace) ([]*grpc.Interface, []*grpc.Route, error) { @@ -569,7 +583,8 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { return nil, errorMissingOCISpec } - var containerStorage []*grpc.Storage + var ctrStorages []*grpc.Storage + var ctrDevices []*grpc.Device // The rootfs storage volume represents the container rootfs // mount point inside the guest. @@ -579,7 +594,8 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { rootfs := &grpc.Storage{} // This is the guest absolute root path for that container. - rootPath := filepath.Join(kataGuestSharedDir, c.id, rootfsDir) + rootPathParent := filepath.Join(kataGuestSharedDir, c.id) + rootPath := filepath.Join(rootPathParent, rootfsDir) if c.state.Fstype != "" { // This is a block based device rootfs. @@ -588,16 +604,34 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { if err != nil { return nil, err } + virtPath := filepath.Join(devPath, driveName) + + // Create a new device with empty ContainerPath so that we get + // the device being waited for by the agent inside the VM, + // without trying to match and update it into the OCI spec list + // of actual devices. The device corresponding to the rootfs is + // a very specific case. + rootfsDevice := &grpc.Device{ + Type: kataBlkDevType, + VmPath: virtPath, + ContainerPath: "", + } + + ctrDevices = append(ctrDevices, rootfsDevice) - rootfs.Source = filepath.Join(devPath, driveName) - rootfs.MountPoint = rootPath // Should we remove the "rootfs" suffix? + rootfs.Source = virtPath + rootfs.MountPoint = rootPathParent rootfs.Fstype = c.state.Fstype + if c.state.Fstype == "xfs" { + rootfs.Options = []string{"nouuid"} + } + // Add rootfs to the list of container storage. // We only need to do this for block based rootfs, as we // want the agent to mount it into the right location - // (/tmp/kata-containers/shared/pods/podID/ctrID/ - containerStorage = append(containerStorage, rootfs) + // (kataGuestSharedDir/ctrID/ + ctrStorages = append(ctrStorages, rootfs) } else { // This is not a block based device rootfs. @@ -605,9 +639,9 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { // shared drive between the host and the guest. // With 9pfs we don't need to ask the agent to // mount the rootfs as the shared directory - // (/tmp/kata-containers/shared/pods/) is already - // mounted in the guest. We only need to mount the - // rootfs from the host and it will show up in the guest. + // (kataGuestSharedDir) is already mounted in the + // guest. We only need to mount the rootfs from + // the host and it will show up in the guest. if err := bindMountContainerRootfs(kataHostSharedDir, pod.id, c.id, c.rootFs, false); err != nil { bindUnmountAllRootfs(kataHostSharedDir, *pod) return nil, err @@ -652,18 +686,20 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { continue } - deviceStorage := &grpc.Storage{ - Source: d.VirtPath, - MountPoint: d.DeviceInfo.ContainerPath, + kataDevice := &grpc.Device{ + Type: kataBlkDevType, + VmPath: d.VirtPath, + ContainerPath: d.DeviceInfo.ContainerPath, } - containerStorage = append(containerStorage, deviceStorage) + ctrDevices = append(ctrDevices, kataDevice) } req := &grpc.CreateContainerRequest{ ContainerId: c.id, ExecId: c.id, - Storages: containerStorage, + Storages: ctrStorages, + Devices: ctrDevices, OCI: grpcSpec, } @@ -671,7 +707,17 @@ func (k *kataAgent) createContainer(pod *Pod, c *Container) (*Process, error) { return nil, err } - return prepareAndStartShim(pod, k.shim, c.id, req.ExecId, k.state.URL, c.config.Cmd) + createNSList := []ns.NSType{ns.NSTypePID} + + enterNSList := []ns.Namespace{ + { + Path: pod.networkNS.NetNsPath, + Type: ns.NSTypeNet, + }, + } + + return prepareAndStartShim(pod, k.shim, c.id, req.ExecId, + k.state.URL, c.config.Cmd, createNSList, enterNSList) } func (k *kataAgent) startContainer(pod Pod, c *Container) error { diff --git a/vendor/github.com/containers/virtcontainers/network.go b/vendor/github.com/containers/virtcontainers/network.go index 8b710f5..41ef49c 100644 --- a/vendor/github.com/containers/virtcontainers/network.go +++ b/vendor/github.com/containers/virtcontainers/network.go @@ -1257,8 +1257,10 @@ func createEndpointsFromScan(networkNSPath string, config NetworkConfig) ([]Endp cnmLogger().WithField("interface", netInfo.Iface.Name).Info("Physical network interface found") endpoint, err = createPhysicalEndpoint(netInfo) } else { + var socketPath string + // Check if this is a dummy interface which has a vhost-user socket associated with it - socketPath, err := vhostUserSocketPath(netInfo) + socketPath, err = vhostUserSocketPath(netInfo) if err != nil { return err } diff --git a/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go b/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go index 035f3d7..eeed10d 100644 --- a/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go +++ b/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go @@ -202,6 +202,16 @@ type SystemMountsInfo struct { DevShmSize int `json:"devShmSize"` } +// Constraints describes the constrains for a container +type Constraints struct { + // CPUQuota specifies the total amount of time in microseconds + // The number of microseconds per CPUPeriod that the container is guaranteed CPU access + CPUQuota int64 + + // CPUPeriod specifies the CPU CFS scheduler period of time in microseconds + CPUPeriod uint64 +} + // Container describes a container running on a pod. type Container struct { ID string `json:"id"` @@ -216,6 +226,7 @@ type Container struct { RestartPolicy string `json:"restartPolicy"` Initialize bool `json:"initialize"` SystemMountsInfo SystemMountsInfo `json:"systemMountsInfo"` + Constraints Constraints `json:"constraints"` } // IPAddress describes an IP address and its network mask. diff --git a/vendor/github.com/containers/virtcontainers/qemu.go b/vendor/github.com/containers/virtcontainers/qemu.go index bd268e5..0e139f4 100644 --- a/vendor/github.com/containers/virtcontainers/qemu.go +++ b/vendor/github.com/containers/virtcontainers/qemu.go @@ -35,10 +35,18 @@ type qmpChannel struct { qmp *govmmQemu.QMP } +// CPUDevice represents a CPU device which was hot-added in a running VM +type CPUDevice struct { + // ID is used to identify this CPU in the hypervisor options. + ID string +} + // QemuState keeps Qemu's state type QemuState struct { Bridges []Bridge - UUID string + // HotpluggedCPUs is the list of CPUs that were hot-added + HotpluggedVCPUs []CPUDevice + UUID string } // qemu is an Hypervisor interface implementation for the Linux qemu hypervisor. @@ -593,6 +601,9 @@ func (q *qemu) hotplugBlockDevice(drive Drive, op operation) error { if q.config.BlockDeviceDriver == VirtioBlock { driver := "virtio-blk-pci" addr, bus, err := q.addDeviceToBridge(drive.ID) + if err != nil { + return err + } if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bus); err != nil { return err @@ -631,8 +642,11 @@ func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operati case blockDev: drive := devInfo.(Drive) return q.hotplugBlockDevice(drive, op) + case cpuDev: + vcpus := devInfo.(uint32) + return q.hotplugCPUs(vcpus, op) default: - return fmt.Errorf("Only hotplug for block devices supported for now, provided device type : %v", devType) + return fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } } @@ -652,6 +666,104 @@ func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) erro return q.pod.storage.storeHypervisorState(q.pod.id, q.state) } +func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { + if vcpus == 0 { + q.Logger().Warnf("cannot hotplug 0 vCPUs") + return nil + } + + defer func(qemu *qemu) { + if q.qmpMonitorCh.qmp != nil { + q.qmpMonitorCh.qmp.Shutdown() + } + }(q) + + qmp, err := q.qmpSetup() + if err != nil { + return err + } + + q.qmpMonitorCh.qmp = qmp + + if op == addDevice { + return q.hotplugAddCPUs(vcpus) + } + + return q.hotplugRemoveCPUs(vcpus) +} + +func (q *qemu) hotplugAddCPUs(amount uint32) error { + currentVCPUs := q.qemuConfig.SMP.CPUs + uint32(len(q.state.HotpluggedVCPUs)) + + // Don't exceed the maximum amount of vCPUs + if currentVCPUs+amount > q.config.DefaultMaxVCPUs { + return fmt.Errorf("Unable to hotplug %d CPUs, currently this POD has %d CPUs and the maximum amount of CPUs is %d", + amount, currentVCPUs, q.config.DefaultMaxVCPUs) + } + + // get the list of hotpluggable CPUs + hotpluggableVCPUs, err := q.qmpMonitorCh.qmp.ExecuteQueryHotpluggableCPUs(q.qmpMonitorCh.ctx) + if err != nil { + return fmt.Errorf("failed to query hotpluggable CPUs: %v", err) + } + + var hotpluggedVCPUs uint32 + for _, hc := range hotpluggableVCPUs { + // qom-path is the path to the CPU, non-empty means that this CPU is already in use + if hc.QOMPath != "" { + continue + } + + // CPU type, i.e host-x86_64-cpu + driver := hc.Type + cpuID := fmt.Sprintf("cpu-%d", len(q.state.HotpluggedVCPUs)) + socketID := fmt.Sprintf("%d", hc.Properties.Socket) + coreID := fmt.Sprintf("%d", hc.Properties.Core) + threadID := fmt.Sprintf("%d", hc.Properties.Thread) + if err := q.qmpMonitorCh.qmp.ExecuteCPUDeviceAdd(q.qmpMonitorCh.ctx, driver, cpuID, socketID, coreID, threadID); err != nil { + // don't fail, let's try with other CPU + continue + } + + // a new vCPU was added, update list of hotplugged vCPUs and check if all vCPUs were added + q.state.HotpluggedVCPUs = append(q.state.HotpluggedVCPUs, CPUDevice{cpuID}) + hotpluggedVCPUs++ + if hotpluggedVCPUs == amount { + // All vCPUs were hotplugged + return q.pod.storage.storeHypervisorState(q.pod.id, q.state) + } + } + + // All vCPUs were NOT hotplugged + if err := q.pod.storage.storeHypervisorState(q.pod.id, q.state); err != nil { + q.Logger().Errorf("failed to save hypervisor state after hotplug %d vCPUs: %v", hotpluggedVCPUs, err) + } + + return fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount) +} + +func (q *qemu) hotplugRemoveCPUs(amount uint32) error { + hotpluggedVCPUs := uint32(len(q.state.HotpluggedVCPUs)) + + // we can only remove hotplugged vCPUs + if amount > hotpluggedVCPUs { + return fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs) + } + + for i := uint32(0); i < amount; i++ { + // get the last vCPUs and try to remove it + cpu := q.state.HotpluggedVCPUs[len(q.state.HotpluggedVCPUs)-1] + if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, cpu.ID); err != nil { + return fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err) + } + + // remove from the list the vCPU hotunplugged + q.state.HotpluggedVCPUs = q.state.HotpluggedVCPUs[:len(q.state.HotpluggedVCPUs)-1] + } + + return q.pod.storage.storeHypervisorState(q.pod.id, q.state) +} + func (q *qemu) pausePod() error { return q.togglePausePod(true) } diff --git a/vendor/github.com/containers/virtcontainers/qemu_amd64.go b/vendor/github.com/containers/virtcontainers/qemu_amd64.go index 09ab2b8..6e978a6 100644 --- a/vendor/github.com/containers/virtcontainers/qemu_amd64.go +++ b/vendor/github.com/containers/virtcontainers/qemu_amd64.go @@ -78,6 +78,11 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } +// returns the maximum number of vCPUs supported +func maxQemuVCPUs() uint32 { + return uint32(240) +} + func newQemuArch(machineType string) qemuArch { if machineType == "" { machineType = defaultQemuMachineType diff --git a/vendor/github.com/containers/virtcontainers/qemu_arch_base.go b/vendor/github.com/containers/virtcontainers/qemu_arch_base.go index 02999b1..68c79c9 100644 --- a/vendor/github.com/containers/virtcontainers/qemu_arch_base.go +++ b/vendor/github.com/containers/virtcontainers/qemu_arch_base.go @@ -213,6 +213,7 @@ func (q *qemuArchBase) cpuTopology(vcpus uint32) govmmQemu.SMP { Sockets: vcpus, Cores: defaultCores, Threads: defaultThreads, + MaxCPUs: defaultMaxQemuVCPUs, } return smp diff --git a/vendor/github.com/containers/virtcontainers/qemu_arm64.go b/vendor/github.com/containers/virtcontainers/qemu_arm64.go index 4af471c..620401c 100644 --- a/vendor/github.com/containers/virtcontainers/qemu_arm64.go +++ b/vendor/github.com/containers/virtcontainers/qemu_arm64.go @@ -16,7 +16,11 @@ package virtcontainers -import govmmQemu "github.com/intel/govmm/qemu" +import ( + "runtime" + + govmmQemu "github.com/intel/govmm/qemu" +) type qemuArm64 struct { // inherit from qemuArchBase, overwrite methods if needed @@ -46,6 +50,11 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } +// returns the maximum number of vCPUs supported +func maxQemuVCPUs() uint32 { + return uint32(runtime.NumCPU()) +} + func newQemuArch(machineType string) qemuArch { if machineType == "" { machineType = defaultQemuMachineType diff --git a/vendor/github.com/containers/virtcontainers/qemu_test.go b/vendor/github.com/containers/virtcontainers/qemu_test.go index ac907b4..805a697 100644 --- a/vendor/github.com/containers/virtcontainers/qemu_test.go +++ b/vendor/github.com/containers/virtcontainers/qemu_test.go @@ -37,6 +37,7 @@ func newQemuConfig() HypervisorConfig { DefaultMemSz: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxQemuVCPUs, } } @@ -142,6 +143,7 @@ func TestQemuCPUTopology(t *testing.T) { Sockets: uint32(vcpus), Cores: defaultCores, Threads: defaultThreads, + MaxCPUs: defaultMaxQemuVCPUs, } vmConfig := Resources{ diff --git a/vendor/github.com/containers/virtcontainers/shim.go b/vendor/github.com/containers/virtcontainers/shim.go index ebd2ae6..e34ce6e 100644 --- a/vendor/github.com/containers/virtcontainers/shim.go +++ b/vendor/github.com/containers/virtcontainers/shim.go @@ -23,6 +23,7 @@ import ( "syscall" "time" + ns "github.com/containers/virtcontainers/pkg/nsenter" "github.com/mitchellh/mapstructure" "github.com/sirupsen/logrus" ) @@ -54,6 +55,8 @@ type ShimParams struct { Terminal bool Detach bool PID int + CreateNS []ns.NSType + EnterNS []ns.Namespace } // ShimConfig is the structure providing specific configuration @@ -151,7 +154,8 @@ func stopShim(pid int) error { return nil } -func prepareAndStartShim(pod *Pod, shim shim, cid, token, url string, cmd Cmd) (*Process, error) { +func prepareAndStartShim(pod *Pod, shim shim, cid, token, url string, cmd Cmd, + createNSList []ns.NSType, enterNSList []ns.Namespace) (*Process, error) { process := &Process{ Token: token, StartTime: time.Now().UTC(), @@ -164,13 +168,12 @@ func prepareAndStartShim(pod *Pod, shim shim, cid, token, url string, cmd Cmd) ( Console: cmd.Console, Terminal: cmd.Interactive, Detach: cmd.Detach, + CreateNS: createNSList, + EnterNS: enterNSList, } - var pid int - if err := pod.network.run(pod.networkNS.NetNsPath, func() (shimErr error) { - pid, shimErr = shim.start(*pod, shimParams) - return - }); err != nil { + pid, err := shim.start(*pod, shimParams) + if err != nil { return nil, err } @@ -188,6 +191,15 @@ func startShim(args []string, params ShimParams) (int, error) { cmd.Stderr = os.Stderr } + cloneFlags := 0 + for _, nsType := range params.CreateNS { + cloneFlags |= ns.CloneFlagsTable[nsType] + } + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(cloneFlags), + } + var f *os.File var err error if params.Console != "" { @@ -199,15 +211,11 @@ func startShim(args []string, params ShimParams) (int, error) { cmd.Stdin = f cmd.Stdout = f cmd.Stderr = f - cmd.SysProcAttr = &syscall.SysProcAttr{ - // Create Session - Setsid: true, - - // Set Controlling terminal to Ctty - Setctty: true, - Ctty: int(f.Fd()), - } - + // Create Session + cmd.SysProcAttr.Setsid = true + // Set Controlling terminal to Ctty + cmd.SysProcAttr.Setctty = true + cmd.SysProcAttr.Ctty = int(f.Fd()) } defer func() { if f != nil { @@ -215,7 +223,9 @@ func startShim(args []string, params ShimParams) (int, error) { } }() - if err := cmd.Start(); err != nil { + if err := ns.NsEnter(params.EnterNS, func() error { + return cmd.Start() + }); err != nil { return -1, err } diff --git a/vendor/github.com/containers/virtcontainers/utils.go b/vendor/github.com/containers/virtcontainers/utils.go index 7fcc66d..69e63f5 100644 --- a/vendor/github.com/containers/virtcontainers/utils.go +++ b/vendor/github.com/containers/virtcontainers/utils.go @@ -96,3 +96,18 @@ func writeToFile(path string, data []byte) error { return nil } + +// ConstraintsToVCPUs converts CPU quota and period to vCPUs +func ConstraintsToVCPUs(quota int64, period uint64) uint { + if quota != 0 && period != 0 { + // Use some math magic to round up to the nearest whole vCPU + // (that is, a partial part of a quota request ends up assigning + // a whole vCPU, for instance, a request of 1.5 'cpu quotas' + // will give 2 vCPUs). + // This also has the side effect that we will always allocate + // at least 1 vCPU. + return uint((uint64(quota) + (period - 1)) / period) + } + + return 0 +} diff --git a/vendor/github.com/containers/virtcontainers/utils_test.go b/vendor/github.com/containers/virtcontainers/utils_test.go index 7dbe695..dac0a1a 100644 --- a/vendor/github.com/containers/virtcontainers/utils_test.go +++ b/vendor/github.com/containers/virtcontainers/utils_test.go @@ -162,3 +162,20 @@ func TestWriteToFile(t *testing.T) { assert.True(t, reflect.DeepEqual(testData, data)) } + +func TestConstraintsToVCPUs(t *testing.T) { + assert := assert.New(t) + + vcpus := ConstraintsToVCPUs(0, 100) + assert.Zero(vcpus) + + vcpus = ConstraintsToVCPUs(100, 0) + assert.Zero(vcpus) + + expectedVCPUs := uint(4) + vcpus = ConstraintsToVCPUs(4000, 1000) + assert.Equal(expectedVCPUs, vcpus) + + vcpus = ConstraintsToVCPUs(4000, 1200) + assert.Equal(expectedVCPUs, vcpus) +}