Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User namespace #3

Draft
wants to merge 7 commits into
base: rata/sidecar-ordering-annotations-1.17
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/kubelet/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ go_library(
"kubelet_node_status_windows.go",
"kubelet_pods.go",
"kubelet_resources.go",
"kubelet_usernamespace_linux.go",
"kubelet_usernamespace_others.go",
"kubelet_volumes.go",
"pod_container_deletor.go",
"pod_workers.go",
Expand Down
62 changes: 62 additions & 0 deletions pkg/kubelet/container/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ type Runtime interface {
// This method just proxies a new runtimeConfig with the updated
// CIDR value down to the runtime shim.
UpdatePodCIDR(podCIDR string) error
// GetRuntimeConfigInfo returns the runtime's configuration details, e.g. whether user namespaces are enabled.
GetRuntimeConfigInfo() (*RuntimeConfigInfo, error)
// GetHostUID returns the uid from the host usernamespace that is mapped to the container usernamespace uid, containerUID
GetHostUID(containerUID int) (int, error)
// GetHostGID returns the gid from the host usernamespace that is mapped to the container usernamespace gid, containerGID
GetHostGID(containerGID int) (int, error)
}

// StreamingRuntime is the interface implemented by runtimes that handle the serving of the
Expand Down Expand Up @@ -465,6 +471,62 @@ type RuntimeStatus struct {
Conditions []RuntimeCondition
}

// RuntimeConfigInfo contains the runtime's configuration details, e.g. the
// user-namespace ID mappings between the host and containers.
type RuntimeConfigInfo struct {
// UserNamespaceConfig holds the user-namespace UID and GID mappings.
UserNamespaceConfig UserNamespaceConfigInfo
}

// UserNamespaceConfigInfo contains the runtime's user-namespace configuration.
type UserNamespaceConfigInfo struct {
// UidMappings lists the UID ranges mapped between container and host;
// GetHostUIDFor searches it in order and uses the first range that matches.
UidMappings []*UserNSMapping
// GidMappings lists the GID ranges mapped between container and host;
// GetHostGIDFor searches it in order and uses the first range that matches.
GidMappings []*UserNSMapping
}

// UserNSMapping represents one contiguous range of IDs mapped between the
// container user namespace and the host user namespace.
type UserNSMapping struct {
// ContainerID is the first ID of the range inside the container user namespace.
ContainerID uint32
// HostID is the host ID that ContainerID maps to; the following IDs of the
// range map to consecutive host IDs.
HostID uint32
// Size is the number of consecutive IDs covered by the range.
Size uint32
}

// IsUserNamespaceEnabled reports whether the user-namespace feature is enabled
// at the runtime. It returns false only when there is exactly one UID mapping
// whose HostID is 0 and whose Size is 4294967295; every other configuration,
// including an empty mapping list, is reported as enabled.
func (c *RuntimeConfigInfo) IsUserNamespaceEnabled() bool {
	uidMappings := c.UserNamespaceConfig.UidMappings
	if len(uidMappings) != 1 {
		return true
	}
	only := uidMappings[0]
	fullRangeFromRoot := only.HostID == uint32(0) && only.Size == uint32(4294967295)
	return !fullRangeFromRoot
}

// IsUserNamespaceSupported reports whether the user-namespace feature is
// supported by the runtime. It returns false only when there is exactly one
// UID mapping whose HostID and Size are both 0; every other configuration,
// including an empty mapping list, is reported as supported.
func (c *RuntimeConfigInfo) IsUserNamespaceSupported() bool {
	uidMappings := c.UserNamespaceConfig.UidMappings
	if len(uidMappings) != 1 {
		return true
	}
	only := uidMappings[0]
	zeroMapping := only.HostID == uint32(0) && only.Size == uint32(0)
	return !zeroMapping
}

// GetHostUIDFor returns the UID in the host user namespace that is mapped to
// the given UID in the container user namespace.
//
// The UID mappings are searched in order and the first range containing
// containerUID wins. Nil entries are skipped. If no range contains
// containerUID, -1 and an error are returned.
func (c *RuntimeConfigInfo) GetHostUIDFor(containerUID uint32) (int, error) {
	for _, mapping := range c.UserNamespaceConfig.UidMappings {
		if mapping == nil || containerUID < mapping.ContainerID {
			continue
		}
		// Compare the offset against Size instead of computing
		// ContainerID+Size: that sum wraps around in uint32 for large ranges
		// and would make valid IDs look out of range.
		offset := containerUID - mapping.ContainerID
		if offset >= mapping.Size {
			continue
		}
		// Widen before adding so HostID+offset cannot wrap around either.
		return int(uint64(mapping.HostID) + uint64(offset)), nil
	}
	return -1, fmt.Errorf("IdMapping not found for container usernamespace UID %v", containerUID)
}

// GetHostGIDFor returns the GID in the host user namespace that is mapped to
// the given GID in the container user namespace.
//
// The GID mappings are searched in order and the first range containing
// containerGID wins. Nil entries are skipped. If no range contains
// containerGID, -1 and an error are returned.
func (c *RuntimeConfigInfo) GetHostGIDFor(containerGID uint32) (int, error) {
	for _, mapping := range c.UserNamespaceConfig.GidMappings {
		if mapping == nil || containerGID < mapping.ContainerID {
			continue
		}
		// Compare the offset against Size instead of computing
		// ContainerID+Size: that sum wraps around in uint32 for large ranges
		// and would make valid IDs look out of range.
		offset := containerGID - mapping.ContainerID
		if offset >= mapping.Size {
			continue
		}
		// Widen before adding so HostID+offset cannot wrap around either.
		return int(uint64(mapping.HostID) + uint64(offset)), nil
	}
	return -1, fmt.Errorf("IdMapping not found for container usernamespace GID %v", containerGID)
}

// GetRuntimeCondition gets a specified runtime condition from the runtime status.
func (r *RuntimeStatus) GetRuntimeCondition(t RuntimeConditionType) *RuntimeCondition {
for i := range r.Conditions {
Expand Down
56 changes: 39 additions & 17 deletions pkg/kubelet/container/testing/fake_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,27 @@ type FakePod struct {
// FakeRuntime is a fake container runtime for testing.
//
// The embedded mutex guards the fields; the accessor methods lock it before
// recording the call in CalledFunctions and returning the configured values.
type FakeRuntime struct {
	sync.Mutex
	CalledFunctions   []string
	PodList           []*FakePod
	AllPodList        []*FakePod
	ImageList         []kubecontainer.Image
	APIPodStatus      v1.PodStatus
	PodStatus         kubecontainer.PodStatus
	StartedPods       []string
	KilledPods        []string
	StartedContainers []string
	KilledContainers  []string
	RuntimeStatus     *kubecontainer.RuntimeStatus
	// RuntimeConfigInfo and RuntimeConfigInfoErr are the values returned by
	// GetRuntimeConfigInfo.
	RuntimeConfigInfo    *kubecontainer.RuntimeConfigInfo
	RuntimeConfigInfoErr error
	VersionInfo          string
	APIVersionInfo       string
	RuntimeType          string
	Err                  error
	InspectErr           error
	StatusErr            error
	// RemappedUID and RemappedGID are the values returned by GetRemappedIds.
	RemappedUID int
	RemappedGID int
}

const FakeHost = "localhost:12345"
Expand Down Expand Up @@ -123,6 +127,8 @@ func (f *FakeRuntime) ClearCalls() {
f.StartedContainers = []string{}
f.KilledContainers = []string{}
f.RuntimeStatus = nil
f.RuntimeConfigInfo = nil
f.RuntimeConfigInfoErr = nil
f.VersionInfo = ""
f.RuntimeType = ""
f.Err = nil
Expand Down Expand Up @@ -205,6 +211,22 @@ func (f *FakeRuntime) Status() (*kubecontainer.RuntimeStatus, error) {
return f.RuntimeStatus, f.StatusErr
}

// GetRuntimeConfigInfo records the call in CalledFunctions and returns the
// fake's configured RuntimeConfigInfo and RuntimeConfigInfoErr.
//
// The return type is qualified with the kubecontainer package, where
// RuntimeConfigInfo is declared, like the other runtime types in this file.
func (f *FakeRuntime) GetRuntimeConfigInfo() (*kubecontainer.RuntimeConfigInfo, error) {
	f.Lock()
	defer f.Unlock()

	f.CalledFunctions = append(f.CalledFunctions, "GetRuntimeConfigInfo")
	return f.RuntimeConfigInfo, f.RuntimeConfigInfoErr
}

// GetRemappedIds records the call in CalledFunctions and returns the fake's
// configured RemappedUID and RemappedGID, in that order.
func (f *FakeRuntime) GetRemappedIds() (int, int) {
	f.Lock()
	defer f.Unlock()

	const callName = "GetRemappedIds"
	f.CalledFunctions = append(f.CalledFunctions, callName)

	uid, gid := f.RemappedUID, f.RemappedGID
	return uid, gid
}

func (f *FakeRuntime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
f.Lock()
defer f.Unlock()
Expand Down
15 changes: 15 additions & 0 deletions pkg/kubelet/container/testing/runtime_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ func (r *Mock) Status() (*kubecontainer.RuntimeStatus, error) {
return args.Get(0).(*kubecontainer.RuntimeStatus), args.Error(0)
}

// GetRuntimeConfigInfo returns the values configured on the mock for this
// call: result 0 as the runtime config and result 1 as the error.
//
// The type is qualified with the kubecontainer package, where
// RuntimeConfigInfo is declared, like the other runtime types in this file.
func (r *Mock) GetRuntimeConfigInfo() (*kubecontainer.RuntimeConfigInfo, error) {
	args := r.Called()
	return args.Get(0).(*kubecontainer.RuntimeConfigInfo), args.Error(1)
}

// GetHostUID forwards containerUID to the mock and returns the values
// configured for the call: result 0 as the host UID and result 1 as the error.
func (r *Mock) GetHostUID(containerUID int) (int, error) {
	results := r.Called(containerUID)
	hostUID := results.Get(0).(int)
	return hostUID, results.Error(1)
}

// GetHostGID forwards containerGID to the mock and returns the values
// configured for the call: result 0 as the host GID and result 1 as the error.
func (r *Mock) GetHostGID(containerGID int) (int, error) {
	results := r.Called(containerGID)
	hostGID := results.Get(0).(int)
	return hostGID, results.Error(1)
}

func (r *Mock) GetPods(all bool) ([]*kubecontainer.Pod, error) {
args := r.Called(all)
return args.Get(0).([]*kubecontainer.Pod), args.Error(1)
Expand Down
12 changes: 12 additions & 0 deletions pkg/kubelet/dockershim/docker_sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ func (ds *dockerService) PodSandboxStatus(ctx context.Context, req *runtimeapi.P
Network: networkNamespaceMode(r),
Pid: pidNamespaceMode(r),
Ipc: ipcNamespaceMode(r),
User: userNamespaceMode(r),
},
},
},
Expand Down Expand Up @@ -690,6 +691,17 @@ func ipcNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.Namespace
return runtimeapi.NamespaceMode_POD
}

// userNamespaceMode returns the user runtimeapi.NamespaceMode for this container.
// Supports: POD, NODE
//
// NODE is returned only when the container's HostConfig is present and its
// UsernsMode equals namespaceModeHost; every other case yields POD.
func userNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
	if container == nil || container.HostConfig == nil {
		return runtimeapi.NamespaceMode_POD
	}
	if string(container.HostConfig.UsernsMode) != namespaceModeHost {
		return runtimeapi.NamespaceMode_POD
	}
	return runtimeapi.NamespaceMode_NODE
}

func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) checkpointmanager.Checkpoint {
data := CheckpointData{}
for _, pm := range config.GetPortMappings() {
Expand Down
108 changes: 108 additions & 0 deletions pkg/kubelet/dockershim/docker_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@ package dockershim
import (
"context"
"fmt"
"io/ioutil"
"net/http"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -81,6 +85,10 @@ const (
// to kubelet behavior and system settings in addition to any API flags that may be introduced.
)

var (
	// linuxIDMappingRegexp matches one "owner:start:count" entry per line.
	// Submatch 1 is the owner, 2 the first subordinate ID and 3 the count.
	//
	// The owner may be any run of characters without ':' (login names with
	// digits, '-', '.' or '_', as well as numeric IDs). The pattern is
	// anchored to whole lines so that it cannot match in the middle of an
	// entry; lines starting with '#' or whitespace are not matched.
	linuxIDMappingRegexp = regexp.MustCompile(`(?m)^([^:#\s][^:\n]*):([0-9]+):([0-9]+)[ \t\r]*$`)
)

// CRIService includes all methods necessary for a CRI server.
type CRIService interface {
runtimeapi.RuntimeServiceServer
Expand Down Expand Up @@ -319,6 +327,106 @@ type dockerService struct {

// TODO: handle context.

// GetRuntimeConfigInfo returns the runtime's active user-namespace
// configuration, derived from the Docker client's Info() output. The response
// carries exactly one UID mapping and one GID mapping, both starting at
// container ID 0.
//
// When isUserNsEnabled reports true, both mappings use the host ID returned by
// getRemappedNonRootHostID and the sizes returned by getUserNsMappingSizes.
// Otherwise HostId is left at its zero value and Size_ is set to 4294967295.
// An error is returned if Info() or either helper fails.
func (ds *dockerService) GetRuntimeConfigInfo(_ context.Context, r *runtimeapi.GetRuntimeConfigInfoRequest) (*runtimeapi.GetRuntimeConfigInfoResponse, error) {
dockerInfo, err := ds.client.Info()
if err != nil {
return nil, fmt.Errorf("failed to execute Info() call to the Docker client: %v", err)
}
// Both mappings start at container ID 0; HostId and Size_ are filled in below.
uidMapping := &runtimeapi.LinuxIDMapping{ContainerId: uint32(0)}
gidMapping := &runtimeapi.LinuxIDMapping{ContainerId: uint32(0)}
Comment on lines +336 to +337

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it really needed to do uint32(0)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess not. Probably just to be explicit.


if isUserNsEnabled(dockerInfo) {
remappedNonRootHostID, err := getRemappedNonRootHostID(dockerInfo)
if err != nil {
return nil, fmt.Errorf("failed to get remappedNonRootHostID: %v", err)
}
uidMappingSize, gidMappingSize, err := getUserNsMappingSizes(remappedNonRootHostID)
if err != nil {
return nil, fmt.Errorf("failed to get user-namespace mapping sizes: %v", err)
}

// The same remapped host ID is used for the UID and the GID mapping;
// only the sizes are looked up separately.
uidMapping.HostId = remappedNonRootHostID
gidMapping.HostId = remappedNonRootHostID
uidMapping.Size_ = uidMappingSize
gidMapping.Size_ = gidMappingSize
} else {
// No remapping: HostId stays 0 and the size is the largest uint32 value.
uidMapping.Size_ = uint32(4294967295)
gidMapping.Size_ = uint32(4294967295)
}

linuxConfig := &runtimeapi.LinuxUserNamespaceConfig{
UidMappings: []*runtimeapi.LinuxIDMapping{uidMapping},
GidMappings: []*runtimeapi.LinuxIDMapping{gidMapping},
}
activeRuntimeConfig := &runtimeapi.ActiveRuntimeConfig{UserNamespaceConfig: linuxConfig}
return &runtimeapi.GetRuntimeConfigInfoResponse{RuntimeConfig: activeRuntimeConfig}, nil
}

// isUserNsEnabled reports whether any entry of dockerInfo.SecurityOptions
// contains the substring "userns". It returns false when there is no such
// entry, including when the list is empty.
func isUserNsEnabled(dockerInfo *dockertypes.Info) bool {
	securityOpts := dockerInfo.SecurityOptions
	for i := range securityOpts {
		if strings.Contains(securityOpts[i], "userns") {
			return true
		}
	}
	return false
}

// getRemappedNonRootHostID parses docker info to determine the ID in the host
// user namespace that is mapped to {U/G}ID 0 in the container user namespace.
//
// DockerRootDir must start with "/var/lib/docker/"; the first dot-separated
// component after that prefix is parsed as a decimal ID (presumably the
// directory is named "<uid>.<gid>" when remapping is on; verify against the
// Docker daemon documentation). An error is returned when the prefix is
// missing or the component is not a valid 32-bit unsigned integer.
func getRemappedNonRootHostID(dockerInfo *dockertypes.Info) (uint32, error) {
	const dockerRootPrefix = "/var/lib/docker/"

	if !strings.HasPrefix(dockerInfo.DockerRootDir, dockerRootPrefix) {
		return uint32(0), fmt.Errorf("unexpected DockerRootDir, %v. Expected prefixed with '/var/lib/docker' ", dockerInfo.DockerRootDir)
	}

	remappedDir := strings.TrimPrefix(dockerInfo.DockerRootDir, dockerRootPrefix)
	idField := strings.Split(remappedDir, ".")[0]

	// bitSize 32 makes ParseUint reject values that do not fit in uint32
	// instead of letting the conversion below truncate them silently.
	remappedNonRootHostID64, err := strconv.ParseUint(idField, 10, 32)
	if err != nil {
		return uint32(0), fmt.Errorf("failed to parse DockerRootDir, %v: %v", dockerInfo.DockerRootDir, err)
	}
	return uint32(remappedNonRootHostID64), nil
}
mauriciovasquezbernal marked this conversation as resolved.
Show resolved Hide resolved

// getUserNsMappingSizes returns the UID and GID mapping sizes, in that order,
// for the given remapped host ID.
//
// The UID size is looked up in /etc/subuid and the GID size in /etc/subgid,
// each via getIDMappingSize. The first failure (reading or looking up, UID
// before GID) is returned as an error together with zero sizes.
func getUserNsMappingSizes(remappedNonRootHostID uint32) (uint32, uint32, error) {
	subuidData, err := ioutil.ReadFile("/etc/subuid")
	if err != nil {
		return 0, 0, fmt.Errorf("failed to read /etc/subuid: %v", err)
	}
	uidSize, err := getIDMappingSize(subuidData, remappedNonRootHostID)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to get uid mapping size: %v", err)
	}

	subgidData, err := ioutil.ReadFile("/etc/subgid")
	if err != nil {
		return 0, 0, fmt.Errorf("failed to read /etc/subgid: %v", err)
	}
	gidSize, err := getIDMappingSize(subgidData, remappedNonRootHostID)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to get gid mapping size: %v", err)
	}

	return uidSize, gidSize, nil
}

// getIDMappingSize parses mappings, the content of a subordinate-ID file with
// one "owner:start:count" entry per line, and returns the count of the first
// entry whose start equals hostID.
//
// Blank lines, lines starting with '#' and lines that do not consist of
// exactly three ':'-separated fields are ignored. The owner field is not
// interpreted, so login names containing digits, '-' or '.' and numeric
// owners are all accepted. An error is returned when a start or count field
// is not a valid 32-bit unsigned integer, or when no entry starts at hostID.
func getIDMappingSize(mappings []byte, hostID uint32) (uint32, error) {
	for _, line := range strings.Split(string(mappings), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		fields := strings.Split(line, ":")
		if len(fields) != 3 {
			continue
		}

		start, err := strconv.ParseUint(fields[1], 10, 32)
		if err != nil {
			return 0, fmt.Errorf("error in parsing linux user-namespace mapping entry: %q: %v", line, err)
		}
		if uint32(start) != hostID {
			continue
		}

		size, err := strconv.ParseUint(fields[2], 10, 32)
		if err != nil {
			return 0, fmt.Errorf("error in parsing linux user-namespace mapping entry: %q: %v", line, err)
		}
		return uint32(size), nil
	}
	return 0, fmt.Errorf("could not find user-namespace mapping entry for ID %v", hostID)
}

// Version returns the runtime name, runtime version and runtime API version
func (ds *dockerService) Version(_ context.Context, r *runtimeapi.VersionRequest) (*runtimeapi.VersionResponse, error) {
v, err := ds.getDockerVersion()
Expand Down
6 changes: 6 additions & 0 deletions pkg/kubelet/dockershim/security_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ func modifyCommonNamespaceOptions(nsOpts *runtimeapi.NamespaceOption, hostConfig

// modifyHostOptionsForSandbox applies UsernsMode/IpcMode/NetworkMode/UTSMode to sandbox's dockercontainer.HostConfig.
func modifyHostOptionsForSandbox(nsOpts *runtimeapi.NamespaceOption, network *knetwork.PluginManager, hc *dockercontainer.HostConfig) {
if nsOpts.GetUser() == runtimeapi.NamespaceMode_NODE {
hc.UsernsMode = namespaceModeHost
}
if nsOpts.GetIpc() == runtimeapi.NamespaceMode_NODE {
hc.IpcMode = namespaceModeHost
}
Expand Down Expand Up @@ -199,6 +202,9 @@ func modifyHostOptionsForContainer(nsOpts *runtimeapi.NamespaceOption, podSandbo
hc.NetworkMode = dockercontainer.NetworkMode(sandboxNSMode)
hc.IpcMode = dockercontainer.IpcMode(sandboxNSMode)
hc.UTSMode = ""
if nsOpts.GetUser() == runtimeapi.NamespaceMode_NODE {
hc.UsernsMode = namespaceModeHost
}
mauriciovasquezbernal marked this conversation as resolved.
Show resolved Hide resolved

if nsOpts.GetNetwork() == runtimeapi.NamespaceMode_NODE {
hc.UTSMode = namespaceModeHost
Expand Down
Loading