Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ platform = "linux/amd64"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}
Comment on lines +66 to +68

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At a minimum, this needs to be scoped to nodes where cgroup v2 is used. That's the default on Bottlerocket but it's still possible to switch back to v1. For that, I'd recommend adding a guard helper to schnauzer.

I'm also skeptical that it makes sense to enable this system-wide; if a container isn't prepared to lock down the delegated hierarchy, it could be exposed to additional risks.

A better approach would be a per-pod annotation so that individual pods can opt in.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bcressey Thanks for the review!

For concern 1, I agree — I'll add an is_cgroup_v2 guard helper in schnauzer (following the existing pattern for system-detection helpers like fips_enabled) so this only takes effect on nodes running cgroup v2.

Regarding concerns 2 and 3, I've been looking into this but I'm still getting familiar with the full architecture. From what I can tell, a per-pod approach could work via Kubernetes RuntimeClasses — containerd already supports per-runtime cgroup_writable values and maps them from the pod's runtimeClassName through GetSandboxRuntime(). So we could define a second runtime (e.g. runc-cgroup-writable) in the containerd config template with the flag enabled, keeping it off by default and letting pods opt in. This wouldn't require upstream changes.

I also noticed that an NRI plugin could potentially achieve this by intercepting CreateContainer events and replacing the cgroup mount options based on pod annotations, though Bottlerocket doesn't ship any NRI plugins today so that would be a bigger lift.

I'm not deeply familiar with how Bottlerocket typically handles this kind of per-pod configuration, so I'd really appreciate your guidance on which direction makes sense here — or if there's another approach I'm not seeing.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bcressey Is there a path we can explore to continue this conversation?

Thanks!


[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
SystemdCgroup = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,25 @@ default_runtime_name = "nvidia"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

# CDI only nvidia container runtime
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia-cdi]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

# legacy only nvidia container runtime
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia-legacy]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia.options]
SystemdCgroup = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ platform = "linux/amd64"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
SystemdCgroup = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,25 @@ default_runtime_name = "nvidia"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

# CDI only nvidia container runtime
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia-cdi]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

# legacy only nvidia container runtime
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia-legacy]
runtime_type = "io.containerd.runc.v2"
base_runtime_spec = "/etc/containerd/cri-base.json"
{{#if settings.container-runtime.cgroup-writable}}
cgroup_writable = {{settings.container-runtime.cgroup-writable}}
{{/if}}

[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia.options]
SystemdCgroup = true
Expand Down