Skip to content

Commit 64e28c4

Browse files
committed
nvidia-k8s-device-plugin: configure driver root for container path normalization
Set containerDriverRoot to the Bottlerocket sysroot path so the device plugin discovers libraries correctly and generates CDI specs with normalized /usr/lib/ container paths.

Add --additional-symlinks support patches (1002, 1003) to the device plugin's vendored nvidia-container-toolkit code.

Signed-off-by: Maher Homsi <maherhom@amazon.com>
1 parent 81007ee commit 64e28c4

4 files changed

Lines changed: 193 additions & 1 deletion

File tree

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2+
From: Maher Homsi <maherhom@amazon.com>
3+
Date: Wed, 16 Apr 2025 00:00:00 +0000
4+
Subject: [PATCH] Enable library path compatibility symlinks
5+
6+
Enable the CreateLibSymlinksHook to generate backwards-compatibility
7+
symlinks in containers for library paths that have moved.
8+
9+
Signed-off-by: Maher Homsi <maherhom@amazon.com>
10+
---
11+
internal/cdi/cdi.go | 1 +
12+
1 file changed, 1 insertion(+)
13+
14+
diff --git a/internal/cdi/cdi.go b/internal/cdi/cdi.go
15+
index bee83c6e4..a1b2c3d4e 100644
16+
--- a/internal/cdi/cdi.go
17+
+++ b/internal/cdi/cdi.go
18+
@@ -127,6 +127,7 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
19+
nvcdi.WithNvmlLib(c.nvmllib),
20+
nvcdi.WithVendor(c.vendor),
21+
nvcdi.WithDisabledHook(nvcdi.HookEnableCudaCompat),
22+
+ nvcdi.WithEnabledHooks(nvcdi.CreateLibSymlinksHook),
23+
}
24+
25+
c.cdilibs = make(map[string]nvcdi.SpecGenerator)
26+
--
27+
2.52.0
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
From 71fc7cb6a3c15ca99e7a9de859c2ace90a197f16 Mon Sep 17 00:00:00 2001
2+
From: Maher Homsi <maherhom@amazon.com>
3+
Date: Tue, 28 Apr 2026 22:35:11 +0000
4+
Subject: [PATCH] vendor: add CreateLibSymlinksHook to vendored
5+
nvidia-container-toolkit
6+
7+
Update the vendored nvidia-container-toolkit to include the
8+
CreateLibSymlinksHook feature. This is needed because patch 1002 enables
9+
this hook in the device plugin, but the vendored toolkit code does not
10+
include it.
11+
12+
Signed-off-by: Maher Homsi <maherhom@amazon.com>
13+
---
14+
.../internal/discover/hooks.go | 15 +++-
15+
.../internal/discover/lib_path_symlinks.go | 57 +++++++++++++++++++
16+
.../nvidia-container-toolkit/pkg/nvcdi/api.go | 2 +
17+
.../pkg/nvcdi/driver-nvml.go | 3 +
18+
4 files changed, 74 insertions(+), 3 deletions(-)
19+
create mode 100644 vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go
20+
21+
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
22+
index 66bef75..a1f608a 100644
23+
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
24+
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
25+
@@ -36,6 +36,8 @@ const (
26+
ChmodHook = HookName("chmod")
27+
// A CreateSymlinksHook is used to create symlinks in the container.
28+
CreateSymlinksHook = HookName("create-symlinks")
29+
+ // A CreateLibSymlinksHook is used to create library path compatibility symlinks.
30+
+ CreateLibSymlinksHook = HookName("create-lib-symlinks")
31+
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
32+
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
33+
// container.
34+
@@ -57,6 +59,8 @@ var defaultDisabledHooks = []HookName{
35+
// ChmodHook is disabled by default as it was a workaround for older
36+
// versions of crun that has since been fixed.
37+
ChmodHook,
38+
+ // CreateLibSymlinksHook is disabled by default; opt-in for backwards compat.
39+
+ CreateLibSymlinksHook,
40+
}
41+
42+
var _ Discover = (*Hook)(nil)
43+
@@ -204,19 +208,24 @@ func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool {
44+
45+
// still reject hooks that require args if none were provided
46+
switch name {
47+
- case CreateSymlinksHook, ChmodHook:
48+
+ case CreateSymlinksHook, CreateLibSymlinksHook, ChmodHook:
49+
return len(args) == 0
50+
}
51+
return false
52+
}
53+
54+
func (c cdiHookCreator) requiredArgs(name HookName) []string {
55+
- return append(c.fixedArgs, string(name))
56+
+ cliName := name
57+
+ switch name {
58+
+ case CreateLibSymlinksHook:
59+
+ cliName = CreateSymlinksHook
60+
+ }
61+
+ return append(c.fixedArgs, string(cliName))
62+
}
63+
64+
func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string {
65+
switch name {
66+
- case CreateSymlinksHook:
67+
+ case CreateSymlinksHook, CreateLibSymlinksHook:
68+
var transformedArgs []string
69+
for _, arg := range args {
70+
transformedArgs = append(transformedArgs, "--link", arg)
71+
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go
72+
new file mode 100644
73+
index 0000000..7cbf052
74+
--- /dev/null
75+
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go
76+
@@ -0,0 +1,57 @@
77+
+package discover
78+
+
79+
+import (
80+
+ "fmt"
81+
+ "path/filepath"
82+
+)
83+
+
84+
+const defaultLegacyLibPath = "/usr/lib/nvidia/tesla"
85+
+
86+
+type libPathSymlinks struct {
87+
+ Discover
88+
+ hookCreator HookCreator
89+
+ legacyLibPath string
90+
+}
91+
+
92+
+// WithLibPathSymlinks decorates the provided discoverer to add a hook that
93+
+// creates backwards-compatibility symlinks from legacyLibPath/<lib> to the
94+
+// actual library paths.
95+
+func WithLibPathSymlinks(mounts Discover, hookCreator HookCreator, legacyLibPath string) Discover {
96+
+ if legacyLibPath == "" {
97+
+ legacyLibPath = defaultLegacyLibPath
98+
+ }
99+
+ return &libPathSymlinks{
100+
+ Discover: mounts,
101+
+ hookCreator: hookCreator,
102+
+ legacyLibPath: legacyLibPath,
103+
+ }
104+
+}
105+
+
106+
+// Hooks returns hooks from the wrapped discoverer plus a hook to create
107+
+// library path compatibility symlinks.
108+
+func (d *libPathSymlinks) Hooks() ([]Hook, error) {
109+
+ hooks, err := d.Discover.Hooks()
110+
+ if err != nil {
111+
+ return nil, fmt.Errorf("failed to get hooks: %v", err)
112+
+ }
113+
+
114+
+ mounts, err := d.Mounts()
115+
+ if err != nil {
116+
+ return nil, fmt.Errorf("failed to get library mounts: %v", err)
117+
+ }
118+
+
119+
+ var links []string
120+
+ for _, mount := range mounts {
121+
+ basename := filepath.Base(mount.Path)
122+
+ link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.legacyLibPath, basename))
123+
+ links = append(links, link)
124+
+ }
125+
+
126+
+ symlinkHook := d.hookCreator.Create(CreateLibSymlinksHook, links...)
127+
+ if symlinkHook != nil {
128+
+ hookList, _ := symlinkHook.Hooks()
129+
+ hooks = append(hooks, hookList...)
130+
+ }
131+
+
132+
+ return hooks, nil
133+
+}
134+
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
135+
index fce32bc..d827c06 100644
136+
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
137+
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
138+
@@ -63,6 +63,8 @@ const (
139+
EnableCudaCompatHook = discover.EnableCudaCompatHook
140+
// An UpdateLDCacheHook is used to update the ldcache in the container.
141+
UpdateLDCacheHook = discover.UpdateLDCacheHook
142+
+ // A CreateLibSymlinksHook is used to create library path compatibility symlinks.
143+
+ CreateLibSymlinksHook = discover.CreateLibSymlinksHook
144+
145+
// Deprecated: Use CreateSymlinksHook instead.
146+
HookCreateSymlinks = CreateSymlinksHook
147+
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
148+
index e145a2d..32ed643 100644
149+
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
150+
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
151+
@@ -104,6 +104,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
152+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
153+
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
154+
155+
+ libPathSymlinksDiscoverer := discover.WithLibPathSymlinks(libraries, l.hookCreator, "")
156+
+ discoverers = append(discoverers, libPathSymlinksDiscoverer)
157+
+
158+
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
159+
discoverers = append(discoverers, updateLDCache)
160+
161+
--
162+
2.53.0
163+

packages/nvidia-k8s-device-plugin/nvidia-k8s-device-plugin-conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ flags:
3333
deviceListStrategy: "volume-mounts"
3434
{{/if}}
3535
deviceIDStrategy: {{default "index" settings.kubelet-device-plugins.nvidia.device-id-strategy}}
36-
containerDriverRoot: "/"
36+
containerDriverRoot: "/x86_64-bottlerocket-linux-gnu/sys-root"
3737
{{#if settings.kubelet-device-plugins.nvidia.device-sharing-strategy}}
3838
{{#if (eq settings.kubelet-device-plugins.nvidia.device-sharing-strategy "time-slicing")}}
3939
sharing:

packages/nvidia-k8s-device-plugin/nvidia-k8s-device-plugin.spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ Source6: nvidia-mps-control-daemon-exec-start-conf
2222

2323
Patch0001: 0001-Update-MPS-roots-for-immutable-host-OS.patch
2424
Patch1001: 1001-Ensure-that-generated-CDI-specs-do-not-contain-enabl.patch
25+
Patch1002: 1002-Enable-library-path-compatibility-symlinks.patch
26+
Patch1003: 1003-vendor-add-CreateLibSymlinksHook.patch
2527

2628
BuildRequires: %{_cross_os}glibc-devel
2729

0 commit comments

Comments
 (0)