Skip to content

Commit 81007ee

Browse files
committed
nvidia-container-toolkit: normalize library paths in containers
Add an --additional-symlinks flag to `nvidia-ctk cdi generate` that creates symlinks in a specified directory pointing to each discovered library. Configure generate-cdi-specs.service with the following options: `--driver-root /x86_64-bottlerocket-linux-gnu/sys-root`, `--dev-root /`, and `--additional-symlinks /usr/lib/nvidia/tesla`. This ensures libraries appear at /usr/lib/ in containers, with backwards-compatible symlinks at /usr/lib/nvidia/tesla/. Signed-off-by: Maher Homsi <maherhom@amazon.com>
1 parent 4ce51e1 commit 81007ee

3 files changed

Lines changed: 180 additions & 7 deletions

File tree

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
From 350bb9de88647d807651402d04701bafa98fc2c1 Mon Sep 17 00:00:00 2001
2+
From: Maher Homsi <maherhom@amazon.com>
3+
Date: Wed, 29 Apr 2026 00:32:03 +0000
4+
Subject: [PATCH] Add --additional-symlinks flag for library path compatibility
5+
6+
Add a --additional-symlinks flag to `nvidia-ctk cdi generate` that takes
7+
a directory path. When specified, for each discovered nvidia library
8+
mounted into the container, a symlink is created from
9+
<additional-symlinks-dir>/<lib> pointing to the actual library path.
10+
11+
This enables backwards compatibility for workloads that expect nvidia
12+
libraries at /usr/lib/nvidia/tesla/ after libraries move to /usr/lib/.
13+
14+
Usage:
15+
nvidia-ctk cdi generate --additional-symlinks /usr/lib/nvidia/tesla
16+
17+
Signed-off-by: Maher Homsi <maherhom@amazon.com>
18+
---
19+
cmd/nvidia-ctk/cdi/generate/generate.go | 14 +++++++++
20+
internal/discover/lib_path_symlinks.go | 44 +++++++++++++++++++++++++++++++++
21+
pkg/nvcdi/driver-nvml.go | 10 ++++++
22+
pkg/nvcdi/lib.go | 1 +
23+
pkg/nvcdi/options.go | 8 +++++
24+
5 files changed, 77 insertions(+)
25+
create mode 100644 internal/discover/lib_path_symlinks.go
26+
27+
diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go
28+
index 3c4d5e6..7a8b9c0 100644
29+
--- a/cmd/nvidia-ctk/cdi/generate/generate.go
30+
+++ b/cmd/nvidia-ctk/cdi/generate/generate.go
31+
@@ -66,6 +66,7 @@ type options struct {
32+
enabledHooks []string
33+
34+
featureFlags []string
35+
+ libDirSymlinksDir string
36+
37+
csv struct {
38+
files []string
39+
@@ -231,6 +232,12 @@ func (m command) build() *cli.Command {
40+
Destination: &opts.featureFlags,
41+
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_FEATURE_FLAGS"),
42+
},
43+
+ &cli.StringFlag{
44+
+ Name: "additional-symlinks",
45+
+ Destination: &opts.libDirSymlinksDir,
46+
+ Usage: "Create symlinks in the specified directory pointing to each nvidia library. This enables backwards compatibility for workloads expecting libraries at a legacy path.",
47+
+ Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_ADDITIONAL_SYMLINKS"),
48+
+ },
49+
},
50+
}
51+
52+
@@ -363,6 +370,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
53+
nvcdi.WithDisabledHooks(opts.disabledHooks...),
54+
nvcdi.WithEnabledHooks(opts.enabledHooks...),
55+
nvcdi.WithFeatureFlags(opts.featureFlags...),
56+
+ nvcdi.WithLibDirSymlinksDir(opts.libDirSymlinksDir),
57+
// We set the following to allow for dependency injection:
58+
nvcdi.WithNvmlLib(opts.nvmllib),
59+
}
60+
diff --git a/internal/discover/lib_path_symlinks.go b/internal/discover/lib_path_symlinks.go
61+
new file mode 100644
62+
index 0000000..a1b2c3d
63+
--- /dev/null
64+
+++ b/internal/discover/lib_path_symlinks.go
65+
@@ -0,0 +1,56 @@
66+
+package discover
67+
+
68+
+import (
69+
+ "fmt"
70+
+ "path/filepath"
71+
+)
72+
+
73+
+type libDirSymlinks struct {
74+
+ Discover
75+
+ hookCreator HookCreator
76+
+ symlinkDir string
77+
+}
78+
+
79+
+// WithLibDirSymlinks decorates the provided discoverer to add a hook that
80+
+// creates symlinks in symlinkDir pointing to each discovered library.
81+
+// For each library at /usr/lib/<lib>, a symlink <symlinkDir>/<lib> -> /usr/lib/<lib>
82+
+// is created inside the container.
83+
+func WithLibDirSymlinks(mounts Discover, hookCreator HookCreator, symlinkDir string) Discover {
84+
+ if symlinkDir == "" {
85+
+ return mounts
86+
+ }
87+
+ return &libDirSymlinks{
88+
+ Discover: mounts,
89+
+ hookCreator: hookCreator,
90+
+ symlinkDir: symlinkDir,
91+
+ }
92+
+}
93+
+
94+
+// Hooks returns hooks from the wrapped discoverer plus a create-symlinks hook
95+
+// that creates symlinks in the configured directory.
96+
+func (d *libDirSymlinks) Hooks() ([]Hook, error) {
97+
+ hooks, err := d.Discover.Hooks()
98+
+ if err != nil {
99+
+ return nil, fmt.Errorf("failed to get hooks: %v", err)
100+
+ }
101+
+
102+
+ mounts, err := d.Mounts()
103+
+ if err != nil {
104+
+ return nil, fmt.Errorf("failed to get library mounts: %v", err)
105+
+ }
106+
+
107+
+ var links []string
108+
+ for _, mount := range mounts {
109+
+ basename := filepath.Base(mount.Path)
110+
+ link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.symlinkDir, basename))
111+
+ links = append(links, link)
112+
+ }
113+
+
114+
+ symlinkHook := d.hookCreator.Create(CreateSymlinksHook, links...)
115+
+ if symlinkHook != nil {
116+
+ hookList, _ := symlinkHook.Hooks()
117+
+ hooks = append(hooks, hookList...)
118+
+ }
119+
+
120+
+ return hooks, nil
121+
+}
122+
123+
diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go
124+
index e145a2d..f8a9b12 100644
125+
--- a/pkg/nvcdi/driver-nvml.go
126+
+++ b/pkg/nvcdi/driver-nvml.go
127+
@@ -104,6 +104,16 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
128+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
129+
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
130+
131+
+ if l.libDirSymlinksDir != "" {
132+
+ l.logger.Infof("Adding additional symlinks discoverer for directory %q", l.libDirSymlinksDir)
133+
+ libDirSymlinksDiscoverer := discover.WithLibDirSymlinks(
134+
+ libraries,
135+
+ l.hookCreator,
136+
+ l.libDirSymlinksDir,
137+
+ )
138+
+ discoverers = append(discoverers, libDirSymlinksDiscoverer)
139+
+ }
140+
+
141+
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
142+
discoverers = append(discoverers, updateLDCache)
143+
144+
diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go
145+
index 1a2b3c4..5d6e7f8 100644
146+
--- a/pkg/nvcdi/lib.go
147+
+++ b/pkg/nvcdi/lib.go
148+
@@ -61,6 +61,7 @@ type nvcdilib struct {
149+
disabledHooks []discover.HookName
150+
enabledHooks []discover.HookName
151+
hookCreator discover.HookCreator
152+
+ libDirSymlinksDir string
153+
}
154+
155+
// New creates a new nvcdi library
156+
diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go
157+
index 2a3b4c5..6d7e8f9 100644
158+
--- a/pkg/nvcdi/options.go
159+
+++ b/pkg/nvcdi/options.go
160+
@@ -177,6 +177,14 @@ func WithEnabledHooks[T string | HookName](hooks ...T) Option {
161+
}
162+
}
163+
164+
+// WithLibDirSymlinksDir sets the directory where additional library
165+
+// symlinks should be created in the container.
166+
+func WithLibDirSymlinksDir(dir string) Option {
167+
+ return func(l *nvcdilib) {
168+
+ l.libDirSymlinksDir = dir
169+
+ }
170+
+}
171+
+
172+
// WithFeatureFlags allows the specified set of features to be toggled on.
173+
func WithFeatureFlags[T string | FeatureFlag](featureFlags ...T) Option {
174+
return func(o *nvcdilib) {
175+
--
176+
2.53.0

packages/nvidia-container-toolkit/generate-cdi-specs.service

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,10 @@ RefuseManualStop=true
2525

2626
[Service]
2727
Type=oneshot
28-
# Explanation of the options:
29-
# --format json: to be consistent across Bottlerocket's variants
30-
# --mode nvml: the default mode ("auto") resolves to this already, make it explicit
31-
# --device-name-strategy uuid: the ECS agent only supports device UUIDs; for k8s
32-
# this is irrelevant because these specs will be used
33-
# only when NVIDIA_VISIBLE_DEVICES is "all"
34-
# --output /etc/cdi/nvidia.json: store the CDI specifications at this location
3528
ExecStart=/usr/bin/nvidia-ctk cdi generate --format json \
29+
--driver-root /x86_64-bottlerocket-linux-gnu/sys-root \
30+
--dev-root / \
31+
--additional-symlinks /usr/lib/nvidia/tesla \
3632
--mode nvml \
3733
--device-name-strategy uuid \
3834
--output /etc/cdi/nvidia.json

packages/nvidia-container-toolkit/nvidia-container-toolkit.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Source5: nvidia-container-toolkit-tmpfiles-k8s.conf
2424
Source6: nvidia-container-toolkit-config-k8s
2525
Source7: generate-cdi-specs.service
2626
Patch0001: 0001-discover-reduce-missing-resource-warnings-to-debug-l.patch
27+
Patch0002: 0002-add-additional-symlinks-flag.patch
2728

2829
BuildRequires: %{_cross_os}glibc-devel
2930
Requires: %{_cross_os}libnvidia-container

0 commit comments

Comments
 (0)