Skip to content

Commit 4c53d11

Browse files
jpnurmi and claude committed
test(native): repro /dev/shm exhaustion on alpine/musl Docker CI
Add a Linux-only integration test that runs the native-backend crash example 40 times in a row, well past the ~27-iteration threshold at which Docker's default 64 MB /dev/shm fills with leaked /dev/shm/s-<id> entries. With the leak present, the next ftruncate succeeds (lazy) but the first page-fault from memset takes a SIGBUS and the app dies. Also instrument sentry__crash_ipc_init_app on Linux/Android with raw write(2) diagnostics around the new-shm ftruncate and the subsequent memset, so a CI run can pinpoint the SIGBUS to the page-fault rather than to ftruncate or any later step. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent bfa2c8f commit 4c53d11

2 files changed

Lines changed: 52 additions & 0 deletions

File tree

src/backends/native/sentry_crash_ipc.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,21 @@
1616
# include <sys/stat.h>
1717
# include <unistd.h>
1818

19+
// Diagnostic: raw write(2) to stderr so the message survives a SIGBUS
// that may kill the process mid-page-fault (e.g. /dev/shm exhausted).
// Used to localize the hang/abort to the post-ftruncate memset.
//
// `tag` names the checkpoint being reported and `shm_name` the shared-memory
// object; a NULL value for either is printed as "?". The line is formatted
// into a fixed 160-byte stack buffer and overlong messages are truncated.
// Output is best-effort: a failed or short write is deliberately ignored.
static void
shm_diag(const char *tag, const char *shm_name)
{
    char buf[160];
    int n = snprintf(buf, sizeof(buf), "[sentry-shm-diag pid=%d] %s shm=%s\n",
        (int)getpid(), tag ? tag : "?", shm_name ? shm_name : "?");
    if (n <= 0) {
        return;
    }

    size_t len = (size_t)n;
    if (len >= sizeof(buf)) {
        // snprintf returns the length the full message would have had, not
        // the number of bytes stored. Clamp to what is actually in `buf` so
        // write(2) never reads past the array, and keep the line terminated.
        len = sizeof(buf) - 1;
        buf[len - 1] = '\n';
    }

    ssize_t w = write(STDERR_FILENO, buf, len);
    (void)w;
}
33+
1934
sentry_crash_ipc_t *
2035
sentry__crash_ipc_init_app(sem_t *init_sem)
2136
{
@@ -89,6 +104,7 @@ sentry__crash_ipc_init_app(sem_t *init_sem)
89104
}
90105
} else {
91106
// New shared memory, set size
107+
shm_diag("pre-ftruncate", ipc->shm_name);
92108
if (ftruncate(ipc->shm_fd, SENTRY_CRASH_SHM_SIZE) < 0) {
93109
SENTRY_WARNF("failed to resize shared memory: %s", strerror(errno));
94110
close(ipc->shm_fd);
@@ -99,6 +115,7 @@ sentry__crash_ipc_init_app(sem_t *init_sem)
99115
sentry_free(ipc);
100116
return NULL;
101117
}
118+
shm_diag("post-ftruncate", ipc->shm_name);
102119
}
103120

104121
// Map shared memory
@@ -152,7 +169,9 @@ sentry__crash_ipc_init_app(sem_t *init_sem)
152169

153170
// Initialize shared memory only if newly created
154171
if (!shm_exists) {
172+
shm_diag("pre-memset", ipc->shm_name);
155173
memset(ipc->shmem, 0, SENTRY_CRASH_SHM_SIZE);
174+
shm_diag("post-memset", ipc->shm_name);
156175
ipc->shmem->magic = SENTRY_CRASH_MAGIC;
157176
ipc->shmem->version = SENTRY_CRASH_VERSION;
158177
sentry__atomic_store(&ipc->shmem->state, SENTRY_CRASH_STATE_READY);

tests/test_integration_native.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,39 @@ def test_native_multiple_crashes(cmake, httpserver):
247247
assert waiting.result
248248

249249

250+
@pytest.mark.skipif(
    sys.platform != "linux", reason="POSIX /dev/shm exhaustion only repros on Linux"
)
def test_native_crash_shm_exhaustion(cmake, httpserver):
    """Crash the native-backend example repeatedly to exhaust /dev/shm.

    Regression scenario (alpine/musl Docker CI): every native-backend crash
    leaves a /dev/shm/s-<id> entry behind, because the app side only unlinks
    on clean shutdown and the daemon never unlinks. With Docker's default
    64 MB /dev/shm, roughly 27 leaked entries fill the tmpfs. The following
    ftruncate still succeeds (it is lazy), but the first page-fault from
    memset takes a SIGBUS and the app dies silently.

    The test runs 40 crashes, well past that threshold, and expects one
    envelope per crash to reach the HTTP server.
    """
    crash_count = 40
    envelope_endpoint = "/api/123456/envelope/"
    build_dir = cmake(["sentry_example"], {"SENTRY_BACKEND": "native"})

    # Register one oneshot handler per expected crash envelope.
    pending = crash_count
    while pending > 0:
        handler = httpserver.expect_oneshot_request(envelope_endpoint)
        handler.respond_with_data("OK")
        pending -= 1

    with httpserver.wait(timeout=120) as waiting:
        for _attempt in range(crash_count):
            # Rebuild the environment for every run, exactly as each crash
            # would see it when launched on its own.
            crash_env = dict(os.environ, SENTRY_DSN=make_dsn(httpserver))
            run_crash(
                build_dir,
                "sentry_example",
                ["log", "stdout", "crash"],
                env=crash_env,
            )
    assert waiting.result
281+
282+
250283
def test_native_context_capture(cmake, httpserver):
251284
"""Test that scope and context are captured"""
252285
tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": "native"})

0 commit comments

Comments
 (0)