Skip to content

Commit 03e8cfc

Browse files
committed
fix(cli): make spacy download --url actually use the custom URL
The custom-URL feature added in #13848 has been broken since release: any value passed via `--url` is silently dropped (when the URL has no trailing slash) or rejected by the post-construction guard (when it does), so the flag cannot succeed under any input. Two interlocking defects in `download_model`: 1. When `base_url` (= the user's custom URL) lacked a trailing slash, line 183 reassigned it to `about.__download_url__ + "/"`, discarding the user's choice instead of just appending `/`. 2. The line-185 `startswith(about.__download_url__)` guard then rejected any URL that *was* preserved, because a custom mirror by definition does not start with the GitHub URL. This change: * Appends `/` to the actual `base_url` instead of swapping it for the default. * Skips the GitHub-origin guard when `custom_url` is provided. The user has explicitly opted into a custom source via `--url`; the relative- path guard is still in force when no `--url` is passed, preserving the protection from #13848's review thread. Adds a regression test exercising both trailing-slash variants of `--url` plus the default-URL path so the relative-path rejection from `test_download_rejects_relative_urls` keeps working.
1 parent 0069cf9 commit 03e8cfc

2 files changed

Lines changed: 35 additions & 2 deletions

File tree

spacy/cli/download.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,11 @@ def download_model(
180180
base_url = custom_url if custom_url else about.__download_url__
181181
# urljoin requires that the path ends with /, or the last path part will be dropped
182182
if not base_url.endswith("/"):
183-
base_url = about.__download_url__ + "/"
183+
base_url = base_url + "/"
184184
download_url = urljoin(base_url, filename)
185-
if not download_url.startswith(about.__download_url__):
185+
# Only enforce the GitHub-origin guard when the user did not opt into a
186+
# custom URL. When --url is passed the user has explicitly chosen a source.
187+
if custom_url is None and not download_url.startswith(about.__download_url__):
186188
raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
187189
pip_args = list(user_pip_args) if user_pip_args is not None else []
188190
cmd = _get_pip_install_cmd() + pip_args + [download_url]

spacy/tests/test_cli.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,3 +1237,34 @@ def test_download_rejects_relative_urls(monkeypatch):
12371237
download_module.download("en_core_web_sm-3.7.1", direct=True)
12381238
with pytest.raises(SystemExit):
12391239
download_module.download("../en_core_web_sm-3.7.1", direct=True)
1240+
1241+
1242+
def test_download_uses_custom_url(monkeypatch):
1243+
"""`download_model(custom_url=...)` must actually use the user-supplied URL,
1244+
both with and without a trailing slash, instead of falling back to the
1245+
default GitHub URL or being rejected by the origin guard."""
1246+
1247+
captured = {}
1248+
1249+
def fake_run(cmd):
1250+
captured["url"] = next(arg for arg in cmd if arg.startswith("http"))
1251+
1252+
monkeypatch.setattr(download_module, "run_command", fake_run)
1253+
monkeypatch.setattr(
1254+
download_module, "_get_pip_install_cmd", lambda: ["pip", "install"]
1255+
)
1256+
1257+
for base in ("https://my-mirror.example.com/models", "https://my-mirror.example.com/models/"):
1258+
captured.clear()
1259+
download_module.download_model("foo-1.0.tar.gz", custom_url=base)
1260+
assert captured["url"].startswith("https://my-mirror.example.com/models/"), (
1261+
f"custom_url={base!r} produced {captured['url']!r}"
1262+
)
1263+
1264+
# Without --url, the default GitHub URL is still used and the relative-path
1265+
# guard remains in effect.
1266+
captured.clear()
1267+
download_module.download_model("foo-1.0.tar.gz")
1268+
assert captured["url"].startswith(about.__download_url__)
1269+
with pytest.raises(ValueError):
1270+
download_module.download_model("../foo-1.0.tar.gz")

0 commit comments

Comments
 (0)