11#include " lemon/backends/backend_utils.h"
2+ #include " lemon/backends/install_staging.h"
23#include " lemon/runtime_config.h"
34#include " lemon/system_info.h"
45#include " lemon/backends/llamacpp_server.h"
2425#include < iostream>
2526#include < lemon/utils/aixlog.hpp>
2627#include < algorithm>
28+ #include < system_error>
2729#include < vector>
2830#include < nlohmann/json.hpp>
2931
@@ -257,27 +259,9 @@ namespace lemon::backends {
257259 }
258260
259261 std::string BackendUtils::find_executable_in_install_dir (const std::string& install_dir, const std::string& binary_name) {
260- if (fs::exists (install_dir)) {
261- // On Windows, executables have a .exe extension that may not be in binary_name
262- #ifdef _WIN32
263- const std::string binary_name_exe = binary_name + " .exe" ;
264- #endif
265- // This could be optimized with a cache but saving a few milliseconds every few minutes/hours is not going to do much
266- for (const fs::directory_entry& dir_entry : fs::recursive_directory_iterator (install_dir)) {
267- if (dir_entry.is_regular_file ()) {
268- const auto & fname = dir_entry.path ().filename ();
269- if (fname == binary_name
270- #ifdef _WIN32
271- || fname == binary_name_exe
272- #endif
273- ) {
274- return dir_entry.path ().string ();
275- }
276- }
277- }
278- }
279-
280- return " " ;
262+ // Delegates to the header-only helper so the executable-lookup logic has
263+ // a single source of truth shared with commit_staged_install().
264+ return find_executable_in_dir (install_dir, binary_name);
281265 }
282266
283267 std::string BackendUtils::get_backend_binary_path (const BackendSpec& spec, const std::string& backend) {
@@ -403,7 +387,11 @@ namespace lemon::backends {
403387 LOG (INFO , spec.log_name ()) << " Upgrading " << spec.binary << " from " << installed_version
404388 << " to " << expected_version << std::endl;
405389 needs_install = true ;
406- fs::remove_all (install_dir);
390+ // NOTE: do NOT remove install_dir here. The existing working
391+ // binary is kept in place until the replacement has been
392+ // downloaded, extracted, and verified; the atomic swap below
393+ // (commit_staged_install) handles removal so an interrupted
394+ // download cannot leave the backend with no usable binary.
407395 }
408396 } else if (!needs_install && !expected_version.empty ()) {
409397 // If the executable exists but version.txt is missing, SystemInfo
@@ -413,19 +401,45 @@ namespace lemon::backends {
413401 LOG (INFO , spec.log_name ()) << " Installed executable is missing version.txt; reinstalling "
414402 << spec.binary << " version " << expected_version << std::endl;
415403 needs_install = true ;
416- fs::remove_all (install_dir);
404+ // See note above: removal is deferred to the verified atomic swap.
417405 }
418406 }
419407
420408 if (needs_install) {
421409 LOG (INFO , spec.log_name ()) << " Installing " << spec.binary << " (version: "
422410 << expected_version << " )" << std::endl;
423411
424- // Create install directory
425- fs::create_directories (install_dir);
426-
427- std::string url = " https://github.com/" + repo + " /releases/download/" +
428- expected_version + " /" + filename;
412+ // Stage the new install in a sibling directory so the currently
413+ // installed (working) binary is left untouched until the download is
414+ // complete, extracted, and verified. Only then is staging atomically
415+ // swapped into place (see commit_staged_install below), so a slow or
416+ // interrupted download never destroys a working binary.
417+ const std::string staging_dir = install_dir + " .staging" ;
418+ std::error_code staging_ec;
419+ fs::remove_all (staging_dir, staging_ec); // clear any leftover from a prior aborted install
420+ fs::remove_all (install_dir + " .old" , staging_ec); // and any orphaned swap backup
421+ // If a stale staging tree could not be cleared (e.g. a locked file on
422+ // Windows), fail rather than extracting into it — a leftover binary
423+ // could otherwise satisfy verification and get promoted as a stale or
424+ // mixed install over the working one.
425+ if (fs::exists (staging_dir)) {
426+ throw std::runtime_error (" Could not clear stale staging directory: " + staging_dir);
427+ }
428+ fs::create_directories (staging_dir);
429+
430+ // Remove the staging tree on any early exit (exception) before the
431+ // swap, so a failed download/extraction never leaves a half-built
432+ // tree behind for the next attempt to trip over.
433+ struct StagingGuard {
434+ const std::string& dir;
435+ bool active = true ;
436+ ~StagingGuard () {
437+ if (active) {
438+ std::error_code ec;
439+ fs::remove_all (dir, ec);
440+ }
441+ }
442+ } staging_guard{staging_dir};
429443
430444 // Download archive to cache directory.
431445 // Preserve the actual filename (sanitised for use in a path) so that
@@ -442,6 +456,19 @@ namespace lemon::backends {
442456
443457 LOG (DEBUG , spec.log_name ()) << " Downloading to: " << zip_path << std::endl;
444458
459+ // Remove the downloaded archive on ANY exit from here on — success
460+ // OR exception, including a throw from commit_staged_install() below
461+ // (a swap/rename failure) — so the cache archive is never leaked.
462+ // Mirrors StagingGuard above; replaces the per-throw fs::remove(zip_path)
463+ // calls that did not cover the commit_staged_install throw path.
464+ struct ZipGuard {
465+ const std::string& path;
466+ ~ZipGuard () {
467+ std::error_code ec;
468+ fs::remove (path, ec);
469+ }
470+ } zip_guard{zip_path};
471+
445472 const std::string base_download_url = " https://github.com/" + repo + " /releases/download/" +
446473 expected_version + " /" ;
447474
@@ -585,7 +612,6 @@ namespace lemon::backends {
585612 if (!part_result.success ) {
586613 combined.close ();
587614 fs::remove (part_path);
588- fs::remove (zip_path);
589615 throw std::runtime_error (" Failed to download " + part_filename + " from: " + part_url +
590616 " - " + part_result.error_message );
591617 }
@@ -610,29 +636,45 @@ namespace lemon::backends {
610636 LOG (DEBUG , spec.log_name ()) << " Downloaded archive file size: "
611637 << (file_size / 1024 / 1024 ) << " MB" << std::endl;
612638
613- // Extract
614- if (! extract_archive (zip_path, install_dir, spec. log_name ())) {
615- fs::remove (zip_path);
616- fs::remove_all (install_dir);
639+ // Extract into the staging directory (NOT install_dir) so a failed
640+ // extraction cannot destroy the currently-installed binary. The
641+ // staging guard removes the partial tree when we throw.
642+ if (! extract_archive (zip_path, staging_dir, spec. log_name ())) {
617643 throw std::runtime_error (" Failed to extract archive: " + zip_path);
618644 }
619645
620- // Verify extraction
621- exe_path = find_executable_in_install_dir (install_dir, spec.binary );
646+ // Save version info into the staging tree so it travels with the
647+ // atomic swap below. Fail cleanly on a write error rather than
648+ // promoting a backend with no version.txt (which would make the next
649+ // status check force an unnecessary reinstall).
650+ {
651+ const std::string staged_version_file = (fs::path (staging_dir) / " version.txt" ).string ();
652+ std::ofstream vf (staged_version_file);
653+ vf << expected_version;
654+ vf.flush ();
655+ if (!vf.good ()) {
656+ throw std::runtime_error (" Failed to write version file: " + staged_version_file);
657+ }
658+ }
659+
660+ // Verify the staged tree contains the executable, then atomically
661+ // swap it into place. commit_staged_install keeps a recoverable .old
662+ // backup across the swap: it removes the staging tree and leaves
663+ // install_dir untouched on verification failure (returns ""), and on
664+ // a swap (rename) failure it rolls the backup back and throws — so a
665+ // botched download/extraction/swap never destroys the working binary.
666+ exe_path = commit_staged_install (staging_dir, install_dir, spec.binary );
622667 if (exe_path.empty ()) {
623668 LOG (ERROR , spec.log_name ()) << " Extraction completed but executable not found" << std::endl;
624- fs::remove (zip_path);
625- fs::remove_all (install_dir);
626669 throw std::runtime_error (" Extraction failed: executable not found" );
627670 }
671+ // Swap succeeded: staging was consumed by the rename, so disarm the
672+ // guard (its cleanup would now be a no-op, but disarm to make intent
673+ // explicit and skip a pointless filesystem call).
674+ staging_guard.active = false ;
628675
629676 LOG (DEBUG , spec.log_name ()) << " Executable verified at: " << exe_path << std::endl;
630677
631- // Save version info
632- std::ofstream vf (version_file);
633- vf << expected_version;
634- vf.close ();
635-
636678 #ifndef _WIN32
637679 // Make all binaries in bin/ executable (tar may lose permissions)
638680 {
@@ -649,8 +691,7 @@ namespace lemon::backends {
649691 chmod (exe_path.c_str (), 0755 );
650692 #endif
651693
652- // Delete ZIP file
653- fs::remove (zip_path);
694+ // (The downloaded archive is removed by zip_guard on scope exit.)
654695
655696 // Send completion event now that installation is fully done.
656697 // For split archives the combined on-disk size is only known after
0 commit comments