remsky
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ci.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 5 additions & 5 deletions b/‎.github/workflows/release.yml‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎.github/workflows/test_build.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/test_build.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/test_client_image.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/test_client_image.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 32 additions & 0 deletions b/‎README.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎VERSION‎
Lines changed: 1 addition & 1 deletion b/‎VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎api/src/core/config.py‎
Lines changed: 4 additions & 1 deletion b/‎api/src/core/config.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎api/src/core/paths.py‎
Lines changed: 10 additions & 0 deletions b/‎api/src/core/paths.py‎
Lines changed: 10 additions & 0 deletions
@@ -13,7 +13,7 @@ jobs:
       fail-fast: false
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
 
     # Match Dockerfile dependencies
     - name: Install Dependencies
 
@@ -36,7 +36,7 @@ jobs:
           fi
 
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           ref: ${{ steps.resolve-ref.outputs.source_ref }}
           fetch-depth: 0
@@ -100,7 +100,7 @@ jobs:
     runs-on: ${{ matrix.runs_on }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           ref: ${{ needs.prepare-release.outputs.source_ref }}
 
@@ -115,14 +115,14 @@ jobs:
           df -h
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # Use v3
+        uses: docker/setup-buildx-action@v4
         with:
           driver-opts: |
             image=moby/buildkit:v0.21.1
             network=host
 
       - name: Log in to GitHub Container Registry
-        uses: docker/login-action@v3 # Use v3
+        uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
@@ -276,7 +276,7 @@ jobs:
       contents: write
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0
 
 
@@ -79,7 +79,7 @@ jobs:
     runs-on: ${{ matrix.runs_on }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           ref: ${{ inputs.branch_name }}
 
@@ -88,7 +88,7 @@ jobs:
           sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
         with:
           driver-opts: image=moby/buildkit:v0.21.1
 
 
@@ -44,7 +44,7 @@ jobs:
       OWNER: ${{ vars.OWNER || 'remsky' }}
       IMAGE_NAME: ${{ vars.TEST_CLIENT_IMAGE_NAME || 'tts-api-test-client' }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Resolve image refs
         id: refs
@@ -61,7 +61,7 @@ jobs:
           echo "latest=${BASE}:latest" >> "$GITHUB_OUTPUT"
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Log in to GHCR
         if: github.event_name != 'workflow_dispatch' || inputs.push
 
@@ -23,6 +23,7 @@ build/
 # Environment
 # .env
 .venv/
+node_modules/
 env/
 venv/
 ENV/
@@ -76,6 +77,10 @@ examples/assorted_checks/test_transcription/output_long_form/*.transcript.txt
 examples/assorted_checks/test_transcription/output_long_form/*.wav
 examples/assorted_checks/test_transcription/output_long_form/*.synth_meta.json
 examples/assorted_checks/test_transcription/output_multilingual/*.wav
+examples/assorted_checks/benchmarks/output_data/model_unload_stats.txt
+examples/assorted_checks/benchmarks/output_data/model_unload_results.json
+examples/assorted_checks/benchmarks/output_plots/model_unload_longform.png
+examples/assorted_checks/benchmarks/output_plots/model_unload_short.png
 uv.lock
 !docker/test-client/uv.lock
 
@@ -87,3 +92,5 @@ pyproject.toml.bkp
 # Local scratch notes, scan outputs, anything not meant to ship
 .local/
 examples/assorted_checks/test_silence/out/*
+playwright-report/
+test-results/
@@ -4,6 +4,14 @@ Notable changes to this project will be documented in this file.
 
 Per-PR attribution and contributor credits are published automatically on the corresponding GitHub release page; this file is the curated, human-readable summary.
 
+## [v0.5.0] - 2026-06-06
+### Added
+- `POST /dev/unload` releases the model from VRAM without stopping the container; the model reloads lazily on the next request. Useful for freeing a shared GPU while idle. The amount reclaimed scales with load (~0.7 GB; ~1.6 GB via long-form test on a 4060 Ti). (#474)
+### Fixed
+- Web UI long playback no longer fails around the 10-minute mark; the in-browser audio buffer is now bounded ahead of `currentTime` with trailing eviction behind it, so long generations stop overflowing the SourceBuffer.
+- Web UI stays responsive on extended sessions; waveform animation is transition-gated and `PlayerState` short-circuits no-op updates, so controls don't drift into lag after 10+ minutes of playback.
+- Web UI MP3 seek/scrub works after the stream completes; pausing or reaching the end of playback auto-swaps to the full server file, allowing timeline navigation.
+
 ## [v0.4.0] - 2026-05-24
 ### Added
 - GPU image variants for Blackwell / RTX 50-series (`:latest-cu128`, `:vX.Y.Z-cu128`, amd64 only) with PyTorch cu128 wheels (#443). Default `:latest` and new `:latest-cu126` alias stay on cu126 for Maxwell/Pascal compatibility.
 
@@ -79,6 +79,8 @@ Configuration via environment variables, see `core/config.py`. The `:latest` and
         # The Docker GPU image is CUDA-only and won't run on Apple Silicon. With Docker, use `docker/cpu`.
         # For native MPS (Apple GPU) acceleration, run directly via UV with `./start-gpu_mac.sh`.
 
+        cd ../..  # back to repo root for the paths below
+
         # Models will auto-download, but if needed you can manually download:
         python docker/scripts/download_model.py --output api/src/models/v1_0
 
@@ -386,6 +388,22 @@ Key Performance Metrics:
 - Realtime Speed: Ranges between 35x-100x (generation time to output audio length)
 - Average Processing Rate: 137.67 tokens/second (cl100k_base)
 
+### Model Unload / VRAM Reclaim
+
+`POST /dev/unload` frees the model from VRAM and reloads it lazily on the next request. The amount reclaimed scales with load (the activation pool, not just the weights) but plateaus, because chunks are capped at 450 tokens. The long-form workload is ~30 paragraphs. Same setup as above.
+
+<p align="center">
+  <img src="assets/gpu_model_unload_short.png" width="45%" alt="Short workload" style="border: 2px solid #333; padding: 10px; margin-right: 1%;">
+  <img src="assets/gpu_model_unload_longform.png" width="45%" alt="Long-form workload" style="border: 2px solid #333; padding: 10px;">
+</p>
+
+| Workload | Loaded | Floor | Reclaimed | Reload |
+| --- | --- | --- | --- | --- |
+| Short (6s audio) | 3.11 GB | 2.37 GB | 758 MiB | +4.9s |
+| Long-form (7.5m) | 3.98 GB | 2.37 GB | 1,656 MiB | +5.1s |
+
+Floor is host + CUDA context. Reproduce with `uv run --extra benchmarks assorted_checks/benchmarks/benchmark_model_unload.py` from `examples/`.
+
 ### Transcription roundtrip (WER/CER)
 
 End-to-end roundtrip: synthesize with Kokoro, transcribe the result back with [`faster-whisper`](https://github.com/SYSTRAN/faster-whisper), compare to the source text. Scripts and data live under `examples/assorted_checks/test_transcription/`.
@@ -548,6 +566,19 @@ except Exception as e:
 See `examples/phoneme_examples/generate_phonemes.py` for a sample script.
 </details>
 
+<details>
+<summary>Inline Control Tokens</summary>
+
+Two tokens can be embedded in the `input` text and are parsed server-side (API, WebUI, or any client):
+
+- **Pause**: `[pause:1.5s]` inserts that much silence. Must be exactly this form (colon, trailing `s`, case-insensitive). `[pause=1.5]`, `[PAUSE 1.0]`, and SSML `<break/>` are not recognized and get read aloud.
+- **Pronunciation**: `[Worcester](/wˈʊstər/)` speaks the IPA between the slashes instead of the word. English only; use `/dev/phonemize` to find the IPA.
+
+```text
+The city of [Worcester](/wˈʊstər/) is easy. [pause:1s] See?
+```
+</details>
+
 <details>
 <summary>Debug Endpoints</summary>
 
@@ -556,6 +587,7 @@ Monitor system state and resource usage with these endpoints:
 - `/debug/threads` - Get thread information and stack traces
 - `/debug/storage` - Monitor temp file and output directory usage
 - `/debug/system` - Get system information (CPU, memory, GPU)
+- `POST /dev/unload` - Release model from VRAM; reloads lazily on next request
 
 Useful for debugging resource exhaustion or performance issues.
 </details>
 
@@ -1 +1 @@
-0.4.0
+0.5.0
@@ -1,4 +1,7 @@
-from importlib.metadata import PackageNotFoundError, version as _pkg_version
+from importlib.metadata import (
+    PackageNotFoundError,
+    version as _pkg_version,
+)
 from pathlib import Path
 
 import torch
 
@@ -330,6 +330,16 @@ async def get_content_type(path: str) -> str:
         ".gif": "image/gif",
         ".svg": "image/svg+xml",
         ".ico": "image/x-icon",
+        # audio downloads: serve a real media type so the webui can play the file
+        # directly (the player swaps to this URL once generation finishes, #150).
+        ".mp3": "audio/mpeg",
+        ".wav": "audio/wav",
+        ".opus": "audio/opus",
+        ".flac": "audio/flac",
+        ".aac": "audio/aac",
+        ".m4a": "audio/mp4",
+        ".ogg": "audio/ogg",
+        ".pcm": "audio/pcm",
     }.get(ext, "application/octet-stream")