Skip to content

Commit 2153529

Browse files
authored
fix(audio): implement audio file source swapping for stream playback and seeking (#475)
Re: discussions/150
1 parent 6b58b53 commit 2153529

10 files changed

Lines changed: 331 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@ Per-PR attribution and contributor credits are published automatically on the co
77
## [Unreleased]
88
### Added
99
- `POST /dev/unload` releases the model from VRAM without stopping the container; the model is lazily reloaded on the next request. Useful for freeing a shared GPU while idle. The amount reclaimed scales with load (~0.7 GB; ~1.6 GB via long-form test on 4060Ti). (#474)
10-
1110
### Fixed
1211
- Web UI long-playback bugfix around the 10-minute mark; in-browser audio buffer is now bounded ahead of `currentTime` with trailing eviction behind it, so long generations stop overflowing the SourceBuffer.
1312
- Web UI stays responsive on extended sessions; waveform animation is transition-gated and `PlayerState` short-circuits no-op updates, so controls don't drift into lag after 10+ minutes of playback.
14-
15-
### Notes
16-
- Scrubbing may not be fully supported in the current state for MP3 streamed playback. WAV and other formats play back fine on completion.
13+
- Web UI MP3 seek/scrub now works once the stream completes; pausing, or reaching the end of playback, automatically swaps the player to the full server file, allowing timeline navigation.
1714

1815
## [v0.4.0] - 2026-05-24
1916
### Added

api/src/core/paths.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,16 @@ async def get_content_type(path: str) -> str:
330330
".gif": "image/gif",
331331
".svg": "image/svg+xml",
332332
".ico": "image/x-icon",
333+
# audio downloads: serve a real media type so the webui can play the file
334+
# directly (the player swaps to this URL once generation finishes, #150).
335+
".mp3": "audio/mpeg",
336+
".wav": "audio/wav",
337+
".opus": "audio/opus",
338+
".flac": "audio/flac",
339+
".aac": "audio/aac",
340+
".m4a": "audio/mp4",
341+
".ogg": "audio/ogg",
342+
".pcm": "audio/pcm",
333343
}.get(ext, "application/octet-stream")
334344

335345

api/tests/test_paths.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ async def test_get_content_type():
6767
("test.css", "text/css"),
6868
("test.png", "image/png"),
6969
("test.unknown", "application/octet-stream"),
70+
("test.mp3", "audio/mpeg"),
71+
("test.wav", "audio/wav"),
72+
("test.opus", "audio/opus"),
73+
("test.flac", "audio/flac"),
74+
("test.aac", "audio/aac"),
75+
("test.ogg", "audio/ogg"),
76+
("test.m4a", "audio/mp4"),
77+
("test.pcm", "audio/pcm"),
7078
]
7179

7280
for filename, expected in test_cases:

web/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ <h1>FastKoko</h1>
136136
</div>
137137
<footer class="page-footer">
138138
<a href="https://www.buymeacoffee.com/remsky" target="_blank" rel="noopener" class="bmc-link" aria-label="Buy me a coffee">
139-
<img src="https://img.buymeacoffee.com/button-api/?text=Buy me a coffee&emoji=☕&slug=remsky&button_colour=6366F1&font_colour=000000&outline_colour=0f172a&coffee_colour=FFDD00" alt="Buy me a coffee">
139+
<img src="https://img.buymeacoffee.com/button-api/?text=Buy me a coffee&emoji=☕&slug=remsky&button_colour=222222&font_colour=ffffff&outline_colour=111111&coffee_colour=FFDD00" alt="Buy me a coffee">
140140
</a>
141141
</footer>
142142
<a id="version-badge" class="version-badge" href="https://github.com/remsky/Kokoro-FastAPI/releases" target="_blank" rel="noopener" hidden></a>

web/src/App.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ export class App {
9292
message = isFirefox
9393
? 'Audio streaming is not currently supported in Firefox. Playback and/or download should stilll be available when generation finishes.'
9494
: 'This browser may not support streaming. Playback and/or download should still be available when generation finishes.';
95+
} else if (this.elements.autoplayToggle?.checked) {
96+
message = 'Auto-play on: pause after generation completes to enable full seek/scrub.';
9597
}
9698

9799
notice.textContent = message;
@@ -105,8 +107,9 @@ export class App {
105107
// Download button
106108
this.elements.downloadBtn.addEventListener('click', () => this.downloadAudio());
107109

108-
// Keep browser/output warning aligned with the selected format
110+
// Keep browser/output warning aligned with the selected format and autoplay state
109111
this.elements.formatSelect.addEventListener('change', () => this.applyBrowserStreamingNotice());
112+
this.elements.autoplayToggle.addEventListener('change', () => this.applyBrowserStreamingNotice());
110113

111114
// Cancel button
112115
this.elements.cancelBtn.addEventListener('click', () => {

web/src/components/PlayerControls.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ export class PlayerControls {
1616
this.setupAudioEvents();
1717
this.setupStateSubscription();
1818
this.timeUpdateInterval = null;
19+
this.updateControls(this.playerState.getState());
1920
}
2021

2122
formatTime(secs) {

web/src/components/WaveVisualizer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export class WaveVisualizer {
1818
height: 100, // Increased height
1919
autostart: false,
2020
amplitude: 1,
21-
speed: 0.1
21+
speed: 0.03
2222
});
2323

2424
// Handle window resize

web/src/services/AudioService.js

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ export class AudioService {
1818
this.chunkQueue = [];
1919
this.streamFinished = false;
2020
this.feederWakeup = null;
21+
// once an MSE generation finishes, the full file lives on the server. swapping
22+
// the <audio> element over to it restores true duration + full seeking, which
23+
// the bounded streaming buffer can't provide for long generations (#150).
24+
this.usingFileSource = false;
25+
this.swapInProgress = false;
2126
}
2227

2328
supportsMSEMp3() {
@@ -183,6 +188,11 @@ export class AudioService {
183188

184189
this.audio.addEventListener('ended', () => {
185190
this.dispatchEvent('ended');
191+
// reaching the end is a safe break point too: swap so a replay/scrub uses
192+
// the full file rather than the evicted streaming buffer.
193+
if (this.canSwapToFileSource()) {
194+
this.swapToFileSource();
195+
}
186196
});
187197

188198
return new Promise((resolve, reject) => {
@@ -243,6 +253,12 @@ export class AudioService {
243253

244254
setTimeout(() => {
245255
this.dispatchEvent('downloadReady');
256+
// if the user isn't actively listening (autoplay off, or paused),
257+
// switch to the full file now so duration + seeking are correct
258+
// the moment generation finishes. otherwise defer to pause/ended.
259+
if (this.audio && this.audio.paused) {
260+
this.swapToFileSource();
261+
}
246262
}, 800);
247263

248264
return;
@@ -491,6 +507,11 @@ export class AudioService {
491507
if (this.audio) {
492508
this.audio.pause();
493509
this.dispatchEvent('pause');
510+
// pausing a finished generation is a safe break point: swap to the full
511+
// file so the user can scrub the whole track and see the real duration.
512+
if (this.canSwapToFileSource()) {
513+
this.swapToFileSource();
514+
}
494515
}
495516
}
496517

@@ -504,6 +525,93 @@ export class AudioService {
504525
}
505526
}
506527

528+
// true only for a finished MSE stream that hasn't been swapped yet. block mode
529+
// already plays a full-file blob, so it never needs (or gets) a swap.
530+
canSwapToFileSource() {
531+
return (
532+
!this.usingFileSource &&
533+
!this.swapInProgress &&
534+
this.streamFinished &&
535+
this.mediaSource !== null &&
536+
!!this.serverDownloadPath &&
537+
!!this.audio &&
538+
!this.audio.error
539+
);
540+
}
541+
542+
// tear down the bounded MSE buffer and point the same <audio> element at the
543+
// finished server file (FileResponse serves range requests, so duration and
544+
// seeking become correct). reusing the element keeps every listener attached.
545+
// only called at safe moments (idle on completion, on pause, on ended) so active
546+
// playback is never interrupted mid-stream.
547+
async swapToFileSource(targetTime = null, shouldPlay = false) {
548+
if (!this.canSwapToFileSource()) {
549+
return false;
550+
}
551+
this.swapInProgress = true;
552+
553+
const audio = this.audio;
554+
const resumeTime = targetTime != null ? targetTime : (audio.currentTime || 0);
555+
const volume = audio.volume;
556+
const rate = audio.playbackRate;
557+
const previousObjectUrl = this.objectUrl;
558+
const fileUrl = this.serverDownloadPath;
559+
560+
// drop MSE references up front so a late feeder/updateend can't touch them.
561+
this.mediaSource = null;
562+
this.sourceBuffer = null;
563+
this.objectUrl = null;
564+
565+
return await new Promise((resolve) => {
566+
const detach = () => {
567+
audio.removeEventListener('loadedmetadata', onLoaded);
568+
audio.removeEventListener('error', onError);
569+
};
570+
571+
const onLoaded = () => {
572+
detach();
573+
if (audio !== this.audio) { this.swapInProgress = false; resolve(false); return; }
574+
const duration = audio.duration;
575+
if (Number.isFinite(duration) && duration > 0) {
576+
// 1:1 timeline (sequence mode appended the same bytes), so the
577+
// playhead lands where the stream left off.
578+
audio.currentTime = Math.min(Math.max(resumeTime, 0), Math.max(0, duration - 0.05));
579+
}
580+
audio.volume = volume;
581+
audio.playbackRate = rate;
582+
this.usingFileSource = true;
583+
this.swapInProgress = false;
584+
this.dispatchEvent('ready');
585+
if (shouldPlay) {
586+
this.play();
587+
}
588+
resolve(true);
589+
};
590+
591+
const onError = () => {
592+
detach();
593+
if (audio !== this.audio) { this.swapInProgress = false; resolve(false); return; }
594+
this.swapInProgress = false;
595+
// the stream buffer is gone, but the file is still downloadable. surface
596+
// that so the user isn't left with a dead player.
597+
console.warn('Failed to switch to file playback:', audio.error);
598+
this.dispatchEvent('playbackUnavailable');
599+
resolve(false);
600+
};
601+
602+
audio.addEventListener('loadedmetadata', onLoaded, { once: true });
603+
audio.addEventListener('error', onError, { once: true });
604+
605+
audio.src = fileUrl;
606+
audio.load();
607+
608+
// safe to revoke now that the element no longer references the MSE url.
609+
if (previousObjectUrl) {
610+
URL.revokeObjectURL(previousObjectUrl);
611+
}
612+
});
613+
}
614+
507615
setVolume(volume) {
508616
if (this.audio) {
509617
this.audio.volume = Math.max(0, Math.min(1, volume));
@@ -594,6 +702,8 @@ export class AudioService {
594702
this.rejectPendingOperations(new Error('AudioService cancelled'));
595703
this.chunkQueue = [];
596704
this.streamFinished = true;
705+
this.usingFileSource = false;
706+
this.swapInProgress = false;
597707
this.wakeFeeder();
598708
this.revokeObjectUrl();
599709
}
@@ -624,6 +734,8 @@ export class AudioService {
624734
this.rejectPendingOperations(new Error('AudioService cleanup'));
625735
this.chunkQueue = [];
626736
this.streamFinished = true;
737+
this.usingFileSource = false;
738+
this.swapInProgress = false;
627739
this.wakeFeeder();
628740
this.revokeObjectUrl();
629741
}

0 commit comments

Comments
 (0)