From 99aa371cb9c6aaeb60d2a87549f8862963724fc5 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 7 Nov 2023 17:46:33 -0500 Subject: [PATCH] Experiment with Open AI voices --- src/css/_controls.scss | 13 +++ src/plugins/tts/AbstractTTSEngine.js | 66 +++++++++---- src/plugins/tts/FestivalTTSEngine.js | 133 +++++++++++++++++---------- src/plugins/tts/WebTTSEngine.js | 17 +++- src/plugins/tts/plugin.tts.js | 55 +++++++---- 5 files changed, 201 insertions(+), 83 deletions(-) diff --git a/src/css/_controls.scss b/src/css/_controls.scss index 7b16504f1..046c13f28 100644 --- a/src/css/_controls.scss +++ b/src/css/_controls.scss @@ -132,6 +132,19 @@ } } } + + .engine.selected { + font-weight: bold; + text-decoration: none; + pointer-events: none; + } + + .engine:not(:last-child)::after { + content: "|"; + text-decoration: none; + display: inline-block; + padding: 0 0.5ch; + } } .scrubber { diff --git a/src/plugins/tts/AbstractTTSEngine.js b/src/plugins/tts/AbstractTTSEngine.js index 6477db0cb..47999567a 100644 --- a/src/plugins/tts/AbstractTTSEngine.js +++ b/src/plugins/tts/AbstractTTSEngine.js @@ -51,16 +51,17 @@ export default class AbstractTTSEngine { this.events = $({}); /** @type {SpeechSynthesisVoice} */ this.voice = null; + this.preloadNext = true; // Listen for voice changes (fired by subclasses) this.events.on('voiceschanged', this.updateBestVoice); - this.events.trigger('voiceschanged'); + setTimeout(() => this.events.trigger('voiceschanged'), 0); } /** * @abstract * @return {boolean} */ - static isSupported() { throw new Error("Unimplemented abstract class"); } + isSupported() { throw new Error("Unimplemented abstract class"); } /** * @abstract @@ -78,13 +79,14 @@ export default class AbstractTTSEngine { /** * @param {number} leafIndex * @param {number} numLeafs total number of leafs in the current book + * @param {PageChunkIterator} chunkIterator */ - start(leafIndex, numLeafs) { + start(leafIndex, numLeafs, chunkIterator = null) { this.playing = true; this.paused = false; this.opts.onLoadingStart(); - this._chunkIterator = new PageChunkIterator(numLeafs, leafIndex, { + this._chunkIterator = chunkIterator ?? new PageChunkIterator(numLeafs, leafIndex, { server: this.opts.server, bookPath: this.opts.bookPath, pageBufferSize: 5, @@ -140,7 +142,7 @@ export default class AbstractTTSEngine { } /** @param {string} voiceURI */ - setVoice(voiceURI) { + async setVoice(voiceURI) { // if the user actively selects a voice, don't re-choose best voice anymore // MS Edge fires voices changed randomly very often this.events.off('voiceschanged', this.updateBestVoice); @@ -149,7 +151,15 @@ export default class AbstractTTSEngine { if (this.opts.bookLanguage && hasLocalStorage()) { localStorage.setItem(`BRtts-voice-${this.opts.bookLanguage}`, this.voice.voiceURI); } - if (this.activeSound) this.activeSound.setVoice(this.voice); + if (this.activeSound) { + if (this.nextStepPromise) { + this._chunkIterator.decrement(); + this.nextStepPromise = this.loadNextStep(); + } + this.opts.onLoadingStart(); + await this.activeSound.setVoice(this.voice); + this.opts.onLoadingComplete(); + } } /** @param {number} newRate */ @@ -158,29 +168,53 @@ export default class AbstractTTSEngine { if (this.activeSound) this.activeSound.setPlaybackRate(newRate); } - /** @private */ - async step() { + async loadNextStep() { const chunk = await this._chunkIterator.next(); if (chunk == PageChunkIterator.AT_END) { - this.stop(); - this.opts.onDone(); - return; + return { chunk }; } - this.opts.onLoadingStart(); + const sound = this.createSound(chunk); sound.chunk = chunk; sound.rate = this.playbackRate; sound.voice = this.voice; - sound.load(() => this.opts.onLoadingComplete()); - - this.opts.onLoadingComplete(); + await sound.load(() => this.opts.onLoadingComplete()); + return { + chunk, + sound, + }; + } + /** @private */ + async step() { + const loadPromise = this.nextStepPromise ? this.nextStepPromise : this.loadNextStep(); + if (!this.nextStepPromise) this.opts.onLoadingStart(); + else { + const raceResolve = await Promise.race([ + loadPromise, + new Promise(resolve => setTimeout(() => resolve('timeout'), 100)), + ]); + + if (raceResolve === 'timeout') { + this.opts.onLoadingStart(); + } + } + const {chunk, sound} = await loadPromise; + if (chunk == PageChunkIterator.AT_END) { + this.stop(); + this.opts.onDone(); + return; + } + // this.opts.onLoadingComplete(); await this.opts.beforeChunkPlay(chunk); if (!this.playing) return; - const playPromise = await this.playSound(sound) + const playPromise = this.playSound(sound) .then(()=> this.opts.afterChunkPlay(sound.chunk)); + if (this.preloadNext) { + this.nextStepPromise = this.loadNextStep(); + } if (this.paused) this.pause(); await playPromise; diff --git a/src/plugins/tts/FestivalTTSEngine.js b/src/plugins/tts/FestivalTTSEngine.js index cb099a09e..a5ef0aeb6 100644 --- a/src/plugins/tts/FestivalTTSEngine.js +++ b/src/plugins/tts/FestivalTTSEngine.js @@ -1,7 +1,5 @@ import AbstractTTSEngine from './AbstractTTSEngine.js'; -import { sleep } from '../../BookReader/utils.js'; -/* global soundManager */ -import 'soundmanager2'; +import { promisifyEvent } from '../../BookReader/utils.js'; import 'jquery.browser'; /** @typedef {import("./AbstractTTSEngine.js").TTSEngineOptions} TTSEngineOptions */ @@ -13,8 +11,8 @@ import 'jquery.browser'; **/ export default class FestivalTTSEngine extends AbstractTTSEngine { /** @override */ - static isSupported() { - return typeof(soundManager) !== 'undefined' && soundManager.supported(); + isSupported() { + return true; //typeof(soundManager) !== 'undefined' && soundManager.supported(); } /** @param {TTSEngineOptions} options */ @@ -29,32 +27,40 @@ export default class FestivalTTSEngine extends AbstractTTSEngine { /** @override */ getVoices() { return [ - { default: true, lang: "en-US", localService: false, name: "Festival - English (US)", voiceURI: null } + { default: true, lang: "en-US", localService: false, name: "OpenAI alloy", voiceURI: 'OpenAI - alloy', openaiVoice: 'alloy', openaiModel: 'tts-1' }, + { default: true, lang: "en-US", localService: false, name: "OpenAI echo", voiceURI: 'OpenAI - echo', openaiVoice: 'echo', openaiModel: 'tts-1' }, + { default: true, lang: "en-US", localService: false, name: "OpenAI fable", voiceURI: 'OpenAI - fable', openaiVoice: 'fable', openaiModel: 'tts-1' }, + { default: true, lang: "en-US", localService: false, name: "OpenAI onyx", voiceURI: 'OpenAI - onyx', openaiVoice: 'onyx', openaiModel: 'tts-1' }, + { default: true, lang: "en-US", localService: false, name: "OpenAI nova", voiceURI: 'OpenAI - nova', openaiVoice: 'nova', openaiModel: 'tts-1' }, + { default: true, lang: "en-US", localService: false, name: "OpenAI shimmer", voiceURI: 'OpenAI - shimmer', openaiVoice: 'shimmer', openaiModel: 'tts-1' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI alloy HD", voiceURI: 'OpenAI - alloy HD', openaiVoice: 'alloy', openaiModel: 'tts-1-hd' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI echo HD", voiceURI: 'OpenAI - echo HD', openaiVoice: 'echo', openaiModel: 'tts-1-hd' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI fable HD", voiceURI: 'OpenAI - fable HD', openaiVoice: 'fable', openaiModel: 'tts-1-hd' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI onyx HD", voiceURI: 'OpenAI - onyx HD', openaiVoice: 'onyx', openaiModel: 'tts-1-hd' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI nova HD", voiceURI: 'OpenAI - nova HD', openaiVoice: 'nova', openaiModel: 'tts-1-hd' }, + // { default: true, lang: "en-US", localService: false, name: "OpenAI shimmer HD", voiceURI: 'OpenAI - shimmer HD', openaiVoice: 'shimmer', openaiModel: 'tts-1-hd' }, ]; } /** @override */ init() { + super.init(); // setup sound manager - soundManager.setup({ - debugMode: false, - // Note, there's a bug in Chrome regarding range requests. - // Flash is used as a workaround. - // See https://bugs.chromium.org/p/chromium/issues/detail?id=505707 - preferFlash: true, - url: '/bookreader/BookReader/soundmanager/swf', - useHTML5Audio: true, - //flash 8 version of swf is buggy when calling play() on a sound that is still loading - flashVersion: 9 - }); + // soundManager.setup({ + // debugMode: false, + // useHTML5Audio: true, + // //flash 8 version of swf is buggy when calling play() on a sound that is still loading + // flashVersion: 9 + // }); } /** * @override * @param {number} leafIndex * @param {number} numLeafs total number of leafs in the current book + * @param {PageChunkIterator} chunkIterator */ - start(leafIndex, numLeafs) { + start(leafIndex, numLeafs, chunkIterator = null) { let promise = null; // Hack for iOS @@ -63,12 +69,12 @@ export default class FestivalTTSEngine extends AbstractTTSEngine { } promise = promise || Promise.resolve(); - promise.then(() => super.start(leafIndex, numLeafs)); + promise.then(() => super.start(leafIndex, numLeafs, chunkIterator)); } /** @override */ createSound(chunk) { - return new FestivalTTSSound(this.getSoundUrl(chunk.text)); + return new FestivalTTSSound(this.getSoundUrl(chunk.text), this.voice); } /** @@ -78,9 +84,13 @@ export default class FestivalTTSEngine extends AbstractTTSEngine { * @return {String} url */ getSoundUrl(dataString) { - return 'https://' + this.opts.server + '/BookReader/BookReaderGetTTS.php?string=' - + encodeURIComponent(dataString) - + '&format=.' + this.audioFormat; + return `https://${this.opts.server}/BookReader/BookReaderGetTTS.php?${ + new URLSearchParams({ + string: dataString, + format: this.audioFormat, + // voice: this.voice.name, + }) + }}`; } /** @@ -92,18 +102,20 @@ export default class FestivalTTSEngine extends AbstractTTSEngine { * @return {PromiseLike} */ async iOSCaptureUserIntentHack() { - const sound = soundManager.createSound({ url: SILENCE_1MS[this.audioFormat] }); - await new Promise(res => sound.play({onfinish: res})); - sound.destruct(); + const sound = new Audio(SILENCE_1MS[this.audioFormat]); + const endedPromise = promisifyEvent(sound, 'ended'); + await sound.play(); + await endedPromise; } } /** @extends AbstractTTSSound */ class FestivalTTSSound { /** @param {string} soundUrl **/ - constructor(soundUrl) { + constructor(soundUrl, voice) { this.soundUrl = soundUrl; - /** @type {SMSound} */ + this.voice = voice; + /** @type {HTMLAudioElement} */ this.sound = null; this.rate = 1; /** @type {function} calling this resolves the "play" promise */ @@ -111,52 +123,79 @@ class FestivalTTSSound { } get loaded() { - return this.sound && this.sound.loaded; + return !!this.sound; + } + + /** @param {SpeechSynthesisVoice} voice */ + async setVoice(voice) { + if (voice == this.voice) return; + this.voice = voice; + if (!this.sound?.paused && !this.sound?.ended) { + this.sound.pause(); + const timeOffset = this.sound.currentTime; + const url = await this.fetchBlobUrl(); + this.sound.src = url; + this.sound.play(); + this.sound.currentTime = Math.max(0, timeOffset - 5); + } } - load(onload) { - this.sound = soundManager.createSound({ - url: this.soundUrl, - // API recommended, but only fires once play started on safari - onload: () => { - if (this.rate != 1) this.sound.setPlaybackRate(this.rate); - onload(); + async preload() { + if (!this.sound) await this.load(); + } + + async fetchBlobUrl() { + const text = new URL(this.soundUrl).searchParams.get('string'); + const resp = await fetch('https://api.openai.com/v1/audio/speech', { + method: 'POST', + headers: { + 'Authorization': 'Bearer ' + (window.OPEN_API_KEY ||= prompt('OPEN_API_KEY')), + 'Content-Type': 'application/json' }, - onresume: async () => { - await sleep(25); - if (this.rate != 1) this.sound.setPlaybackRate(this.rate); - } + body: JSON.stringify({ + model: this.voice.openaiModel, + input: text, + voice: this.voice.openaiVoice, + }), }); - return this.sound.load(); + // const resp = await fetch(this.soundUrl); + return URL.createObjectURL(await resp.blob()); + } + + async load(onload) { + this.sound = new Audio(await this.fetchBlobUrl()); + onload?.(); + return; } async play() { await new Promise(res => { this._finishResolver = res; - this.sound.play({ onfinish: res }); + this.sound.play(); + promisifyEvent(this.sound, 'ended').then(res); }); - this.sound.destruct(); + // this.sound.destruct(); } /** @override */ stop() { - this.sound.stop(); + this.sound.pause(); return Promise.resolve(); } /** @override */ pause() { this.sound.pause(); } /** @override */ - resume() { this.sound.resume(); } + resume() { this.sound.play(); } /** @override */ setPlaybackRate(rate) { this.rate = rate; - this.sound.setPlaybackRate(rate); + this.sound.playbackRate = rate; } /** @override */ finish() { - this.sound.stop(); + this.sound.pause(); this._finishResolver(); } } diff --git a/src/plugins/tts/WebTTSEngine.js b/src/plugins/tts/WebTTSEngine.js index 73b8c56c9..7c17cc4e9 100644 --- a/src/plugins/tts/WebTTSEngine.js +++ b/src/plugins/tts/WebTTSEngine.js @@ -12,7 +12,7 @@ import AbstractTTSEngine from './AbstractTTSEngine.js'; * TTS using Web Speech APIs **/ export default class WebTTSEngine extends AbstractTTSEngine { - static isSupported() { + isSupported() { return typeof(window.speechSynthesis) !== 'undefined' && !/samsungbrowser/i.test(navigator.userAgent); } @@ -27,7 +27,18 @@ export default class WebTTSEngine extends AbstractTTSEngine { } /** @override */ - start(leafIndex, numLeafs) { + init() { + super.init(); + + // Stop the audio before the user navigates away + window.addEventListener('beforeunload', () => { + // Chrome just keeps going :P + if (this.isPlaying) this.stop(); + }); + } + + /** @override */ + start(leafIndex, numLeafs, chunkIterator = null) { // Need to run in this function to capture user intent to start playing audio if ('mediaSession' in navigator) { const audio = new Audio(SILENCE_6S_MP3); @@ -67,7 +78,7 @@ export default class WebTTSEngine extends AbstractTTSEngine { }); } - return super.start(leafIndex, numLeafs); + return super.start(leafIndex, numLeafs, chunkIterator); } /** @override */ diff --git a/src/plugins/tts/plugin.tts.js b/src/plugins/tts/plugin.tts.js index 76b41190c..7f81f8945 100644 --- a/src/plugins/tts/plugin.tts.js +++ b/src/plugins/tts/plugin.tts.js @@ -4,6 +4,7 @@ */ import FestivalTTSEngine from './FestivalTTSEngine.js'; import WebTTSEngine from './WebTTSEngine.js'; +// import CompositeTTSEngine from './CompositeTTSEngine.js'; import { toISO6391, approximateWordCount } from './utils.js'; import { en as tooltips } from './tooltip_dict.js'; import { renderBoxesInPageContainerLayer } from '../../BookReader/PageContainer.js'; @@ -26,27 +27,32 @@ BookReader.prototype.setup = (function (super_) { /** @type { {[pageIndex: number]: Array<{ l: number, r: number, t: number, b: number }>} } */ this._ttsBoxesByIndex = {}; - let TTSEngine = WebTTSEngine.isSupported() ? WebTTSEngine : - FestivalTTSEngine.isSupported() ? FestivalTTSEngine : - null; + const engineOptions = { + server: options.server, + bookPath: options.bookPath, + bookLanguage: toISO6391(options.bookLanguage), + onLoadingStart: this.showProgressPopup.bind(this, 'Loading audio...'), + onLoadingComplete: this.removeProgressPopup.bind(this), + onDone: this.ttsStop.bind(this), + beforeChunkPlay: this.ttsBeforeChunkPlay.bind(this), + afterChunkPlay: this.ttsSendChunkFinishedAnalyticsEvent.bind(this), + }; + + /** @type {AbstractTTSEngine[]} */ + this.ttsEnginesAll = [ + new FestivalTTSEngine(engineOptions), + new WebTTSEngine(engineOptions), + ]; + let ttsEngine = this.ttsEnginesAll.find(engine => engine.isSupported()); - if (/_forceTTSEngine=(festival|web)/.test(location.toString())) { - const engineName = location.toString().match(/_forceTTSEngine=(festival|web)/)[1]; - TTSEngine = { festival: FestivalTTSEngine, web: WebTTSEngine }[engineName]; + if (/_forceTTSEngine=(openai|browser)/.test(location.toString())) { + const engineName = location.toString().match(/_forceTTSEngine=(openai|browser)/)[1]; + ttsEngine = { openai: this.ttsEnginesAll[0], browser: this.ttsEnginesAll[1] }[engineName]; } - if (TTSEngine) { + if (ttsEngine) { /** @type {AbstractTTSEngine} */ - this.ttsEngine = new TTSEngine({ - server: options.server, - bookPath: options.bookPath, - bookLanguage: toISO6391(options.bookLanguage), - onLoadingStart: this.showProgressPopup.bind(this, 'Loading audio...'), - onLoadingComplete: this.removeProgressPopup.bind(this), - onDone: this.ttsStop.bind(this), - beforeChunkPlay: this.ttsBeforeChunkPlay.bind(this), - afterChunkPlay: this.ttsSendChunkFinishedAnalyticsEvent.bind(this), - }); + this.ttsEngine = ttsEngine; } } }; @@ -119,6 +125,16 @@ BookReader.prototype.initNavbar = (function (super_) { return function () { const $el = super_.call(this); if (this.options.enableTtsPlugin && this.ttsEngine) { + const engines = this.ttsEnginesAll.map(engine => { + const url = new URL(location.toString()); + url.searchParams.set('_forceTTSEngine', engine.constructor.name == 'FestivalTTSEngine' ? 'openai' : 'browser'); + return { + selected: engine === this.ttsEngine, + name: engine.constructor.name == 'FestivalTTSEngine' ? 'OpenAI' : 'Browser', + url, + }; + }); + this.refs.$BRReadAloudToolbar = $(` `);