Skip to content

Commit

Permalink
Experiment with Open AI voices
Browse files Browse the repository at this point in the history
  • Loading branch information
cdrini committed Nov 7, 2023
1 parent defeff6 commit 99aa371
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 83 deletions.
13 changes: 13 additions & 0 deletions src/css/_controls.scss
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@
}
}
}

.engine.selected {
font-weight: bold;
text-decoration: none;
pointer-events: none;
}

.engine:not(:last-child)::after {
content: "|";
text-decoration: none;
display: inline-block;
padding: 0 0.5ch;
}
}

.scrubber {
Expand Down
66 changes: 50 additions & 16 deletions src/plugins/tts/AbstractTTSEngine.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,17 @@ export default class AbstractTTSEngine {
this.events = $({});
/** @type {SpeechSynthesisVoice} */
this.voice = null;
this.preloadNext = true;
// Listen for voice changes (fired by subclasses)
this.events.on('voiceschanged', this.updateBestVoice);
this.events.trigger('voiceschanged');
setTimeout(() => this.events.trigger('voiceschanged'), 0);
}

/**
* @abstract
* @return {boolean}
*/
static isSupported() { throw new Error("Unimplemented abstract class"); }
isSupported() { throw new Error("Unimplemented abstract class"); }

/**
* @abstract
Expand All @@ -78,13 +79,14 @@ export default class AbstractTTSEngine {
/**
* @param {number} leafIndex
* @param {number} numLeafs total number of leafs in the current book
* @param {PageChunkIterator} chunkIterator
*/
start(leafIndex, numLeafs) {
start(leafIndex, numLeafs, chunkIterator = null) {
this.playing = true;
this.paused = false;
this.opts.onLoadingStart();

this._chunkIterator = new PageChunkIterator(numLeafs, leafIndex, {
this._chunkIterator = chunkIterator ?? new PageChunkIterator(numLeafs, leafIndex, {
server: this.opts.server,
bookPath: this.opts.bookPath,
pageBufferSize: 5,
Expand Down Expand Up @@ -140,7 +142,7 @@ export default class AbstractTTSEngine {
}

/** @param {string} voiceURI */
setVoice(voiceURI) {
async setVoice(voiceURI) {
// if the user actively selects a voice, don't re-choose best voice anymore
// MS Edge fires voices changed randomly very often
this.events.off('voiceschanged', this.updateBestVoice);
Expand All @@ -149,7 +151,15 @@ export default class AbstractTTSEngine {
if (this.opts.bookLanguage && hasLocalStorage()) {
localStorage.setItem(`BRtts-voice-${this.opts.bookLanguage}`, this.voice.voiceURI);
}
if (this.activeSound) this.activeSound.setVoice(this.voice);
if (this.activeSound) {
if (this.nextStepPromise) {
this._chunkIterator.decrement();
this.nextStepPromise = this.loadNextStep();
}
this.opts.onLoadingStart();
await this.activeSound.setVoice(this.voice);
this.opts.onLoadingComplete();
}
}

/** @param {number} newRate */
Expand All @@ -158,29 +168,53 @@ export default class AbstractTTSEngine {
if (this.activeSound) this.activeSound.setPlaybackRate(newRate);
}

/** @private */
async step() {
async loadNextStep() {
const chunk = await this._chunkIterator.next();
if (chunk == PageChunkIterator.AT_END) {
this.stop();
this.opts.onDone();
return;
return { chunk };
}
this.opts.onLoadingStart();

const sound = this.createSound(chunk);
sound.chunk = chunk;
sound.rate = this.playbackRate;
sound.voice = this.voice;
sound.load(() => this.opts.onLoadingComplete());

this.opts.onLoadingComplete();
await sound.load(() => this.opts.onLoadingComplete());
return {
chunk,
sound,
};
}

/** @private */
async step() {
const loadPromise = this.nextStepPromise ? this.nextStepPromise : this.loadNextStep();
if (!this.nextStepPromise) this.opts.onLoadingStart();
else {
const raceResolve = await Promise.race([
loadPromise,
new Promise(resolve => setTimeout(() => resolve('timeout'), 100)),
]);

if (raceResolve === 'timeout') {
this.opts.onLoadingStart();
}
}
const {chunk, sound} = await loadPromise;
if (chunk == PageChunkIterator.AT_END) {
this.stop();
this.opts.onDone();
return;
}
// this.opts.onLoadingComplete();
await this.opts.beforeChunkPlay(chunk);

if (!this.playing) return;

const playPromise = await this.playSound(sound)
const playPromise = this.playSound(sound)
.then(()=> this.opts.afterChunkPlay(sound.chunk));
if (this.preloadNext) {
this.nextStepPromise = this.loadNextStep();
}

if (this.paused) this.pause();
await playPromise;
Expand Down
133 changes: 86 additions & 47 deletions src/plugins/tts/FestivalTTSEngine.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import AbstractTTSEngine from './AbstractTTSEngine.js';
import { sleep } from '../../BookReader/utils.js';
/* global soundManager */
import 'soundmanager2';
import { promisifyEvent } from '../../BookReader/utils.js';
import 'jquery.browser';

/** @typedef {import("./AbstractTTSEngine.js").TTSEngineOptions} TTSEngineOptions */
Expand All @@ -13,8 +11,8 @@ import 'jquery.browser';
**/
export default class FestivalTTSEngine extends AbstractTTSEngine {
/** @override */
static isSupported() {
return typeof(soundManager) !== 'undefined' && soundManager.supported();
isSupported() {
return true; //typeof(soundManager) !== 'undefined' && soundManager.supported();
}

/** @param {TTSEngineOptions} options */
Expand All @@ -29,32 +27,40 @@ export default class FestivalTTSEngine extends AbstractTTSEngine {
/** @override */
getVoices() {
return [
{ default: true, lang: "en-US", localService: false, name: "Festival - English (US)", voiceURI: null }
{ default: true, lang: "en-US", localService: false, name: "OpenAI alloy", voiceURI: 'OpenAI - alloy', openaiVoice: 'alloy', openaiModel: 'tts-1' },
{ default: true, lang: "en-US", localService: false, name: "OpenAI echo", voiceURI: 'OpenAI - echo', openaiVoice: 'echo', openaiModel: 'tts-1' },
{ default: true, lang: "en-US", localService: false, name: "OpenAI fable", voiceURI: 'OpenAI - fable', openaiVoice: 'fable', openaiModel: 'tts-1' },
{ default: true, lang: "en-US", localService: false, name: "OpenAI onyx", voiceURI: 'OpenAI - onyx', openaiVoice: 'onyx', openaiModel: 'tts-1' },
{ default: true, lang: "en-US", localService: false, name: "OpenAI nova", voiceURI: 'OpenAI - nova', openaiVoice: 'nova', openaiModel: 'tts-1' },
{ default: true, lang: "en-US", localService: false, name: "OpenAI shimmer", voiceURI: 'OpenAI - shimmer', openaiVoice: 'shimmer', openaiModel: 'tts-1' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI alloy HD", voiceURI: 'OpenAI - alloy HD', openaiVoice: 'alloy', openaiModel: 'tts-1-hd' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI echo HD", voiceURI: 'OpenAI - echo HD', openaiVoice: 'echo', openaiModel: 'tts-1-hd' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI fable HD", voiceURI: 'OpenAI - fable HD', openaiVoice: 'fable', openaiModel: 'tts-1-hd' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI onyx HD", voiceURI: 'OpenAI - onyx HD', openaiVoice: 'onyx', openaiModel: 'tts-1-hd' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI nova HD", voiceURI: 'OpenAI - nova HD', openaiVoice: 'nova', openaiModel: 'tts-1-hd' },
// { default: true, lang: "en-US", localService: false, name: "OpenAI shimmer HD", voiceURI: 'OpenAI - shimmer HD', openaiVoice: 'shimmer', openaiModel: 'tts-1-hd' },
];
}

/** @override */
init() {
super.init();
// setup sound manager
soundManager.setup({
debugMode: false,
// Note, there's a bug in Chrome regarding range requests.
// Flash is used as a workaround.
// See https://bugs.chromium.org/p/chromium/issues/detail?id=505707
preferFlash: true,
url: '/bookreader/BookReader/soundmanager/swf',
useHTML5Audio: true,
//flash 8 version of swf is buggy when calling play() on a sound that is still loading
flashVersion: 9
});
// soundManager.setup({
// debugMode: false,
// useHTML5Audio: true,
// //flash 8 version of swf is buggy when calling play() on a sound that is still loading
// flashVersion: 9
// });
}

/**
* @override
* @param {number} leafIndex
* @param {number} numLeafs total number of leafs in the current book
* @param {PageChunkIterator} chunkIterator
*/
start(leafIndex, numLeafs) {
start(leafIndex, numLeafs, chunkIterator = null) {
let promise = null;

// Hack for iOS
Expand All @@ -63,12 +69,12 @@ export default class FestivalTTSEngine extends AbstractTTSEngine {
}

promise = promise || Promise.resolve();
promise.then(() => super.start(leafIndex, numLeafs));
promise.then(() => super.start(leafIndex, numLeafs, chunkIterator));
}

/** @override */
createSound(chunk) {
return new FestivalTTSSound(this.getSoundUrl(chunk.text));
return new FestivalTTSSound(this.getSoundUrl(chunk.text), this.voice);
}

/**
Expand All @@ -78,9 +84,13 @@ export default class FestivalTTSEngine extends AbstractTTSEngine {
* @return {String} url
*/
getSoundUrl(dataString) {
return 'https://' + this.opts.server + '/BookReader/BookReaderGetTTS.php?string='
+ encodeURIComponent(dataString)
+ '&format=.' + this.audioFormat;
return `https://${this.opts.server}/BookReader/BookReaderGetTTS.php?${
new URLSearchParams({
string: dataString,
format: this.audioFormat,
// voice: this.voice.name,
})
}}`;
}

/**
Expand All @@ -92,71 +102,100 @@ export default class FestivalTTSEngine extends AbstractTTSEngine {
* @return {PromiseLike}
*/
async iOSCaptureUserIntentHack() {
const sound = soundManager.createSound({ url: SILENCE_1MS[this.audioFormat] });
await new Promise(res => sound.play({onfinish: res}));
sound.destruct();
const sound = new Audio(SILENCE_1MS[this.audioFormat]);
const endedPromise = promisifyEvent(sound, 'ended');
await sound.play();
await endedPromise;
}
}

/**
 * One chunk of synthesized speech, played through an HTMLAudioElement whose
 * source is a blob URL returned by the OpenAI speech endpoint.
 *
 * @extends AbstractTTSSound
 */
class FestivalTTSSound {
  /**
   * @param {string} soundUrl URL whose `string` query parameter holds the text to speak
   * @param {SpeechSynthesisVoice} voice voice descriptor; `openaiVoice` and
   *   `openaiModel` are read from it when the audio is requested
   **/
  constructor(soundUrl, voice) {
    this.soundUrl = soundUrl;
    this.voice = voice;
    /** @type {HTMLAudioElement} */
    this.sound = null;
    this.rate = 1;
    /** @type {function} calling this resolves the "play" promise */
    this._finishResolver = null;
    /** @type {string|null} blob URL currently backing `this.sound` */
    this._blobUrl = null;
  }

  /** @return {boolean} whether an audio element has been created for this chunk */
  get loaded() {
    return !!this.sound;
  }

  /**
   * Switch voice. If the chunk is currently playing, the audio is re-requested
   * with the new voice and playback restarts ~5s before the old position.
   * @param {SpeechSynthesisVoice} voice
   */
  async setVoice(voice) {
    if (voice == this.voice) return;
    this.voice = voice;

    const audio = this.sound;
    // Not loaded yet: load() will request the new voice, nothing to swap.
    if (!audio) return;
    // Only re-synthesize audio that is actively playing.
    if (audio.paused || audio.ended) return;

    audio.pause();
    const timeOffset = audio.currentTime;
    const url = await this.fetchBlobUrl();
    audio.src = url;
    this._releaseBlobUrl();
    this._blobUrl = url;
    // Assigning a new src reloads the element, which resets playbackRate to
    // the default; re-apply the rate the user selected.
    audio.playbackRate = this.rate;
    audio.play();
    audio.currentTime = Math.max(0, timeOffset - 5);
  }

  /** Load the audio if that has not happened yet. */
  async preload() {
    if (!this.sound) await this.load();
  }

  /**
   * Request speech for this chunk's text and wrap the response in a blob URL.
   * @return {Promise<string>} blob URL of the synthesized audio
   * @throws {Error} when the speech endpoint answers with a non-2xx status
   */
  async fetchBlobUrl() {
    const text = new URL(this.soundUrl).searchParams.get('string');
    // NOTE(review): the API key is prompted for and cached on `window`; this is
    // only acceptable for a local experiment, never for shipped code.
    const resp = await fetch('https://api.openai.com/v1/audio/speech', {
      method: 'POST',
      headers: {
        'Authorization': 'Bearer ' + (window.OPEN_API_KEY ||= prompt('OPEN_API_KEY')),
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: this.voice.openaiModel,
        input: text,
        voice: this.voice.openaiVoice,
      }),
    });
    // An error body is JSON, not audio: turning it into a blob would give an
    // element that never fires "ended", so surface the failure instead.
    if (!resp.ok) {
      throw new Error(`Speech request failed with HTTP status ${resp.status}`);
    }
    return URL.createObjectURL(await resp.blob());
  }

  /**
   * Fetch the audio and create the audio element.
   * @param {function} [onload] called once the element exists
   */
  async load(onload) {
    const url = await this.fetchBlobUrl();
    this._releaseBlobUrl();
    this._blobUrl = url;
    this.sound = new Audio(url);
    // `rate` may have been assigned before loading; a fresh element starts at 1.
    if (this.rate != 1) this.sound.playbackRate = this.rate;
    onload?.();
  }

  /**
   * Play the chunk. Resolves when playback ends or finish() is called.
   */
  async play() {
    await new Promise((resolve, reject) => {
      this._finishResolver = resolve;
      // pause() during a pending play() rejects with AbortError; that is the
      // normal pause/resume path, so only other failures are propagated.
      Promise.resolve(this.sound.play()).catch((err) => {
        if (err?.name !== 'AbortError') reject(err);
      });
      promisifyEvent(this.sound, 'ended').then(resolve);
    });
    // The chunk is done; free the audio blob it was holding.
    this._releaseBlobUrl();
  }

  /** @override */
  stop() {
    this.sound?.pause();
    return Promise.resolve();
  }

  /** @override */
  pause() { this.sound?.pause(); }
  /** @override */
  resume() { this.sound?.play(); }
  /** @override */
  setPlaybackRate(rate) {
    this.rate = rate;
    if (this.sound) this.sound.playbackRate = rate;
  }

  /** @override */
  finish() {
    this.sound?.pause();
    // No resolver exists until play() has been called.
    this._finishResolver?.();
  }

  /** Revoke the blob URL backing the audio element, if any. */
  _releaseBlobUrl() {
    if (this._blobUrl) URL.revokeObjectURL(this._blobUrl);
    this._blobUrl = null;
  }
}
Expand Down
Loading

0 comments on commit 99aa371

Please sign in to comment.