From b792a844e0dba47ce2bb4f711afd4c9adcf2c3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 13 Jan 2025 01:48:29 +0000 Subject: [PATCH] translations: use a more distinctive separator I found that the translator would sometimes replace the pipe character with another symbol (maybe it got confused thinking the character is part of the text?). Added spaces around the pipe to make it more clear that it's definitely the separator. --- beetsplug/lyrics.py | 7 +++++-- test/plugins/test_lyrics.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index a9ee2666ac..1cc00d83fd 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -733,6 +733,7 @@ def scrape(cls, html: str) -> str | None: class Translator(RequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + SEPARATOR = " | " remove_translations = partial(re.compile(r" / [^\n]+").sub, "") _log: beets.logging.Logger @@ -762,14 +763,16 @@ def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]: map the translations back to the original texts. """ unique_texts = list(dict.fromkeys(texts)) + text = self.SEPARATOR.join(unique_texts) data: list[TranslatorAPI.Response] = self.post_json( self.TRANSLATE_URL, headers={"Ocp-Apim-Subscription-Key": self.api_key}, - json=[{"text": "|".join(unique_texts)}], + json=[{"text": text}], params={"api-version": "3.0", "to": self.to_language}, ) - translations = data[0]["translations"][0]["text"].split("|") + translated_text = data[0]["translations"][0]["text"] + translations = translated_text.split(self.SEPARATOR) trans_by_text = dict(zip(unique_texts, translations)) return list(zip(texts, (trans_by_text.get(t, "") for t in texts))) diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 328c7deab5..18b3e16172 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -531,23 +531,23 @@ def callback(request, _): if b"Refrain" in request.body: translations = ( "" - "|[Refrain : Doja Cat]" - "|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 - "|Mon corps ne me laissait pas le cacher (Cachez-le)" - "|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 - "|Chevauchant à travers le tonnerre, la foudre" + " | [Refrain : Doja Cat]" + " | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 + " | Mon corps ne me laissait pas le cacher (Cachez-le)" + " | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 + " | Chevauchant à travers le tonnerre, la foudre" ) elif b"00:00.00" in request.body: translations = ( "" - "|[00:00.00] Quelques paroles synchronisées" - "|[00:01.00] Quelques paroles plus synchronisées" + " | [00:00.00] Quelques paroles synchronisées" + " | [00:01.00] Quelques paroles plus synchronisées" ) else: translations = ( "" - "|Quelques paroles synchronisées" - "|Quelques paroles plus synchronisées" + " | Quelques paroles synchronisées" + " | Quelques paroles plus synchronisées" ) return [