From 1904ee5ce33aa2f5814975db8683d0dfcfa1e516 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 24 Oct 2023 08:47:18 -0700 Subject: [PATCH] default to UTF-8 when writing .pot files (#300) * default to UTF-8 when writing .pot files * add back line break Co-authored-by: Hadley Wickham * update test of format(template=TRUE) output * NEWS --------- Co-authored-by: Hadley Wickham --- DESCRIPTION | 2 +- NEWS.md | 1 + R/write_po_file.R | 8 +++++--- tests/testthat/test-po_compile.R | 15 +++++++++++++++ tests/testthat/test-write-po-file.R | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c0322401..17c8aeb3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Suggests: crayon, knitr, rmarkdown, - testthat (>= 3.0.0), + testthat (>= 3.1.5), withr VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index 52f07a08..404ae924 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,7 @@ * [Note] potools gains a logo featuring a [potoo](https://en.wikipedia.org/wiki/Potoo) thanks to the artistic skills of @allisonhorst * [Feature] New function `po_explain_plurals()` to help de-mystify how to supply plurals for different languages. For example, `po_explain_plurals("pl", 3)` explains that "For Polish (Polski), plural index 2 applies when n = 0, 5-21, 25-31, 35-41, ...", [#278](https://github.com/MichaelChirico/potools/issues/278). Thanks @hadley for the suggestion to independently export this functionality which was already used as part of `translate_package()`. * [Bugfix] A test for non-GNU versions of `gettext` has been improved, [#221](https://github.com/MichaelChirico/potools/issues/221). Please report any issues here -- I don't have any good way of testing non-GNU tools. + * [Bugfix] .po files generated by {potools} will default to using `charset=UTF-8`, not `charset=ASCII`, to prevent encoding issues (@michaelchirico, [#299](https://github.com/MichaelChirico/potools/issues/299)). Thanks @maelle for the report! 
## New languages/locales supported out of the box: diff --git a/R/write_po_file.R b/R/write_po_file.R index a0025c25..a69a9c35 100644 --- a/R/write_po_file.R +++ b/R/write_po_file.R @@ -18,6 +18,9 @@ write_po_files <- function(message_data, po_dir, params, template = FALSE, use_b if (is_base_package) { params$package <- "R" params$bugs <- "bugs.r-project.org" + # #299: msg* commands work best with charset=UTF-8, + # but the 'base' .pot files leave it unspecified (CHARSET). + if (template) params$charset <- "CHARSET" } if (template) { @@ -248,7 +251,7 @@ write_po_file <- function( po_data[ , 'msgid_plural' := strsplit(msgid_plural, "|||", fixed = TRUE)] # tools::xgettext2pot() tries to make the entries' whitespace align, which xgettext doesn't do - if (use_base_rules & po_data$message_source[1L] == "R") { + if (use_base_rules && po_data$message_source[1L] == "R") { plural_fmt <- '\n%s%smsgid "%s"\nmsgid_plural "%s"\n%s' msgstr_fmt <- 'msgstr[%d] "%s"' } else { @@ -403,7 +406,7 @@ po_metadata = function(package='', version='', language='', author='', email='', is.null(copyright) || is.character(copyright) || is.list(copyright) ) pm = c(as.list(environment()), list(...)) - pm$charset <- "UTF-8" + if (is.null(pm$charset)) pm$charset <- "UTF-8" if (is.null(pm$pot_timestamp)) pm$pot_timestamp <- Sys.time() if (is.null(pm$po_timestamp)) pm$po_timestamp <- pm$pot_timestamp if (is.null(pm$language_team)) pm$language_team <- pm$language @@ -420,7 +423,6 @@ format.po_metadata = function(x, template = FALSE, use_plurals = FALSE, ...) 
{ x$email = "EMAIL@ADDRESS" x$language = '' x$language_team = "LANGUAGE " - x$charset = 'CHARSET' } if (is.character(x$copyright)) { x$copyright = list(years = format(x$pot_timestamp, "%Y"), holder = x$copyright) diff --git a/tests/testthat/test-po_compile.R b/tests/testthat/test-po_compile.R index 589b144b..559dc15b 100644 --- a/tests/testthat/test-po_compile.R +++ b/tests/testthat/test-po_compile.R @@ -14,3 +14,18 @@ test_that("get_po_metadata() returns 0 rows if no .po fles", { meta <- get_po_metadata(temp) expect_equal(nrow(meta), 0) }) + +test_that("po_compile() can handle UTF-8 msgstr", { + temp <- local_test_package( + `R/foo.R` = "foo <- function() message('Hello!')" + ) + + po_extract(temp) # R/* -> .pot + po_create("es", temp) # .pot -> R-es.po + r_es_po <- file.path(temp, "po", "R-es.po") + l <- readLines(r_es_po) + l[grep('msgstr ""', l)[2L]] <- 'msgstr "\U00A1Hello!"' + cat(l, file = r_es_po, sep = "\n") + + expect_no_error(po_compile(temp, verbose=FALSE)) +}) diff --git a/tests/testthat/test-write-po-file.R b/tests/testthat/test-write-po-file.R index cbf4b26e..0af385a1 100644 --- a/tests/testthat/test-write-po-file.R +++ b/tests/testthat/test-write-po-file.R @@ -12,7 +12,7 @@ test_that("po_metadata constructor & methods work", { format(metadata, template = TRUE), c( '"Language-Team: LANGUAGE \\n"', - '"Content-Type: text/plain; charset=CHARSET\\n"', + '"Content-Type: text/plain; charset=UTF-8\\n"', '# SOME DESCRIPTIVE TITLE.', '# This file is distributed under the same license as the R package.', '# FIRST AUTHOR , YEAR.'