Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement po_create()/po_update() for creating/updating translations #235

Merged
merged 36 commits into from
Nov 11, 2021
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
3affcc0
Implement tr_add() for adding new translations
MichaelChirico Jul 12, 2021
63dcbc5
Add or update as necessary
hadley Nov 1, 2021
cfe1eff
Only add previous for tr_add()
hadley Nov 1, 2021
b205c34
Fix typo
hadley Nov 1, 2021
6124465
Split tr_add() into po_create() and po_update()
hadley Nov 4, 2021
c644cb7
lang->languages
MichaelChirico Nov 6, 2021
a088663
lang->languages
MichaelChirico Nov 6, 2021
28f3e28
Merge commit 'bd9c91a2abc717fe52b56cf6879f83f233a299a1'
hadley Nov 9, 2021
310f2b0
Move to own file
hadley Nov 9, 2021
10e16fd
Mark .pot file as UTF-8
hadley Nov 9, 2021
42885b1
Split po_create() and po_update() into pieces
hadley Nov 9, 2021
205ee8f
Fix broken tests
hadley Nov 9, 2021
0d3391d
Add test for po_create()
hadley Nov 9, 2021
d8880e0
WS
hadley Nov 9, 2021
efa92b6
Extract out local_test_package() helper
hadley Nov 9, 2021
511fad1
Extract & test po_language_files()
hadley Nov 9, 2021
d7c3882
Add tests for create and update
hadley Nov 9, 2021
0daabcd
Add missing line
hadley Nov 9, 2021
b604281
might as well use fifelse
MichaelChirico Nov 10, 2021
ef5449f
Add links to solaris docs
hadley Nov 10, 2021
7322e55
Revert unintentional change
hadley Nov 10, 2021
78afb31
Move local_test_package() to better home
hadley Nov 10, 2021
7133453
Revert CHARSET -> UTF-8 change
hadley Nov 10, 2021
df99ea4
Merge commit '3eaf727e03b1238d01492b99f58f938587b4b73b'
hadley Nov 10, 2021
f081b01
Merged origin/add-translation into add-translation
hadley Nov 10, 2021
6fd0c30
More docs about updating
hadley Nov 10, 2021
0e4371c
Standardise number of dots
hadley Nov 10, 2021
4b3353f
Improve docs
hadley Nov 10, 2021
ae5c5b7
Tweak messaging
hadley Nov 10, 2021
b4e71bd
Merge branch 'master' into add-translation
hadley Nov 10, 2021
b8e17e9
Revert accidental doc changes
hadley Nov 10, 2021
0f93068
add TODO
MichaelChirico Nov 11, 2021
3ff40d5
another TODO
MichaelChirico Nov 11, 2021
5efd88f
typo
MichaelChirico Nov 11, 2021
e6788bd
clarify fuzzy description
MichaelChirico Nov 11, 2021
a36031a
comment need for standardise_dots & americanize 🇺🇸
MichaelChirico Nov 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ export(translate_package)
export(get_message_data)
export(write_po_file, po_metadata)

export(po_extract)
export(po_compile)
export(po_create)
export(po_extract)
export(po_update)

export(check_cracked_messages, check_untranslated_cat, check_untranslated_src)

Expand Down
44 changes: 35 additions & 9 deletions R/msgmerge.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
# split off from tools::update_pkg_po() to only run the msgmerge & checkPoFile steps
run_msgmerge = function(po_file, pot_file) {
if (system(sprintf("msgmerge --update %s %s", po_file, shQuote(pot_file))) != 0L) {

# https://www.gnu.org/software/gettext/manual/html_node/msgmerge-Invocation.html
run_msgmerge <- function(po_file, pot_file, previous = FALSE, verbose = TRUE) {
args <- c(
"--update", shQuote(path.expand(po_file)),
if (previous) "--previous", #show previous match for fuzzy matches
hadley marked this conversation as resolved.
Show resolved Hide resolved
shQuote(path.expand(pot_file))
)

val <- system2("msgmerge", args, stdout = TRUE, stderr = TRUE)
if (!identical(attr(val, "status", exact = TRUE), NULL)) {
# nocov these warnings? i don't know how to trigger them as of this writing.
warningf("Running msgmerge on '%s' failed.", po_file)
warningf("Running msgmerge on './po/%s' failed:\n %s", basename(po_file), paste(val, collapse = "\n"))
} else if (verbose) {
messagef("Running msgmerge on './po/%s' succeeded:\n %s", basename(po_file), paste(val, collapse = "\n"))
}

res <- tools::checkPoFile(po_file, strictPlural = TRUE)
if (nrow(res)) {
warningf("tools::checkPoFile() found some issues in %s", po_file)
hadley marked this conversation as resolved.
Show resolved Hide resolved
print(res)
}
return(invisible())
tools::checkPoFile(po_file, strictPlural = TRUE)
invisible()
}

run_msgfmt = function(po_file, mo_file, verbose) {
Expand Down Expand Up @@ -54,3 +61,22 @@ update_en_quot_mo_files <- function(dir, verbose) {
}
return(invisible())
}

# https://www.gnu.org/software/gettext/manual/html_node/msginit-Invocation.html
run_msginit <- function(po_path, pot_path, locale, width = 80, verbose = TRUE) {
hadley marked this conversation as resolved.
Show resolved Hide resolved
args <- c(
"-i", shQuote(path.expand(pot_path)),
"-o", shQuote(path.expand(po_path)),
"-l", shQuote(locale),
"-w", width,
"--no-translator" # don't consult user-email etc
)
val <- system2("msginit", args, stdout = TRUE, stderr = TRUE)
if (!identical(attr(val, "status", exact = TRUE), NULL)) {
stopf("Running msginit on '%s' failed", pot_path)
} else if (verbose) {
messagef("Running msgint on './po/%s' succeeded:\n %s", basename(po_path), paste(val, collapse = "\n"))
}
return(invisible())
}

6 changes: 4 additions & 2 deletions R/po_compile.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#'
#' @param dir Path to package root directory.
#' @param package Name of package. If not supplied, read from `DESCRIPTION`.
#' @param lazy If `TRUE`, only `.mo` functions that are older than `.po`
#' files be updated
#' @param lazy If `TRUE`, only `.mo` files that are older than their
#' corresponding `.po` file will be updated.
#' @param verbose If `TRUE`, print information as it goes.
po_compile = function(dir = ".", package = NULL, lazy = TRUE, verbose = TRUE) {
po_metadata <- get_po_metadata(dir = dir, package = package)
Expand Down Expand Up @@ -52,11 +52,13 @@ get_po_metadata <- function(dir = ".", package = NULL) {

mo_names <- gsub(lang_regex, sprintf("\\1%s.mo", package), basename(po_paths))
mo_paths <- file.path(dir, "inst", "po", languages, "LC_MESSAGES", mo_names)
pot_paths <- pot_paths(dir, type, package = package)

data.table(
language = languages,
type = type,
po = po_paths,
pot = pot_paths,
mo = mo_paths
)
}
58 changes: 58 additions & 0 deletions R/po_create.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Create a new `.po` file
#'
#' `po_create()` creates a new `po/{languages}.po` containing the messages to be
#' translated. If a translation already exists, it'll be updated with any
#' changes to the `.pot` since it was last touched.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we clarify what happens to existing translations here too? Details may be overkill, but mentioning fuzzying seems appropriate.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll put most of the details in po_update() and then link to there from here.

I think the updating here is mostly incidental, just because if you use msginit without checking, it will reset the existing translations, which I don't think you ever want. But at the same time, the main point of this function is not the updating, so I don't want to draw too much attention to it.

#'
#' @param languages Language identifiers. These are typically two letters (e.g.
#' "en" = English, "fr" = French, "es" = Spanish, "zh" = Chinese), but
#' can include an additional suffix for languages that have regional
#' variations (e.g. "fr_CA" = French Canadian, "zh_CN" = simplified
#' characters as used in mainland China, "zh_TW" = traditional characters
#' as used in Taiwan.)
#' @inheritParams po_extract
#' @seealso [po_update()] to update all `.po` files with changes from the
#' `.pot`.
po_create <- function(languages, dir = ".", verbose = !is_testing()) {
package <- get_desc_data(dir, "Package")
po_files <- po_language_files(languages, dir)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it might be nice to warn() here if e.g. R-pkg.pot is present but pkg.pot is not & the package has a src/ directory.

OTOH, if the translation is handled entirely by potools, this should only happen if src/ doesn't have any messages to translate, right? So maybe it will be a warning highly prone to false positives...

skipping for now, but file a follow-up issue if you think the warning is worthwhile.


for (ii in seq_len(nrow(po_files))) {
row <- po_files[ii]
if (file.exists(row$po_path)) {
if (verbose) messagef("Updating '%s' %s translation", row$language, row$type)
run_msgmerge(row$po_path, row$pot_path, previous = TRUE, verbose = verbose)
} else {
if (verbose) messagef("Creating '%s' %s translation", row$language, row$type)
run_msginit(row$po_path, row$pot_path, locale = row$language, verbose = verbose)
}
}

invisible(po_files)
}

# Build the table of translation files for every combination of template
# type found under `dir` and requested language. Columns: `type`, `language`,
# `po_path` (the translation file) and `pot_path` (its template).
po_language_files <- function(languages, dir = ".") {
  # Cross join: one row per (type, language) pair.
  grid <- data.table::CJ(type = pot_types(dir), language = languages)

  # R-level translations carry the prefix supplied by po_prefix().
  po_names <- paste0(po_prefix(grid$type), grid$language, ".po")
  grid[, "po_path" := file.path(dir, "po", po_names)]
  grid[, "pot_path" := pot_paths(dir, grid$type)]

  # Trailing `[]` so the table prints normally after the `:=` assignments.
  grid[]
}

# Paths of the `.pot` templates for the given template types.
#
# `package` is looked up in DESCRIPTION under `dir` when not supplied. An
# empty `type` yields an empty character vector: paste0() would otherwise
# recycle the zero-length input and produce a spurious single path.
pot_paths <- function(dir, type, package = NULL) {
  package <- if (is.null(package)) get_desc_data(dir, "Package") else package

  if (length(type) == 0) {
    return(character())
  }

  pot_names <- paste0(po_prefix(type), package, ".pot")
  file.path(dir, "po", pot_names)
}
po_prefix <- function(type = c("R", "src")) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the one place this will break is for base, which uses R-base.pot for R and R.pot for src. I'll just add a TODO for now to make sure that case is handled later.

ifelse(type == "R", "R-", "")
}
# Which template types ("R" and/or "src") have a `.pot` file on disk
# under `dir`.
pot_types <- function(dir = ".") {
  candidates <- c("R", "src")
  has_template <- file.exists(pot_paths(dir, candidates))
  candidates[has_template]
}
4 changes: 4 additions & 0 deletions R/po_extract.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#' Extract messages for translation into a `.pot` file
#'
#' @description
#' `po_extract()` scans your package for strings to be translated and
#' saves them into a `.pot` template file (in the package's `po` directory).
#' You should never modify this file by hand; instead modify the underlying
#' source code and re-run `po_extract()`.
#'
#' If you have existing translations, call [po_update()] after [po_extract()]
#' to update them with the changes.
#'
#' @returns The extracted messages as computed by [get_message_data()],
#' invisibly.
#' @inheritParams get_message_data
Expand Down
25 changes: 25 additions & 0 deletions R/po_update.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Update all `.po` files with changes in `.pot`
#'
#' `po_update()` updates existing `.po` files after the `.pot` file has changed.
#' The translations for existing messages are preserved; new messages are added;
#' and translations for deleted messages are marked as deprecated and moved to
#' the bottom of the file.
#'
#' @inheritParams po_extract
#' @param lazy If `TRUE`, only `.po` files that are older than their
#' corresponding `.pot` file will be updated
#' @rdname po_create
po_update <- function(dir = ".", lazy = TRUE, verbose = !is_testing()) {
  meta <- get_po_metadata(dir)
  if (lazy) {
    # Restrict to the rows that is_outdated() flags.
    # NOTE(review): is_outdated() is declared as (src, dst); confirm that this
    # argument order flags a `.po` that is older than its `.pot`.
    meta <- meta[is_outdated(meta$po, meta$pot)]
  }

  for (ii in seq_len(nrow(meta))) {
    row <- meta[ii]
    if (verbose) messagef("Updating '%s' %s translation", row$language, row$type)
    # Forward `verbose`: run_msgmerge() defaults to verbose = TRUE, so without
    # this it would report success even when the caller asked for silence.
    run_msgmerge(row$po, row$pot, previous = TRUE, verbose = verbose)
  }

  # Return the (possibly filtered) metadata invisibly.
  invisible(meta)
}
16 changes: 16 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,19 @@ is_outdated <- function(src, dst) {
}

# TRUE only when the TESTTHAT environment variable is exactly "true".
is_testing <- function() {
  flag <- Sys.getenv("TESTTHAT")
  identical(flag, "true")
}

local_test_package <- function(..., .envir = parent.frame()) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

love the look of this, and looks like a great redesign of generating test packages (the current suite is pretty unwieldy/arbitrarily organized). a good companion would be local_translation_conn() to keep translations right next to where they'll be used too.

but feels like it should be in tests/testthat/helper.R, no?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've generally been moving away from using helper files, because it seems more natural just to keep all code in one place. Additionally, as these local_ type helpers grow more complex, it's not crazy that you might also want to test them.

That said, I don't feel particularly strongly about it, and since you already have helper.R, that's a more sensible home, so I'll move it there.

temp <- withr::local_tempdir(.local_envir = .envir)
writeLines(con = file.path(temp, "DESCRIPTION"), c(
"Package: test",
"Version: 1.0.0"
))
dir_create(file.path(temp, c("po", "R")))

files <- list(...)
for (i in seq_along(files)) {
writeLines(files[[i]], file.path(temp, names(files)[[i]]))
}

temp
}
2 changes: 1 addition & 1 deletion R/write_po_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ format.po_metadata = function(x, template = FALSE, use_plurals = FALSE, ...) {
x$email = "EMAIL@ADDRESS"
x$language = ''
x$language_team = "LANGUAGE <[email protected]>"
x$charset = 'CHARSET'
x$charset = 'UTF-8'
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this was CHARSET before, but I was getting an error from tools::checkPoFile() that it was failing to iconv CHARSET to UTF-8.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm note that .pot is not mentioned in ?tools::checkPoFile. CHARSET shows up in templates in base:

https://github.com/wch/r-source/blob/bf5885960808d7c539e74b238575f54de0faba04/src/library/base/po/R.pot#L17

https://github.com/wch/r-source/blob/bf5885960808d7c539e74b238575f54de0faba04/src/library/base/po/R-base.pot#L10

Here's what I'm seeing in the xgettext docs:

https://www.gnu.org/software/gettext/manual/gettext.html

MIME-Version, Content-Type, Content-Transfer-Encoding
These values are set according to the content of the POT file and the current locale. If the POT file contains charset=UTF-8, it means that the POT file contains non-ASCII characters, and we keep the UTF-8 encoding. Otherwise, when the POT file is plain ASCII, we use the locale’s encoding.

and

Content-Type
Replace ‘CHARSET’ with the character encoding used for your language, in your locale, or UTF-8. This field is needed for correct operation of the msgmerge and msgfmt programs, as well as for users whose locale’s character encoding differs from yours (see Charset conversion).

In the gettext sources, here is where xgettext initially populates the default CHARSET:

https://github.com/autotools-mirror/gettext/blob/030c0341a4a0ba6ad7fe62e83ff663bdc76cbe4d/gettext-tools/src/xgettext.c#L2015-L2028

Here write-po treats CHARSET as ASCII:

https://github.com/autotools-mirror/gettext/blob/030c0341a4a0ba6ad7fe62e83ff663bdc76cbe4d/gettext-tools/src/write-po.c#L1650-L1653

This snippet makes me think setting UTF-8 is probably OK, since the default CHARSET has the probably-in-general faulty assumption that the strings in the .pot file are all-ASCII:

https://github.com/autotools-mirror/gettext/blob/030c0341a4a0ba6ad7fe62e83ff663bdc76cbe4d/gettext-tools/src/po-charset.c#L490-L509

That said, current r-devel .pot are all-ASCII:

grep -rP "[\x80-\xFF]" ~/svn/R-devel/ --include=*.pot
# <empty>

CRAN would require this to hold true for all packages at the R level, and probably the C level?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could probably patch tools::checkPoFile to accept the CHARSET ➡️ ASCII logic, but I'm not sure that would solve all of the issues of using checkPoFile() on .pot files when it was designed for .po.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'll back this change out here and we can discuss more thoroughly in a separate issue. I think potools should assume all .pot and .po files are UTF-8 and then fix any downstream problems that causes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, backing out this change doesn't break any of my tests (or interactive usage) so definitely doesn't need to be here.

}
if (is.character(x$copyright)) {
x$copyright = list(years = format(x$pot_timestamp, "%Y"), holder = x$copyright)
Expand Down
4 changes: 2 additions & 2 deletions man/po_compile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/po_create.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/po_extract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion po/R-potools.pot
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgstr ""
"Language-Team: LANGUAGE <[email protected]>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=CHARSET\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

#: find_fuzzy_messages.R:6
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/_snaps/po_create.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# the user is told what's happening

Code
po_create("jp", verbose = TRUE)
Message <simpleMessage>
Creating 'jp' R translation
Running msgint on './po/R-jp.po' succeeded:
Created ./po/R-jp.po.

---

Code
po_create("jp", verbose = TRUE)
Message <simpleMessage>
Updating 'jp' R translation
Running msgmerge on './po/R-jp.po' succeeded:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied this from #257, but it feels a bit wordy to me. What do you think about not showing the call by default?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm it's coming from verbose=TRUE (now the default), are you suggesting we flip the default, or that we implement verbose=0,1,2,...?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's necessary to routinely display a message for success. Adding levels of verbosity is unlikely to be worth the effort IMO, so I'd be in favour of verbose = FALSE by default and not passing in verbose from the wrapping function.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh that's not quite right; I think on success we want to see the stdout/stderr from the command line tool, but not the call.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tweaked the code to behave the way that I want just to give you a concrete proposal to look at. Let me know what you think.

. done.

12 changes: 12 additions & 0 deletions tests/testthat/_snaps/po_update.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# user is told what's happening

Code
po_update(verbose = TRUE, lazy = FALSE)
Message <simpleMessage>
Updating 'fr' R translation
Running msgmerge on './po/R-fr.po' succeeded:
. done.
Updating 'ja' R translation
Running msgmerge on './po/R-ja.po' succeeded:
. done.

10 changes: 7 additions & 3 deletions tests/testthat/test-po_compile.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# metadata ----------------------------------------------------------------

test_that("can find R and src translations", {
temp <- withr::local_tempdir()
dir.create(file.path(temp, "po"))
temp <- local_test_package()
file.create(file.path(temp, "po", c("R-en.po", "en.po")))

meta <- withr::with_dir(temp, get_po_metadata(package = "test"))
meta <- withr::with_dir(temp, get_po_metadata())
expect_equal(meta$language, c("en", "en"))
expect_setequal(meta$type, c("R", "src"))
})

test_that("get_po_metadata() returns 0 rows if no .po fles", {
  # Each test_that() block needs its own fixture: `temp` from the previous
  # test is not visible here, so build a fresh package with an empty po/ dir.
  temp <- local_test_package()

  meta <- get_po_metadata(temp)
  expect_equal(nrow(meta), 0)
})
28 changes: 28 additions & 0 deletions tests/testthat/test-po_create.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
test_that("the user is told what's happening", {
  pkg_dir <- local_test_package()
  # An empty R-level template is enough for po_create() to have work to do.
  file.create(file.path(pkg_dir, "po", "R-test.pot"))
  withr::local_dir(pkg_dir)

  # First call: no R-jp.po yet, so the file is created.
  expect_snapshot(po_create("jp", verbose = TRUE))
  # Second call: the file now exists, so it is updated instead.
  expect_snapshot(po_create("jp", verbose = TRUE))
})

test_that("can generate both R and src pot files", {
  pkg_dir <- local_test_package()
  templates <- c("R-test.pot", "test.pot")
  file.create(file.path(pkg_dir, "po", templates))

  # Both template flavours are detected from the files on disk.
  expect_equal(pot_types(pkg_dir), c("R", "src"))

  # Run inside the package so the default dir = "." resolves to it.
  lang_files <- withr::with_dir(pkg_dir, po_language_files("en"))
  expect_equal(lang_files$type, c("R", "src"))
  expect_equal(lang_files$po_path, file.path(".", "po", c("R-en.po", "en.po")))
  expect_equal(lang_files$pot_path, file.path(".", "po", templates))
})

test_that("can create multiple languages", {
  pkg_dir <- local_test_package()
  file.create(file.path(pkg_dir, "po", c("R-test.pot", "test.pot")))

  # 2 template types x 3 languages = 6 rows.
  requested <- c("en", "jp", "ar")
  lang_files <- withr::with_dir(pkg_dir, po_language_files(requested))
  expect_equal(nrow(lang_files), 6)
})
9 changes: 9 additions & 0 deletions tests/testthat/test-po_update.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
test_that("user is told what's happening", {
  pkg_dir <- local_test_package("R/test.r" = "message('Hello')")
  withr::local_dir(pkg_dir)

  # Set up: extract the template, then create two translations to update.
  po_extract()
  po_create(c("ja", "fr"))

  # lazy = FALSE forces an update even though the .po files were just created.
  expect_snapshot(po_update(verbose = TRUE, lazy = FALSE))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-write-po-file.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ test_that("po_metadata constructor & methods work", {
format(metadata, template = TRUE),
c(
'"Language-Team: LANGUAGE <[email protected]>\\n"',
'"Content-Type: text/plain; charset=CHARSET\\n"',
'"Content-Type: text/plain; charset=UTF-8\\n"',
'# SOME DESCRIPTIVE TITLE.',
'# This file is distributed under the same license as the R package.',
'# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.'
Expand Down