generated from sjessa/rr
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrr_helpers.R
221 lines (168 loc) · 8.58 KB
/
rr_helpers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#' Run the important parts of the header.Rmd for interactive use, to establish
#' the output and figures directories
rr_i <- function() {
library(here)
if (R.version$major == "3") {
out <<- here::here("output", doc_id); dir.create(out, recursive = TRUE, showWarnings = FALSE)
figout <<- here::here("figures", doc_id); dir.create(figout, recursive = TRUE, showWarnings = FALSE)
} else if (R.version$major == "4" & grepl("narval", here::here())) {
out <<- here::here("R-4/output", doc_id); dir.create(out, recursive = TRUE, showWarnings = FALSE)
figout <<- here::here("R-4/figures", doc_id); dir.create(figout, recursive = TRUE, showWarnings = FALSE)
} else if (R.version$major == "4" & grepl("hydra", here::here())) {
out <<- here::here("R-4-hydra/output", doc_id); dir.create(out, recursive = TRUE, showWarnings = FALSE)
figout <<- here::here("R-4-hydra/figures", doc_id); dir.create(figout, recursive = TRUE, showWarnings = FALSE)
}
message("Output: ", out)
message("Figures: ", figout)
}
#' The here() function always returns a full path from the root directory
#' This function returns a path from the project root for less clutter
#' and greater portability
#'
#' @param path String, full path as generated by here::here("blah")
path_from_here <- function(path) {
paste0(basename(here::here()), # Project root directory
strsplit(path, basename(here::here()))[[1]][2]) # Chosen path relative to project root
}
# Savers / loaders ----
#' A wrapper function for writing a description with a TSV
#'
#' This funcion simply wraps readr::write_tsv, but also saves a user-provided
#' description alongside the tsv with the same filename but extension ".desc"
#'
#' @param df Data frame to write to tsv
#' @param path Path for output tsv file, as returned by here e.g. here("my/file.tsv")
#' @param desc String, brief description of file contents
#' @param verbose Logical, whether to print .desc file path to console
#'
#' @return Nothing
#'
#' @examples
#' mtcars %>%
#' rr_write_tsv(path = here("output/01/mtcars.tsv),
#' desc = "The mtcars dataset, verbatim")
rr_write_tsv <- function(df, path, desc, verbose = TRUE) {
# Need readr to simplify table writing
require(readr)
readr::write_tsv(df, path)
# Create the path for the description file, swapping .tsv extension to .desc
desc_path <- gsub("tsv$", "desc", path)
# Print the object description to the desc file
cat(desc, file = desc_path, sep = "\n")
# Output a message with path to desc file
if (verbose) message("...writing description of ", basename(as.character(path)), " to ", path_from_here(desc_path))
}
#' A wrapper function for saving source data along side a ggplot
#'
#' This funcion simply wraps ggplot2::ggplot, but also saves the input data
#' alongside the figure, with the same filename but extension ".source_data.tsv".
#' This is extremely useful for being able to quickly extract the data needed to
#' regenerate the figure, sometimes also required for papers.
#'
#' NOTE: Saving soure data only works if a ggplot is generated within a code chunk and the
#' document rendered by RMarkdown, otherwise, a warning is emitted and the function
#' proceeds with ggplot code.
#'
#' @param df Data frame, input to ggplot2
#' @param plot_num Numeric, index of plot within R Markdown chunk, used to determine
#' the filname of the figure when the document is rendered
#' @param ... Additional parameters passed to ggplot2::ggplot, e.g. "aes(x = mpg, y = cyl)"
#'
#' @return A ggplot2 object, to which additional gg elements can be added with +,
#' same as ggplot2::ggplot
#' @export
#'
#' @examples
#' mtcars2 %>%
#' rr_ggplot(1, aes(x = disp, y = wt)) +
#' geom_line() +
#' theme_bw()
rr_ggplot <- function(df, plot_num, ...) {
require(ggplot2)
require(readr)
if (!interactive()) {
# TODO: Currently, it's not possible to not specify plot_num, because
# it messes up the dots (...) which are passed to ggplot, so this if statement
# is never evaluated, an error is thrown instead:
# If the plot # is not provided
if (missing(plot_num)) {
plot_num <- 1
# This is beacuse plots are named by their number in each chunk, but
# that number cannot be accessed by this function
warning("!! If more than one ggplot is generated in this chunk with rr_ggplot(),",
"only the source data for the first one will be saved.",
"Pass plot # explicitly to plot_num argument to correct this.")
}
# Get the figure path for the current chunk, without file extensions
# https://github.com/yihui/knitr/issues/73#issuecomment-3514096
fig_path <- knitr::fig_path(number = plot_num)
# Make a path for the source data by appending a suffix to the figure path,
# and write source data there as a TSV
src_path <- paste0(fig_path, ".source_data.tsv")
write_tsv(df, src_path)
# Output a message with path to source data file
message("...writing source data of ggplot to ", path_from_here(src_path))
} else {
warning("!! This function is being run in an interactive session ",
"and the source data is NOT being saved. Render the document ",
"to save source data.")
}
# Proceed with ggplot
ggplot(data = df, ...)
}
#' A wrapper function for reading a TSV along with its metadata & description
#'
#' This funcion simply wraps readr::read_tsv, but at the same time, prints some
#' information about the file to help with reproducibility & dependency tracking:
#' * The description of the file, if one exists at the same filepath with ".desc" extension
#' * The timestamp for when the file was last modified
#' * The script that generated the file, under the assumption it was generated
#' by a script within the analysis folder of this repository
#'
#' NOTE: for files NOT produced in this repositoy, this function is not receommonded.
#'
#' To produce a toggle button showing/hiding the output of this function in an R Markdown
#' HTML report, wrap the chunk in <div class="fold o"></div>
#' (in which case the outpout is hidden by default)
#'
#' @param path String, as returned by here::here("blah")
#' @param ... Additional parameters passed to readr::read_tsv()
#'
#' @return A tibble, same as readr::read_tsv
#'
#' @examples
#' mtcars %>%
#' rr_write_tsv(path = here("output/01/mtcars.tsv),
#' desc = "The mtcars dataset, verbatim")
#'
#' mtcars2 <- rr_read_tsv(path = here("output/01/mtcars.tsv))
rr_read_tsv <- function(path, ...) {
require(readr)
require(stringr)
# Create the path for the description file, swapping .tsv extension to .desc
desc_path <- gsub("tsv$", "desc", path)
if(!file.exists(desc_path)) warning("!! No description file (.desc) found. ",
"To automatically write a description file ",
"when saving a tsv, use rr_write_tsv().")
# Get the number of the analysis, e.g. "01"
doc_idx <- stringr::str_extract(path, "(\\d)+")
# Search analysis folder for .Rmd file matching doc_idx
script <- list.files(here("analysis"), pattern = glob2rx(paste0(doc_idx, "*.Rmd")))
# Get the timestamp for when the file contents were last modified
timestamp <- file.info(path)$mtime
# We use cat here because it returns to stdout, which will be picked
# up by the code folding js script (hideOutput.js). Otherwise, there will
# be one folding button per line of output
# https://stackoverflow.com/questions/36699272/why-is-message-a-better-choice-than-print-in-r-for-writing-a-package
cat(paste0(path_from_here(path), " info:\n",
"...description : ", ifelse(file.exists(desc_path), readLines(desc_path), "NOT SPECIFIED"),
"\n...generated by: ", path_from_here(here("analysis", script)),
"\n...last updated: ", timestamp))
# e.g. output
# /Users/selinjessa/Repos/rr/output/01/mtcars.tsv info:
# ...description : The mtcars dataset, verbatim
# ...generated by: rr/analysis/01-first_step.Rmd
# ...last updated: 2020-05-23 22:31:53
# Read the file and return as dataframe
suppressMessages(readr::read_tsv(path, ...))
}