Skip to content

Commit

Permalink
Ready for CRAN
Browse files Browse the repository at this point in the history
  • Loading branch information
dipterix committed Aug 6, 2022
1 parent aca5a1e commit 518fd6f
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 22 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Package: filearray
Type: Package
Title: File-Backed Array for Out-of-Memory Computation
Version: 0.1.3.9001
Version: 0.1.4
Language: en-US
Encoding: UTF-8
License: LGPL-3
URL: http://dipterix.org/filearray/, https://github.com/dipterix/filearray
URL: https://dipterix.org/filearray/, https://github.com/dipterix/filearray
BugReports: https://github.com/dipterix/filearray/issues
Authors@R: c(
person(
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(filearray_bind)
export(filearray_checkload)
export(filearray_create)
export(filearray_load)
export(filearray_load_or_create)
export(filearray_threads)
export(fmap)
export(fmap2)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# filearray (development version)

# filearray 0.1.4

* Fixed a bug where the allocated memory was one byte shorter than requested. The bug could crash R when triggered in certain cases.
* Removed the limit on the maximum number of partitions when writing. The previous implementation created and opened all related file descriptors at once before writing. This setup raised errors when the number of connections reached a certain limit, often defined by the operating system. This update only opens connections on demand. Performance might be impacted when writing to disk, but in return, the program is more robust.
* Fixed `subset` function environment not resolved correctly when using formula
* Added `filearray_load_or_create` as an alternative to `filearray_checkload`; it automatically replaces existing obsolete array files if the headers, dimensions, or data types don't match. An `on_missing` argument is also provided to allow array initialization when a new array is created.

# filearray 0.1.3

* Automatically detect whether symbolic-link works and show warnings
Expand Down
7 changes: 5 additions & 2 deletions R/class_filearray.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,16 @@ setRefClass(
}
return(default)
},
set_header = function(key, value){
set_header = function(key, value, save = TRUE){
force(value)
if(key %in% RESERVED_HEADERS){
stop("Key `", key, "` is preserved and should be read-only or altered via other methods.")
}
.self$.header[[key]] <- value
.self$.save_header()
if( save ) {
.self$.save_header()
}
invisible(value)
},
header_signature = function(include_path = TRUE){
header_sig <- digest::digest(.self$.header, algo = "sha256")
Expand Down
129 changes: 129 additions & 0 deletions R/load.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ guess_partition <- function(dim, elem_size){
#' @param symlink_ok whether arrays with symbolic-link partitions can pass
#' the test; this is usually used on bound arrays with symbolic-links; see
#' \code{\link{filearray_bind}};
#' @param verbose whether to print out some debug messages
#' @param on_missing function to handle file array (such as initialization)
#' when a new array is created; must take only one argument, the array object
#' @return A \code{\link{FileArray-class}} instance.
#'
#' @details The file arrays partition out-of-memory array objects and store them
Expand Down Expand Up @@ -107,6 +110,28 @@ guess_partition <- function(dim, elem_size){
#' }
#'
#'
#' # check-load, and create a new array if fail
#' x <- filearray_load_or_create(
#' filebase = filebase, dimension = c(200, 30, 8),
#' verbose = TRUE, signature = "henry"
#' )
#' x$get_header("signature")
#'
#' # check-load with initialization
#' x <- filearray_load_or_create(
#' filebase = filebase,
#' dimension = c(3, 4, 5),
#' verbose = TRUE, mode = "readonly",
#' on_missing = function(array) {
#' array[] <- seq_len(60)
#' }
#' )
#'
#' x[1:3,1,1]
#'
#' # Clean up
#' unlink(filebase, recursive = TRUE)
#'
NULL

#' @rdname filearray
Expand Down Expand Up @@ -190,3 +215,107 @@ filearray_checkload <- function(
}
return(arr)
}


#' @rdname filearray
#' @export
filearray_load_or_create <- function(
    filebase, dimension, on_missing = NULL, type = NA,
    ..., mode = c("readonly", "readwrite"), symlink_ok = TRUE,
    initialize = FALSE, partition_size = NA, verbose = FALSE
) {
  # Load the file array stored at `filebase` when it passes all checks;
  # otherwise remove whatever is at `filebase` and create a fresh array.
  #
  # Arguments:
  #   filebase       path of the array directory (normalized below)
  #   dimension      requested dimension; coerced to integer, length >= 2
  #   on_missing     NULL, or a function taking the new array as its first
  #                  argument; only called when a new array is created
  #   type           requested data type; NA means "keep the existing type",
  #                  or 'double' when a new array has to be created
  #   ...            named extra headers; forwarded to `filearray_checkload`
  #                  and written to the header of a newly created array
  #   mode           mode of the returned array
  #   symlink_ok     forwarded to `filearray_checkload`
  #   initialize, partition_size
  #                  forwarded to `filearray_create`
  #   verbose        whether to report why the existing array was rejected
  #
  # Returns the loaded or newly created array with `.mode` set to `mode`.
  mode <- match.arg(mode)
  filebase <- normalizePath(filebase, mustWork = FALSE, winslash = "/")
  # Refuse a file-system root ("/" or "C:/"): the fallback branch below
  # recursively deletes `filebase`.
  if(length(filebase) != 1 || grepl("(^|^[A-Za-z]:)/$", filebase)) {
    stop("Invalid filebase to store a file array.")
  }

  dimension <- as.integer(dimension)
  if(length(dimension) < 2 || anyNA(dimension) || any(dimension < 0)) {
    stop("Incorrect dimension for a file array: `dimension` must a valid positive integer vector with length of two or above.")
  }

  if(!is.null(on_missing)) {
    if(!is.function(on_missing) || !length(formals(on_missing))) {
      stop("`filearray_load_or_create`: `on_missing` must be a function with one argument (i.e. the file array)")
    }
  }

  additional_headers <- list(...)
  add_header_names <- names(additional_headers)
  if(length(additional_headers)) {
    # Validate the *names* of the extra headers (not their values): every
    # element must carry a non-blank name because it is later used as a
    # header key.
    if(
      is.null(add_header_names) ||
      !all(nzchar(trimws(add_header_names)))
    ) {
      stop("`filearray_load_or_create`: additional parameters must be named.")
    }
  }

  arr <- tryCatch(
    expr = {
      # try to load existing array
      arr <- filearray_checkload(
        filebase = filebase, mode = mode,
        symlink_ok = symlink_ok, ...
      )

      # If no error raised, the array has been loaded

      if(!is.na(type)) {
        if(!identical(arr$type(), type)) {
          stop("`filearray_load_or_create`: Requested array type does not match with existing array.")
        }
      } else {
        # in case the array needs to be reconstructed, assuming the type
        # remain the same; this assignment is visible to the error handler
        # because `expr` is evaluated in this function's frame
        type <- arr$type()
      }

      # Now check the dimension
      arr_dim <- as.integer(arr$dimension())
      if(!identical(arr_dim, dimension)) {
        stop("`filearray_load_or_create`: Requested dimension does not match with existing array.")
      }

      arr
    },
    error = function(e) {
      # Any failure above (missing array, header/type/dimension mismatch)
      # ends up here: discard the old files and build a new array.
      if(verbose) {
        message("`filearray_load_or_create`: cannot load the existing file array: ", conditionMessage(e), "\nTrying creating a new one. If the array already exists, its file path will be removed.")
      }
      if(file.exists(filebase)) {
        unlink(filebase, recursive = TRUE, force = TRUE)
      }
      pdir <- dirname(filebase)
      if(!dir.exists(pdir)) {
        dir.create(pdir, showWarnings = FALSE, recursive = TRUE)
      }
      # create the array
      if(is.na(type)) { type <- 'double' }
      arr <- filearray_create(
        filebase = filebase,
        dimension = dimension,
        type = type,
        partition_size = partition_size,
        initialize = initialize
      )
      # run on_missing if the function exists
      if(is.function(on_missing)) {
        # the initializer must be able to write regardless of `mode`
        arr$.mode <- "readwrite"
        on_missing(arr)
      }
      # seal the header: set all keys in memory, then write to disk once
      for(nm in add_header_names) {
        arr$set_header(key = nm, value = additional_headers[[nm]], save = FALSE)
      }
      arr$.save_header()
      arr
    }
  )

  # set mode
  arr$.mode <- mode
  arr
}

4 changes: 2 additions & 2 deletions adhoc/rchk.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
rm "$HOME/Dropbox/projects/filearray_0.1.0.9000.tar.gz"
docker run -v "$HOME/Dropbox/projects":/projects rchk "/projects/filearray_0.1.0.9000.tar.gz"
# rm "$HOME/Dropbox/projects/filearray_0.1.3.9001.tar.gz"
docker run -v "$HOME/Dropbox/projects":/projects kalibera/rchk:latest "/projects/filearray_0.1.3.9001.tar.gz"
18 changes: 2 additions & 16 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## Dev environment
* osx (ARM), R 4.1.2
* osx (ARM), R 4.2.1

## Test environments
* osx (x64, github-action), R-release
Expand All @@ -9,19 +9,5 @@

## R CMD check results

On `release` and `devel`
On `oldrelease`, `release` and `devel`
0 errors | 0 warnings | 0 notes

On R-4.0
0 errors | 1 warning | 0 notes

```
Codoc mismatches from documentation object 'apply':
apply
Code: function(X, MARGIN, FUN, ...)
Docs: function(X, MARGIN, FUN, ..., simplify = TRUE)
Argument names in docs not in code:
simplify
```

This is because `simplify` was added to `apply` function since R-4.1.
41 changes: 41 additions & 0 deletions man/filearray.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 518fd6f

Please sign in to comment.