From 54d10a91c64499a0a3634a057ef12c85d3f0d339 Mon Sep 17 00:00:00 2001 From: David Liao Date: Sun, 5 Nov 2023 18:08:18 +0000 Subject: [PATCH] UPDATE 0.0.6 --- NAMESPACE | 1 + NEWS.md | 7 + R/flair_datasets.html | 427 ++++++++++++++++++++++++++++++++ R/flar_data.R | 49 ++-- R/toolbox.R | 1 + R/utils.R | 40 +++ _pkgdown.yml | 44 ++-- man/flair_data.Rd | 51 ++-- man/uninstall_python_package.Rd | 27 ++ vignettes/quickstart.Rmd | 2 +- vignettes/tutorial.Rmd | 2 +- 11 files changed, 598 insertions(+), 53 deletions(-) create mode 100644 R/flair_datasets.html create mode 100644 man/uninstall_python_package.Rd diff --git a/NAMESPACE b/NAMESPACE index 1abdd3ba..e6f54774 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(load_tagger_pos) export(load_tagger_sentiments) export(map_entities) export(show_flair_cache) +export(uninstall_python_package) import(reticulate) importFrom(attempt,stop_if_all) importFrom(data.table,":=") diff --git a/NEWS.md b/NEWS.md index fbc41641..55196351 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,13 @@ * The major module _flair_ in {`flaiR`} should be renamed from `flair()` to `import_flair()` to avoid overlapping with conventional practice `import flair` in Python. +* _install_python_package()_ and _uninstall_python_package()_ are new functions to install and uninstall Python packages using pip in the environment used by your flaiR package. + +* `zzz.R` is a revised code that proceeds through three steps. First, when installing and loading the package, {flaiR} utilizes the system's environment tool and undergoes three evaluation stages. Initially, {flaiR} requires at least Python 3 to be installed on your device. If Python 3 is not available, you will be unable to install {flaiR} successfully. Once this requirement is met, the system then checks for the appropriate versions of PyTorch and Flair. The primary focus here is on Flair. 
If it is not already installed, you will see a message indicating that 'Flair is being installed from Python'. This process represents a new format for loading the Python environment used by your flaiR package. + + + + diff --git a/R/flair_datasets.html b/R/flair_datasets.html new file mode 100644 index 00000000..d77c60e4 --- /dev/null +++ b/R/flair_datasets.html @@ -0,0 +1,427 @@ + + + + + + + + + + + + + + + +flair_datasets.R + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

@title Access the flair_datasets Module +from Flair

+

@description Utilizes the {reticulate} +package to import the flair.datasets module from Flair’s +Python library, enabling the use of its datasets in an R +environment.

+

@return A Python module (flair.datasets) +from Flair, which can be utilized for NLP tasks.

+

@examples

+

@importFrom reticulate import

+

@references Python equivalent:

+

@seealso for additional information on +Flair’s capabilities and datasets in NLP.

+

@importFrom reticulate import @examples # print all the datasets from flair +names(flair_datasets())

+

@export

+
flair_datasets <- function() {
+  # Import Python's `flair.datasets` module and hand it back as the
+  # (visible) value of the function.
+  import("flair.datasets")
+}
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/R/flar_data.R b/R/flar_data.R index ceaa708b..1caf5af7 100644 --- a/R/flar_data.R +++ b/R/flar_data.R @@ -6,27 +6,46 @@ #' gives access to various classes and utilities in the `flair.data` module, #' most notably: #' \itemize{ -#' \item \strong{Sentence}: Represents a sentence, which is a list of -#' Tokens. This class provides various utilities for sentence -#' manipulation, such as adding tokens, tagging with pre-trained models, -#' and obtaining embeddings. -#' \item \strong{Token}: Represents a word or a sub-word unit in a sentence. -#' It can carry various annotations such as named entity tags, part-of-speech -#' tags, and embeddings. Additionally, the token provides functionalities -#' to retrieve or check its annotations. -#' \item \strong{Corpus}: Represents a collection of sentences, +#' \item \strong{BoundingBox(left, top, right, bottom)}: Bases: tuple (Python); list (R) +#' \itemize{ +#' \item left - str. Alias for field number 0. +#' \item top - int Alias for field number 1 +#' \item right - int Alias for field number 2 +#' \item bottom - int Alias for field number 3 +#' } +#' +#' \item \strong{Sentence(text, use_tokenizer=True, language_code=None, +#' start_position=0)}:A Sentence is a list of tokens and is used to +#' represent a sentence or text fragment. `Sentence` can be imported by +#' `flair_data()$Sentence` via {flaiR}. +#' \itemize{ +#' \item text \code{Union[str, List[str], List[Token]]} - The original string (sentence), or a pre-tokenized list of tokens. +#' \item use_tokenizer \code{Union[bool, Tokenizer]} - Specify a custom tokenizer to split the text into tokens. The default is \code{flair.tokenization.SegTokTokenizer}. If \code{use_tokenizer} is set to \code{False}, \code{flair.tokenization.SpaceTokenizer} will be used instead. The tokenizer will be ignored if \code{text} refers to pre-tokenized tokens. +#' \item language_code \code{Optional[str]} - Language of the sentence. 
If not provided, \code{langdetect} will be called when the \code{language_code} is accessed for the first time. +#' \item start_position \code{int} - Start character offset of the sentence in the superordinate document. +#' } +#' \item \strong{Span(tokens, tag=None, score=1.0)}: Bases: _PartOfSentence. +#' A Span is a slice of a Sentence, consisting of a list of Tokens. +#' `Span` can be imported by `flair_data()$Span`. +#' +#' \item \strong{Token(text, head_id=None, whitespace_after=1, start_position=0, sentence=None)}: +#' This class represents one word in a tokenized sentence. +#' Each token may have any number of tags. It may also point to its head in a +#' dependency tree. `Token` can be imported by `flair_data()$Token` via {flaiR}. +#' +#' \item \strong{Corpus(train=None, dev=None, test=None, name='corpus', sample_missing_splits=True)}: Represents a collection of sentences, #' facilitating operations like splitting into train/test/development #' sets and applying transformations. It is particularly useful #' for training and evaluating models on custom datasets. +#' `Corpus` can be imported by `flair_data()$Corpus` via {flaiR}. +#' +#' \item \strong{Dictionary}: Represents a mapping between items and indices. +#' It is useful for converting text into machine-readable formats. #' } -#' Additionally, the module offers utilities for reading data in the CoNLL -#' format, a common format for NER, POS tagging, and more. It also contains -#' the `Dictionary` class for item-index mapping, facilitating the conversion -#' of text into machine-readable formats. This function provides a bridge -#' to access these functionalities directly from R. #' -#' @return A Python module (`flair.data`). +#' @return A Python module (`flair.data`). To access the classes and utilities. 
#' +#' @seealso [flair.data](https://flairnlp.github.io/flair/master/api/flair.data.html#) #' @export #' #' @examples diff --git a/R/toolbox.R b/R/toolbox.R index 9dae86f7..93f15e17 100644 --- a/R/toolbox.R +++ b/R/toolbox.R @@ -120,6 +120,7 @@ highlight_text <- function(text, entities_mapping, font_family = "Arial") { return(HTML(justified_text)) } + #' @title Create Mapping for NER Highlighting #' #' @description This function generates a mapping list for Named Entity Recognition (NER) diff --git a/R/utils.R b/R/utils.R index 55f78bb8..d4f8fa7d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -481,4 +481,44 @@ install_python_package <- function(package_name, package_version = NULL, python_ # )) # } +#' @title Uninstall a Python Package +#' +#' @description `uninstall_python_package` function uninstalls a specified Python +#' package using the system's Python installation. It checks if Python is +#' installed and accessible, then proceeds to uninstall the package. Finally, +#' `uninstall_python_package` verifies that the package has been successfully uninstalled. +#' +#' @param package_name The name of the Python package to uninstall. +#' @param python_path The path to the Python executable. If not provided, it uses the system's default Python path. +#' +#' @return Invisibly returns TRUE if the package is successfully uninstalled, otherwise it stops with an error message. 
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' uninstall_python_package("numpy")
+#' }
+uninstall_python_package <- function(package_name, python_path = Sys.which("python3")) {
+  # Uninstalls `package_name` with `<python_path> -m pip uninstall -y` and then
+  # confirms the result with `<python_path> -m pip show`.
+  #
+  # package_name: a single, non-empty string naming the pip distribution.
+  # python_path:  path to the Python executable (defaults to the `python3`
+  #               found by Sys.which()).
+  #
+  # Returns TRUE invisibly on success; stops with an error otherwise.
+
+  if (!is.character(package_name) || length(package_name) != 1L ||
+      is.na(package_name) || !nzchar(package_name)) {
+    stop("`package_name` must be a single non-empty string.", call. = FALSE)
+  }
+
+  # Sys.which() returns a named vector; drop the name before further use.
+  python_path <- unname(python_path)
+
+  # Check if Python is installed or found in the system PATH
+  if (length(python_path) != 1L || is.na(python_path) || !nzchar(python_path)) {
+    stop("Python is not installed, not found in the system PATH, or an incorrect path was provided.")
+  }
+  message("Using Python at: ", python_path)
+
+  # system2() quotes the executable itself but not its arguments, so the
+  # package name is quoted here using the convention of the current platform.
+  quote_type <- if (.Platform$OS.type == "windows") "cmd" else "sh"
+  quoted_name <- shQuote(package_name, type = quote_type)
+
+  # Uninstall the specified package. A non-zero status means pip could not be
+  # run or the removal itself failed.
+  uninstall_status <- suppressWarnings(
+    system2(python_path, c("-m", "pip", "uninstall", "-y", quoted_name))
+  )
+  if (!identical(as.integer(uninstall_status), 0L)) {
+    stop("Failed to uninstall the package. It may still be installed.")
+  }
+
+  # Check if the package is still installed. `pip show` looks the package up
+  # by its distribution name (which may differ from the importable module
+  # name) and exits with a non-zero status when it is not installed. The exit
+  # status is inspected directly: a failing command only raises a warning in
+  # R, never an error, so wrapping the call in try() cannot detect it.
+  show_status <- suppressWarnings(
+    system2(
+      python_path,
+      c("-m", "pip", "show", quoted_name),
+      stdout = FALSE,
+      stderr = FALSE
+    )
+  )
+
+  if (identical(as.integer(show_status), 0L)) {
+    stop("Failed to uninstall the package. It may still be installed.")
+  }
+
+  message("Package '", package_name, "' was successfully uninstalled.")
+  invisible(TRUE)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml index 44d42ec5..2327315f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -23,7 +23,7 @@ reference: desc:
This is a very high-level overview of flair. Accessing whole flair module. The flair library is a powerful NLP (Natural Language Processing) tool based on PyTorch. Its primary focus is to provide state-of-the-art models for NLP tasks while keeping the API simple and easy to use.
- contents: - - flair + - import_flair - title: The flair_embeddings Module (Python) desc:
Accessing flair.embeddings module from flair. `flair.embeddings` is a module from the Flair NLP library, which provides several state-of-the-art embeddings (vector representations) for natural language processing (NLP) tasks. Embeddings are crucial in NLP because they allow models to understand the semantic content of words, capturing relationships between words, similarities, and much more. The Flair library provides a rich set of embeddings, which can be either standalone or combined to enhance performance on various tasks.
@@ -92,6 +92,8 @@ reference: - embeddings_to_matrix - clear_flair_cache - show_flair_cache + - uninstall_python_package + - install_python_package - title: flairR Tutorial Dataset @@ -175,26 +177,26 @@ navbar: menu: - text: "All Function Reference" href: reference/index.html - - text: "The flair Module" - href: reference/index.html#the-flair-module - - text: "The flair_embeddings Module" - href: reference/index.html#the-flair-embeddings-module - - text: "The flair_data Module" - href: reference/index.html#the-flair-data-module - - text: "The flair_nn Module" - href: reference/index.html#the-flair-nn-module - - text: "The flair_trainers Module" - href: reference/index.html#the-flair-trainers-module - - text: "The flair_splitter Module" - href: reference/index.html#the-flair-splitter-module - - text: "The flair_models Module" - href: reference/index.html#the-flair-models-module - - text: "The flair_datasets Module" - href: reference/index.html#the-flair-datasets-module - - text: "The Pre-trianed Models for NLP Tasks" - href: articles/flair_models.html - - text: "The Wordembeddings in Flair" - href: articles/transformer_wordembeddings.html + # - text: "The flair Module" + # href: reference/index.html#the-flair-module + # - text: "The flair_embeddings Module" + # href: reference/index.html#the-flair-embeddings-module + # - text: "The flair_data Module" + # href: reference/index.html#the-flair-data-module + # - text: "The flair_nn Module" + # href: reference/index.html#the-flair-nn-module + # - text: "The flair_trainers Module" + # href: reference/index.html#the-flair-trainers-module + # - text: "The flair_splitter Module" + # href: reference/index.html#the-flair-splitter-module + # - text: "The flair_models Module" + # href: reference/index.html#the-flair-models-module + # - text: "The flair_datasets Module" + # href: reference/index.html#the-flair-datasets-module + # - text: "The Pre-trianed Models for NLP Tasks" + # href: articles/flair_models.html + # - text: "The 
Wordembeddings in Flair" + # href: articles/transformer_wordembeddings.html - icon: fa-newspaper-o text: News diff --git a/man/flair_data.Rd b/man/flair_data.Rd index 1a1e5c16..45552411 100644 --- a/man/flair_data.Rd +++ b/man/flair_data.Rd @@ -7,7 +7,7 @@ flair_data() } \value{ -A Python module (`flair.data`). +A Python module (`flair.data`). To access the classes and utilities. } \description{ The `flair.data` module provides essential utilities for text data @@ -15,24 +15,42 @@ processing and representation in the Flair library. This function gives access to various classes and utilities in the `flair.data` module, most notably: \itemize{ - \item \strong{Sentence}: Represents a sentence, which is a list of - Tokens. This class provides various utilities for sentence - manipulation, such as adding tokens, tagging with pre-trained models, - and obtaining embeddings. - \item \strong{Token}: Represents a word or a sub-word unit in a sentence. - It can carry various annotations such as named entity tags, part-of-speech - tags, and embeddings. Additionally, the token provides functionalities - to retrieve or check its annotations. - \item \strong{Corpus}: Represents a collection of sentences, + \item \strong{BoundingBox(left, top, right, bottom)}: Bases: tuple (Python); list (R) + \itemize{ + \item left - str. Alias for field number 0. + \item top - int Alias for field number 1 + \item right - int Alias for field number 2 + \item bottom - int Alias for field number 3 + } + + \item \strong{Sentence(text, use_tokenizer=True, language_code=None, + start_position=0)}:A Sentence is a list of tokens and is used to + represent a sentence or text fragment. `Sentence` can be imported by + `flair_data()$Sentence` via {flaiR}. + \itemize{ + \item text \code{Union[str, List[str], List[Token]]} - The original string (sentence), or a pre-tokenized list of tokens. + \item use_tokenizer \code{Union[bool, Tokenizer]} - Specify a custom tokenizer to split the text into tokens. 
The default is \code{flair.tokenization.SegTokTokenizer}. If \code{use_tokenizer} is set to \code{False}, \code{flair.tokenization.SpaceTokenizer} will be used instead. The tokenizer will be ignored if \code{text} refers to pre-tokenized tokens. + \item language_code \code{Optional[str]} - Language of the sentence. If not provided, \code{langdetect} will be called when the \code{language_code} is accessed for the first time. + \item start_position \code{int} - Start character offset of the sentence in the superordinate document. + } + \item \strong{Span(tokens, tag=None, score=1.0)}: Bases: _PartOfSentence. + A Span is a slice of a Sentence, consisting of a list of Tokens. + `Span` can be imported by `flair_data()$Span`. + + \item \strong{Token(text, head_id=None, whitespace_after=1, start_position=0, sentence=None)}: + This class represents one word in a tokenized sentence. + Each token may have any number of tags. It may also point to its head in a + dependency tree. `Token` can be imported by `flair_data()$Token` via {flaiR}. + + \item \strong{Corpus(train=None, dev=None, test=None, name='corpus', sample_missing_splits=True)}: Represents a collection of sentences, facilitating operations like splitting into train/test/development sets and applying transformations. It is particularly useful for training and evaluating models on custom datasets. + `Corpus` can be imported by `flair_data()$Corpus` via {flaiR}. + + \item \strong{Dictionary}: Represents a mapping between items and indices. + It is useful for converting text into machine-readable formats. } -Additionally, the module offers utilities for reading data in the CoNLL -format, a common format for NER, POS tagging, and more. It also contains -the `Dictionary` class for item-index mapping, facilitating the conversion -of text into machine-readable formats. This function provides a bridge -to access these functionalities directly from R. 
} \examples{ \dontrun{ @@ -48,3 +66,6 @@ Python reference: from flair.data import Sentence } } +\seealso{ +[flair.data](https://flairnlp.github.io/flair/master/api/flair.data.html#) +} diff --git a/man/uninstall_python_package.Rd b/man/uninstall_python_package.Rd new file mode 100644 index 00000000..cc9792cc --- /dev/null +++ b/man/uninstall_python_package.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{uninstall_python_package} +\alias{uninstall_python_package} +\title{Uninstall a Python Package} +\usage{ +uninstall_python_package(package_name, python_path = Sys.which("python3")) +} +\arguments{ +\item{package_name}{The name of the Python package to uninstall.} + +\item{python_path}{The path to the Python executable. If not provided, it uses the system's default Python path.} +} +\value{ +Invisibly returns TRUE if the package is successfully uninstalled, otherwise it stops with an error message. +} +\description{ +`uninstall_python_package` function uninstalls a specified Python +package using the system's Python installation. It checks if Python is +installed and accessible, then proceeds to uninstall the package. Finally, +`uninstall_python_package` verifies that the package has been successfully uninstalled. +} +\examples{ +\dontrun{ +uninstall_python_package("numpy") +} +} diff --git a/vignettes/quickstart.Rmd b/vignettes/quickstart.Rmd index acf85673..c36ce1af 100644 --- a/vignettes/quickstart.Rmd +++ b/vignettes/quickstart.Rmd @@ -68,7 +68,7 @@ __System Requirement:__ - Anaconda ___(highly recommended)___ -First, When first installing and loading the package, {flaiR} uses the system tool and goes through three evaluation stages. First and foremost, {flaiR} requires you to have at least Python 3 installed. If your device does not have Python 3, you will not be able to successfully install {flaiR}. 
Once this step is successful, the system further assesses whether you have the appropriate versions of PyTorch and Flair installed. The primary focus is on Flair, and if you haven't installed it, the message "Flair is installing from Python" will be displayed. +First, When first installing and loading the package, {flaiR} uses the system environment tool and goes through three evaluation stages. First, {flaiR} requires you to have at least Python 3 installed. If your device does not have Python 3, you will not be able to successfully install {flaiR}. Once this step is successful, the system further assesses whether you have the appropriate versions of PyTorch and Flair installed. The primary focus is on Flair, and if you haven't installed it, the message "Flair is installing from Python" will be displayed. During this process, you will observe numerous messages related to the installation of the Python environment and the Python flair module. Notably, flair has numerous dependencies, including libraries related to transformers (like the Pytorch, gensim, flair, etc). Thus, the installation might take some time to complete. diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 8d0a1586..1edb15ff 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( system(paste(Sys.which("python3"), "-m pip install --upgrade pip")) system(paste(Sys.which("python3"), "-m pip install torch")) system(paste(Sys.which("python3"), "-m pip install flair")) -library(reticulate) +# Sys.setenv(RETICULATE_PYTHON = Sys.which("python3")) library(flaiR) # system(paste(reticulate::py_config()$python, "-m pip install flair")) # reticulate::py_install("flair")