404
- -Page not found
- - -diff --git a/404.html b/404.html deleted file mode 100644 index 5ddc3cf..0000000 --- a/404.html +++ /dev/null @@ -1,131 +0,0 @@ - - -
- - - - -Page not found
- - -
+import json
+import os
+import glob
+from typing import Dict, Any
+
+
+
+[docs]
def get_files_matching(directory: str, criteria: str) -> list:
    """
    Recursively collect all file paths under ``directory`` matching ``criteria``.

    :param directory: root directory to search in.
    :param criteria: glob pattern to match (e.g., ``**meta.json``).

    :return: list of matching file paths.
    """
    pattern = os.path.join(directory, criteria)
    return glob.glob(pattern, recursive=True)
+
+
+
+
+[docs]
def write_to_file(dict: Dict[str, Any], dir_path: str, file_name: str) -> None:
    """
    Write a dictionary to a JSON file.

    :param dict: dictionary to be written.
    :param dir_path: path to the directory where the file will be saved.
    :param file_name: name of the file to be saved.
    """
    # NOTE(review): parameter name shadows the builtin `dict`; kept unchanged
    # so keyword callers are not broken.
    target = os.path.join(dir_path, file_name)
    with open(target, "w") as out:
        json.dump(dict, out, indent=4)
+
+
+
+import json
+from typing import Dict, Any
+from tsdf.constants import METADATA_NAMING_PATTERN
+from tsdf import file_utils
+
+
+# the old (TSDB) and new field (TSDF) names
+TSDB_TSDF_KEY_MAP = {
+ "project_id": "study_id",
+ "quantities": "channels",
+ "datatype": "data_type",
+ "start_datetime_iso8601": "start_iso8601",
+ "end_datetime_iso8601": "end_iso8601",
+}
+
+# the field whose value should be an array
+TSDB_ARRAY_KEYS = {"channels", "units"}
+
+
+
+[docs]
def _rename_keys_in_metadata(old_dict: Dict[str, Any]) -> Dict[str, Any]:
    """
    Rename keys in a metadata dictionary using TSDB_TSDF_KEY_MAP.

    Keys present in the map are replaced by their mapped name; other keys are
    kept as-is. Nested dictionaries and dictionaries inside lists are renamed
    recursively; non-dict list items are left untouched.

    :param old_dict: the metadata dictionary with keys to rename.
    :return: a new dictionary with renamed keys.
    """
    renamed: Dict[str, Any] = {}
    for old_key, value in old_dict.items():
        key = TSDB_TSDF_KEY_MAP.get(old_key, old_key)
        if isinstance(value, dict):
            renamed[key] = _rename_keys_in_metadata(value)
        elif isinstance(value, list):
            renamed[key] = [
                _rename_keys_in_metadata(item) if isinstance(item, dict) else item
                for item in value
            ]
        else:
            renamed[key] = value
    return renamed
+
+
+
+
+[docs]
+def _convert_to_array(data: Dict[str, Any], key: str) -> Dict[str, Any]:
+ """
+ This function converts the value of a specified key in a dictionary to an array if it's not already an array.
+ It handles nested dictionaries and lists of dictionaries.
+
+ :param data: The dictionary with a value to convert
+ :param key: The key in the dictionary whose value to convert
+ :return: The updated dictionary
+ """
+ for k, value in data.items():
+ if k == key and not isinstance(value, list):
+ data[k] = [str(value)]
+ elif isinstance(value, dict):
+ data[k] = _convert_to_array(value, key)
+ elif isinstance(value, list):
+ data[k] = [
+ _convert_to_array(v, key) if isinstance(v, dict) else v for v in value
+ ]
+ return data
+
+
+
+[docs]
def convert_tsdb_to_tsdf(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert data from TSDB (legacy) to TSDF (0.1) format.

    :param data: the data in legacy (tsdb) format.
    :return: the data in tsdf format.
    """
    # First translate the legacy key names, then ensure that every
    # array-valued field really holds a list.
    converted = _rename_keys_in_metadata(data)
    for array_key in TSDB_ARRAY_KEYS:
        converted = _convert_to_array(converted, array_key)
    return converted
+
+
+
+
+[docs]
def generate_tsdf_metadata_from_tsdb(filepath_existing: str, filepath_new: str) -> None:
    """
    This function creates a metadata file (JSON) file in TSDF (0.1) format from a TSDB (legacy) file.
    The existing file is left untouched; the converted content is written to `filepath_new`.

    :param filepath_existing: The path to the JSON file to process
    :param filepath_new: The path to the new JSON file
    """
    with open(filepath_existing, "r") as f:
        data = json.load(f)
    # Translate key names and array-valued fields from TSDB to TSDF conventions.
    new_data = convert_tsdb_to_tsdf(data)
    with open(filepath_new, "w") as f:
        json.dump(new_data, f)
+
+
+
+[docs]
def convert_file_tsdb_to_tsdf(filepath: str) -> None:
    """
    Convert a metadata file (JSON) from TSDB (legacy) to TSDF (0.1) format.

    The original file is overwritten with the converted content.

    :param filepath: path to the JSON file to process.
    """
    with open(filepath, "r") as infile:
        legacy = json.load(infile)
    converted = convert_tsdb_to_tsdf(legacy)
    with open(filepath, "w") as outfile:
        json.dump(converted, outfile)
+
+
+
+
+[docs]
def convert_files_tsdb_to_tsdf(directory: str) -> None:
    """
    This function converts all metadata files in a directory (and its subdirectories) from TSDB (legacy) to TSDF (0.1) format.
    It walks through all files in a directory (and its subdirectories),
    and processes all files with a .json extension.
    Each matched file is overwritten in place.

    :param directory: The directory to process files in
    """

    # Files are selected by METADATA_NAMING_PATTERN (not by a bare .json
    # extension as the summary above suggests) and converted in place.
    for filepath in file_utils.get_files_matching(directory, METADATA_NAMING_PATTERN):
        convert_file_tsdb_to_tsdf(filepath)
+
+
+import sys
+import numpy as np
+
+
+_map_from_numpy_types = {
+ "i": "int",
+ "f": "float",
+ # etc
+}
+""" Mapping of NumPy data types to their TSDF metadata annotations. """
+
+
+
+[docs]
def data_type_numpy_to_tsdf(data: np.ndarray):
    """
    Derive the TSDF metadata 'data_type' annotation from the NumPy data.

    :param data: NumPy data.
    :return: TSDF 'data_type' string for the array's dtype kind.
    :raises KeyError: if the dtype kind is not in the supported mapping.
    """
    kind = data.dtype.kind
    return _map_from_numpy_types[kind]
+
+
+
+_map_to_numpy_types = {
+ "int": "i",
+ "float": "f",
+ # etc
+}
+""" Mapping of data types that are supported by TSDF to
+ their NumPy representation used for parsing. """
+
+
+
+[docs]
def data_type_tsdf_to_numpy(data_type: str) -> str:
    """
    Compute the NumPy data type, based on the TSDF metadata 'data_type' value.

    :param data_type: TSDF metadata 'data_type' value.

    :return: NumPy data type (as a char).
    :raises KeyError: if the data type is not in the supported mapping.
    """
    numpy_kind = _map_to_numpy_types[data_type]
    return numpy_kind
+
+
+
+
+[docs]
def bits_numpy_to_tsdf(data: np.ndarray) -> int:
    """
    Compute the TSDF metadata 'n_bits' value, based on the NumPy data.

    :param data: NumPy data.

    :return: number of bits per element.
    """
    bytes_per_item = data.dtype.itemsize
    return 8 * bytes_per_item
+
+
+
+
+[docs]
def bytes_tsdf_to_numpy(n_bits: int):
    """
    Compute the NumPy byte count, based on the TSDF metadata 'n_bits' value.

    :param n_bits: TSDF metadata 'n_bits' value.

    :return: number of bytes, as a string (used to build a dtype format string).
    """
    n_bytes = n_bits // 8
    return str(n_bytes)
+
+
+
+_map_from_numpy_endianness = {
+ "<": "little",
+ ">": "big",
+ "=": sys.byteorder,
+ "|": "not applicable",
+}
+""" Supported endianness values. """
+
+
+
+[docs]
def endianness_numpy_to_tsdf(data: np.ndarray) -> str:
    """
    Compute the TSDF metadata 'endianness' value, based on the NumPy data.

    :param data: NumPy data.

    :return: TSDF metadata 'endianness' value (as a string, e.g. 'little').

    :raises KeyError: if the dtype's byte order is not in the supported mapping.
    """
    return _map_from_numpy_endianness[data.dtype.byteorder]
+
+
+
+_map_to_numpy_endianness = {
+ "little": "<",
+ "big": ">",
+ "not applicable": "|",
+}
+""" Supported endianness values. """
+
+
+
+[docs]
def endianness_tsdf_to_numpy(endianness: str) -> str:
    """
    Compute the NumPy byte-order symbol, based on the TSDF metadata 'endianness' value.

    :param endianness: TSDF metadata 'endianness' value (e.g. 'little').

    :return: NumPy byte-order symbol (as a char, e.g. '<').

    :raises KeyError: if the endianness is not in the supported mapping."""
    return _map_to_numpy_endianness[endianness]
+
+
+
+
+[docs]
def rows_numpy_to_tsdf(data: np.ndarray) -> int:
    """
    Compute the TSDF metadata 'rows' value, based on the NumPy data.

    :param data: NumPy data.

    :return: length of the array's first axis.
    """
    n_rows, *_rest = data.shape
    return n_rows
+
+
+"""
+Module for parsing TSDF metadata files.
+
+Reference: https://arxiv.org/abs/2211.11294
+"""
+
+import os
+from typing import Any, Dict, List
+import re
+from dateutil import parser
+
+from tsdf import constants
+from tsdf import tsdfmetadata
+
+
+
+[docs]
def read_data(data: Any, source_path: str) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
    """
    Parse a JSON object containing TSDF metadata into a dictionary that maps
    each binary file name to the TSDFMetadata object describing its format.

    :param data: JSON object containing TSDF metadata.
    :param source_path: path to the metadata file.

    :return: dictionary of TSDFMetadata objects.

    :raises tsdf_metadata.TSDFMetadataFieldValueError: if the file's TSDF version is not supported.
    """
    # Reject unsupported versions before descending into the structure.
    version = data["metadata_version"]
    if version not in constants.SUPPORTED_TSDF_VERSIONS:
        raise tsdfmetadata.TSDFMetadataFieldValueError(
            f"TSDF file version {version} not supported."
        )

    # Walk the hierarchy starting with no inherited properties.
    return _read_struct(data, {}, source_path, version)
+
+
+
+
+[docs]
def _read_struct(
    data: Any, defined_properties: Dict[str, Any], source_path, version: str
) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
    """
    Recursive method used to parse the TSDF metadata in a hierarchical
    order (from the root towards the leaves). Properties defined at an outer
    level are inherited by (copied into) all nested stream definitions.

    :param data: JSON object containing TSDF metadata.
    :param defined_properties: dictionary containing all the properties defined at the current level of the TSDF structure.
    :param source_path: path to the metadata file.
    :param version: version of the TSDF used within the file.

    :return: dictionary mapping binary file names to TSDFMetadata objects.

    :raises tsdf_metadata.TSDFMetadataFieldError: if the TSDF metadata file is missing a mandatory field.
    """
    all_streams: Dict[str, "tsdfmetadata.TSDFMetadata"] = {}
    remaining_data = {}
    leaf: bool = True

    # 1) Map all the values provided at the current level of the TSDF structure.
    for key, value in data.items():
        if is_mandatory_type(key, version):
            defined_properties[key] = value
        elif not _contains_file_name(value):
            # Plain (non-nested) optional value: inherited by nested streams.
            defined_properties[key] = value
        else:
            # Value holds nested stream definitions (a "file_name" appears
            # somewhere inside it), so this node is not a leaf.
            leaf = False
            remaining_data[key] = value

    # 2) If the current element is a leaf in the structure, convert it into a TSDFMetadata object.
    if leaf:
        try:
            bin_file_name = defined_properties["file_name"]
        except KeyError:
            raise tsdfmetadata.TSDFMetadataFieldError.missing_field("file_name")
        path = os.path.split(source_path)
        file_dir = os.path.join(path[0])
        meta_file_name = path[1]
        all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
            defined_properties, file_dir, meta_file_name
        )

    # 3) If the current element is not a leaf, `remaining_data`` will contain lower
    # levels of the TSDF structure.
    # Extend the mapping recursively with values provided at those levels.
    # Each recursive call gets its own copy of the inherited properties.
    for key, value in remaining_data.items():
        if isinstance(value, list):
            for each_value in value:
                all_streams = all_streams | _read_struct(
                    each_value, defined_properties.copy(), source_path, version
                )
        else:
            all_streams = all_streams | _read_struct(
                value, defined_properties.copy(), source_path, version
            )

    return all_streams
+
+
+
+
+[docs]
def is_mandatory_type(key: str, version: str) -> bool:
    """
    Return True if the field that corresponds to the key is mandatory
    for the given TSDF version, otherwise False.

    :param key: key of the TSDF metadata field.
    :param version: version of the TSDF used within the file.

    :return: True if the field is mandatory, otherwise False.
    """
    # The membership test already yields a bool; the previous
    # `True if ... else False` conditional was redundant.
    return key in constants.MANDATORY_TSDF_KEYS[version]
+
+
+
+
+[docs]
+def _contains_file_name(data: Any) -> bool:
+ """
+ Function return True if the data contains the "file_name" key,
+ and thus, represents nested data elements.
+ Otherwise it returns False.
+
+ :param data: data to be checked.
+
+ :return: True if the data contains the "file_name" key, otherwise False.
+ """
+
+ if isinstance(data, list):
+ for elem in data:
+ if _contains_file_name(elem):
+ return True
+
+ if not isinstance(data, dict):
+ return False
+
+ for key, value in data.items():
+ if key == "file_name":
+ return True
+ if _contains_file_name(value):
+ return True
+ return False
+
+
+
+
+[docs]
def contains_tsdf_mandatory_fields(dictionary: Dict[str, Any]) -> bool:
    """
    Verifies that all the mandatory properties for TSDF metadata are provided,
    and are in the right format.

    :param dictionary: dictionary containing TSDF metadata.

    :return: True if the metadata is well formatted.

    :raises tsdf_metadata.TSDFMetadataFieldError: if the TSDF metadata file is missing a mandatory field.
    :raises tsdf_metadata.TSDFMetadataFieldValueError: if the TSDF metadata file contains an invalid value.
    """
    # The version determines which fields are mandatory, so it must exist first.
    version_key = "metadata_version"
    if version_key not in dictionary.keys():
        raise tsdfmetadata.TSDFMetadataFieldError.missing_field(version_key)

    version = dictionary[version_key]
    for key in constants.MANDATORY_TSDF_KEYS[version]:
        if key not in dictionary.keys():
            raise tsdfmetadata.TSDFMetadataFieldError.missing_field(key)
    # Each channel needs exactly one unit, so the two lists must align.
    units = "units"
    channels = "channels"
    if len(dictionary[units]) != len(dictionary[channels]):
        raise tsdfmetadata.TSDFMetadataFieldValueError(
            f"TSDF metadata structure must specify equal number of {units} and {channels} for each binary file."
        )

    # Type-check every field (non-mandatory keys are skipped by the helper).
    for key, value in dictionary.items():
        _check_tsdf_property_format(key, value, version)

    return True
+
+
+
+
+[docs]
def _check_tsdf_property_format(key: str, value, version: str) -> None:
    """
    Check whether the value of the mandatory TSDF field specified by the key
    is of the expected data format.\\
    `Note: If the key is not mandatory the function does not perform any checks.`

    :param key: key of the TSDF metadata field.
    :param value: value of the TSDF metadata field.
    :param version: version of the TSDF used within the file.

    :raises tsdf_metadata.TSDFMetadataFieldValueError: if the TSDF metadata file contains an invalid value.
    """
    # Only mandatory fields have a declared expected type.
    if not is_mandatory_type(key, version):
        return

    # The two constant lists are parallel: the type name sits at the
    # same index as the key.
    mandatory_keys = constants.MANDATORY_TSDF_KEYS[version]
    type_name = constants.MANDATORY_TSDF_KEYS_VALUES[version][mandatory_keys.index(key)]
    expected_type = constants.KEY_VALUE_TYPES[type_name]

    if not isinstance(value, expected_type):
        raise tsdfmetadata.TSDFMetadataFieldValueError(
            f"The given value for {key} is not in the expected ({type_name}) format."
        )
+
+
+
+
+[docs]
def get_file_metadata_at_index(
    metadata: Dict[str, "tsdfmetadata.TSDFMetadata"], index: int
) -> "tsdfmetadata.TSDFMetadata":
    """
    Returns the metadata object at the position defined by the index
    (in the dictionary's insertion order).

    :param metadata: dictionary containing TSDF metadata.
    :param index: index of the metadata object to be returned.

    :return: metadata object at the position defined by the index.

    :raises IndexError: if the index is out of range.
    """
    for position, value in enumerate(metadata.values()):
        if position == index:
            return value
    raise IndexError("The index is out of range.")
+
+
+
+
+[docs]
def confirm_dir_of_metadata(metadatas: List["tsdfmetadata.TSDFMetadata"]) -> None:
    """
    The method is used to confirm whether all the metadata files are expected in the same directory.

    :param metadatas: list of metadata objects.

    :raises tsdf_metadata.TSDFMetadataFieldValueError: if the metadata files are not in the same directory or describe the same binaries.
    """
    # NOTE(review): `next()` raises StopIteration on an empty list — callers
    # are expected to pass at least one metadata object.
    metadata_iter = iter(metadatas)
    init_metadata = next(metadata_iter)

    # Every remaining object is compared against the first one.
    for curr_metadata in metadata_iter:
        if init_metadata.file_dir_path != curr_metadata.file_dir_path:
            raise tsdfmetadata.TSDFMetadataFieldValueError(
                "Metadata files have to be in the same folder to be combined."
            )
        if init_metadata.file_name == curr_metadata.file_name:
            raise tsdfmetadata.TSDFMetadataFieldValueError(
                "Two metadata objects cannot reference the same binary file (file_name)."
                #TODO: why not?
            )
+
+
+
+[docs]
def is_iso8601(date_string: str) -> bool:
    """
    Checks if the given date string is in ISO8601 format.

    :param date_string: date string to be checked.

    :return: True if the string is a valid ISO8601 datetime, otherwise False.
    """
    # Note that we need both the regex and the parser to validate the date string
    # The regex only still allows for invalid dates, e.g. 2021-02-29
    # The parser is too lenient in accepting different formats
    iso8601_regex = r"^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])(T(2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9](?:\.[0-9]+)?(?:Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?)?$"
    if re.match(iso8601_regex, date_string):
        try:
            parser.parse(date_string)
            return True
        except (ValueError, OverflowError):
            # BUG FIX: dateutil's parser raises ValueError (ParserError is a
            # ValueError subclass) or OverflowError on invalid dates. The
            # previous code caught TSDFMetadataFieldValueError, which
            # parser.parse never raises, so a date like 2021-02-29 crashed
            # instead of returning False.
            return False
    return False
+
+
+
+[docs]
def validate_datetimes(metadata: tsdfmetadata.TSDFMetadata) -> bool:
    """
    Validates the start and end date format of the TSDFMetaData object.

    :return: True if both datetimes are valid ISO8601.
    :raises tsdfmetadata.TSDFMetadataFieldValueError: if either datetime is invalid.
    """
    for field in ("start_iso8601", "end_iso8601"):
        stamp = getattr(metadata, field)
        if not is_iso8601(stamp):
            raise tsdfmetadata.TSDFMetadataFieldValueError(f"Invalid {field}: {stamp}")
    return True
+
+
+"""
+Module for reading and writing binary files associated with TSDF.
+
+Reference: https://arxiv.org/abs/2211.11294
+"""
+
+import os
+from typing import List, Union
+import numpy as np
+import pandas as pd
+from tsdf import numpy_utils
+from tsdf import tsdfmetadata
+from tsdf.constants import ConcatenationType
+
+
+
+[docs]
def load_dataframe_from_binaries(
    metadatas: List["tsdfmetadata.TSDFMetadata"],
    concatenation: ConcatenationType = ConcatenationType.none,
) -> Union[pd.DataFrame, List[pd.DataFrame]]:
    """
    Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).

    :param metadatas: list of TSDFMetadata objects.
    :param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).

    :return: pandas DataFrame containing the combined data.
    """
    # Load the data: one DataFrame per binary file, columns named by channels.
    data_frames = []
    for metadata in metadatas:
        data = load_ndarray_from_binary(metadata)
        df = pd.DataFrame(data, columns=metadata.channels)
        data_frames.append(df)

    # Merge the data
    # NOTE(review): a `concatenation` value outside these three members would
    # fall through and return None implicitly — confirm ConcatenationType has
    # no other members.
    if concatenation == ConcatenationType.rows:
        return pd.concat(data_frames)
    elif concatenation == ConcatenationType.columns:
        return pd.concat(data_frames, axis=1)
    elif concatenation == ConcatenationType.none:
        return data_frames
+
+
+
+
+[docs]
def load_ndarray_from_binary(
    metadata: "tsdfmetadata.TSDFMetadata", start_row: int = 0, end_row: int = -1
) -> np.ndarray:
    """
    Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved).

    :param metadata: TSDFMetadata object.
    :param start_row: (optional) first row to load.
    :param end_row: (optional) last row to load. If -1, load all rows.

    :return: numpy array containing the data."""
    metadata_dir = metadata.file_dir_path

    # The binary file lives in the same directory as its metadata file.
    bin_path = os.path.join(metadata_dir, metadata.file_name)
    return _load_binary_file(
        bin_path,
        metadata.data_type,
        metadata.bits,
        metadata.endianness,
        metadata.rows,
        len(metadata.channels),  # one column per channel
        start_row,
        end_row,
    )
+
+
+
+
+[docs]
def _load_binary_file(
    bin_file_path: str,
    data_type: str,
    n_bits: int,
    endianness: str,
    n_rows: int,
    n_columns: int,
    start_row: int = 0,
    end_row: int = -1,
) -> np.ndarray:
    """
    Use provided parameters to load and return a numpy array from a binary file.

    :param bin_file_path: path to the binary file.
    :param data_type: data type of the binary file.
    :param n_bits: number of bits per value.
    :param endianness: endianness of the binary file.
    :param n_rows: number of rows in the binary file.
    :param n_columns: number of columns in the binary file.
    :param start_row: (optional) first row to load.
    :param end_row: (optional) last row to load. If -1, load all rows.

    :return: numpy array containing the data.

    :raises Exception: if the number of loaded rows does not match the requested range.
    """

    # Build the NumPy dtype format string, e.g. "<f4": byte order + kind + bytes.
    s_endianness = numpy_utils.endianness_tsdf_to_numpy(endianness)
    s_type = numpy_utils.data_type_tsdf_to_numpy(data_type)
    s_n_bytes = numpy_utils.bytes_tsdf_to_numpy(n_bits)
    format_string = "".join([s_endianness, s_type, s_n_bytes])

    # Load the data and reshape
    with open(bin_file_path, "rb") as fid:
        # Byte offset of the first requested row: rows * columns * bytes-per-value.
        fid.seek(start_row * n_columns * n_bits // 8)
        if end_row == -1:
            end_row = n_rows
        buffer = fid.read((end_row - start_row) * n_columns * n_bits // 8)
        values = np.frombuffer(buffer, dtype=format_string)
        if n_columns > 1:
            # Row-major reshape; row count is inferred from the buffer size.
            values = values.reshape((-1, n_columns))

    # Check whether the number of rows matches the metadata
    if values.shape[0] != end_row - start_row:
        raise Exception("Number of rows doesn't match file length.")

    return values
+
+
+"""
+Module for reading TSDF files.
+
+Reference: https://arxiv.org/abs/2211.11294
+"""
+
+import json
+import os
+from pathlib import Path
+from typing import Dict, List
+from tsdf import file_utils
+from tsdf.constants import METADATA_NAMING_PATTERN
+from tsdf import parse_metadata
+from tsdf import legacy_tsdf_utils
+from tsdf.tsdfmetadata import TSDFMetadata
+
+
+
+[docs]
def load_metadata_file(file) -> Dict[str, TSDFMetadata]:
    """Loads a TSDF metadata file, returns a dictionary

    :param file: file object containing the TSDF metadata.

    :return: dictionary of TSDFMetadata objects.
    """

    # The data is isomorphic to a JSON
    data = json.load(file)

    # Resolve the absolute path so relative binary references can be located.
    abs_path = os.path.realpath(file.name)

    # Parse the data and verify that it complies with TSDF requirements
    return parse_metadata.read_data(data, abs_path)
+
+
+
+[docs]
def load_metadata_legacy_file(file) -> Dict[str, TSDFMetadata]:
    """Loads a TSDB metadata file, i.e., legacy format of the TSDF. It returns a dictionary representing the metadata.

    :param file: file object containing the TSDF metadata.

    :return: dictionary of TSDFMetadata objects.
    """

    # The data is isomorphic to a JSON
    legacy_data = json.load(file)

    abs_path = os.path.realpath(file.name)

    # Convert TSDB (legacy) key names and array fields to the TSDF format.
    tsdf_data = legacy_tsdf_utils.convert_tsdb_to_tsdf(legacy_data)

    # Parse the data and verify that it complies with TSDF requirements
    return parse_metadata.read_data(tsdf_data, abs_path)
+
+
+
+[docs]
def load_metadatas_from_dir(
    dir_path: str, naming_pattern=METADATA_NAMING_PATTERN
) -> List[Dict[str, TSDFMetadata]]:
    """
    Loads all TSDF metadata files in a directory.

    :param dir_path: path to the directory containing the TSDF metadata files.
    :param naming_pattern: (optional) naming pattern of the TSDF metadata files .

    :return: one dictionary of TSDFMetadata objects per metadata file found.
    """
    # Find every metadata file, then parse each one.
    matching_paths = file_utils.get_files_matching(dir_path, naming_pattern)
    return [load_metadata_from_path(path) for path in matching_paths]
+
+
+
+
+[docs]
def load_metadata_from_path(path: Path) -> Dict[str, TSDFMetadata]:
    """
    Loads a TSDF metadata file, returns a dictionary

    :param path: path to the TSDF metadata file.

    :return: dictionary of TSDFMetadata objects.
    """
    # The data is isomorphic to a JSON
    with open(path, "r") as file:
        data = json.load(file)

    # Resolve the absolute path so relative binary references can be located.
    abs_path = os.path.realpath(path)
    # Parse the data and verify that it complies with TSDF requirements
    return parse_metadata.read_data(data, abs_path)
+
+
+
+
+[docs]
def load_metadata_string(json_str) -> Dict[str, TSDFMetadata]:
    """
    Loads a TSDF metadata string, returns a dictionary.

    :param json_str: string containing the TSDF metadata.

    :return: dictionary of TSDFMetadata objects.
    """

    # The data is isomorphic to a JSON
    data = json.loads(json_str)

    # Parse the data and verify that it complies with TSDF requirements.
    # No source path is available for a raw string, so an empty one is passed.
    return parse_metadata.read_data(data, "")
+
+
+import copy
+from typing import Any, Dict, List
+from datetime import datetime
+from dateutil import parser
+
+from tsdf import parse_metadata
+
+
+[docs]
class TSDFMetadataFieldError(Exception):
    """Raised when the TSDFMetadata is missing an obligatory field."""

    @classmethod
    def missing_field(cls, field_name: str):
        """Build an instance reporting that ``field_name`` is absent from the metadata."""
        return cls(
            f"Value for the obligatory TSDF field '{field_name}' is missing in the provided TSDF metadata file."
        )
+
+
+
+
+
+[docs]
class TSDFMetadataFieldValueError(Exception):
    """Raised when a TSDFMetadata field is wrongly annotated."""
+
+
+
+
+[docs]
class TSDFMetadata:
    """Structure that provides metadata needed for reading a data stream."""

    metadata_version: str
    """Version of the TSDF metadata file."""
    study_id: str
    """Study ID."""
    subject_id: str
    """Subject ID."""
    device_id: str
    """Device ID."""
    start_iso8601: str
    """Start time of the recording in ISO8601 format."""
    end_iso8601: str
    """End time of the recording in ISO8601 format."""
    file_name: str
    """Name of the binary file containing the data."""
    rows: int
    """Number of rows in the binary file."""
    channels: List[str]
    """List of channels in the binary file."""
    units: List[str]
    """List of units for each channel in the binary file."""
    data_type: str
    """Data type of the binary file."""
    bits: int
    """Number of bits per sample in the binary file."""
    endianness: str
    """Endianness of the binary file."""

    file_dir_path: str
    """ A reference to the directory path, so we don't need it again when reading associated binary files. """
    metadata_file_name: str #TODO: do we need this?? / is it used?
    """ A reference to the source path, so we don't need it again when reading associated binary files. """

    def __init__(
        self, dictionary: Dict[str, Any], dir_path: str, metadata_file_name: str = "", do_validate: bool = True
    ) -> None:
        """
        The default constructor takes a dictionary as an argument and creates each
        field as a separate property.\\
        `Both, mandatory and non-mandatory fields are stored as object properties.`

        :param dictionary: dictionary containing TSDF metadata.
        :param dir_path: path to the directory where the metadata file is stored.
        :param metadata_file_name: (optional) name of the metadata file.
        :param do_validate: (optional) flag to validate the metadata.

        :raises TSDFMetadataFieldValueError: if `do_validate` is True and the metadata is invalid.
        """

        # Copy the attributes from the dictionary to the object
        for key, value in dictionary.items():
            setattr(self, key, value)
        self.file_dir_path = dir_path
        self.metadata_file_name = metadata_file_name

        # Validate the metadata
        if do_validate:
            if not self.validate():
                raise TSDFMetadataFieldValueError("The provided metadata is invalid.")

    def validate(self) -> bool:
        """
        Validate this metadata object: mandatory fields must be present and
        well-typed, and the start/end datetimes must be valid ISO8601.

        :return: True if the metadata is valid.

        :raises TSDFMetadataFieldError: if a mandatory field is missing.
        :raises TSDFMetadataFieldValueError: if a field contains an invalid value.
        """
        isValid: bool = True

        # Validate presence of mandatory fields
        # NOTE(review): local name `dict` shadows the builtin within this method.
        dict = self.get_plain_tsdf_dict_copy()
        isValid = isValid and parse_metadata.contains_tsdf_mandatory_fields(dict)

        # Validate datetimes
        isValid = isValid and parse_metadata.validate_datetimes(self)

        return isValid

    def get_plain_tsdf_dict_copy(self) -> Dict[str, Any]:
        """
        Method returns a copy of the dict containing fields needed for the TSDF file.

        :return: a copy of the dict containing fields needed for the TSDF file.
        """
        # Deep copy so the caller can mutate the result without touching this
        # object; the two bookkeeping attributes are not part of the TSDF file.
        simple_dict = copy.deepcopy(self.__dict__)
        if simple_dict.get("file_dir_path") is not None:
            simple_dict.pop("file_dir_path")
        if simple_dict.get("metadata_file_name") is not None:
            simple_dict.pop("metadata_file_name")
        return simple_dict

    def set_start_datetime(self, date_time: datetime) -> None:
        """
        Sets the start date of the recording in ISO8601 format.

        :param date_time: datetime object containing the start date.
        """
        self.start_iso8601 = date_time.isoformat()

    def get_start_datetime(self) -> datetime:
        """
        Returns the start date of the recording as a datetime object.

        :return: datetime object containing the start date.
        """
        return parser.parse(self.start_iso8601)

    def set_end_datetime(self, date_time: datetime) -> None:
        """
        Sets the end date of the recording in ISO8601 format.

        :param date_time: datetime object containing the end date.
        """
        self.end_iso8601 = date_time.isoformat()

    def get_end_datetime(self) -> datetime:
        """
        Returns the end date of the recording as a datetime object.

        :return: datetime object containing the end date.
        """
        return parser.parse(self.end_iso8601)
+
+
+
+import os
+import argparse
+import traceback
+import json
+from tsdf import read_tsdf, read_binary
+
+
+
+[docs]
def validate_tsdf_format(file_path):
    """
    Validate a TSDF metadata file and the binary files it references.

    Loads the metadata (which checks mandatory fields), then attempts to load
    every referenced binary file, printing a message per file.

    :param file_path: path to the TSDF metadata (JSON) file.

    :return: True if everything loads successfully, False otherwise.
    """
    try:
        # Read the meta data (this will check for compulsory fields and such)
        metadata = read_tsdf.load_metadata_from_path(file_path)

        # Get the absolute path of the file and cut off the file name
        # NOTE(review): abs_dir is computed but currently unused.
        abs_path = os.path.abspath(file_path)
        abs_dir = os.path.dirname(abs_path)

        # Loop through all the files in the metadata
        for file_name, file_metadata in metadata.items():
            # print the file_metadata as json
            # print(json.dumps(file_metadata.get_plain_tsdf_dict_copy(), indent=4))

            # Load the binary data
            binary_data = read_binary.load_ndarray_from_binary(file_metadata)

            # Success message
            print(
                f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}"
            )

        return True

    except Exception as e:
        # Any failure (bad metadata, missing/short binary) is reported, not raised.
        print(f"Error while validating: {e}")
        # traceback.print_exc()
        return False
+
+
+
+
+[docs]
def main():
    """CLI entry point: validate one file against the TSDF format, exit 0 on success, 1 on failure."""
    arg_parser = argparse.ArgumentParser(
        description="Validate a file content against the TSDF format."
    )
    arg_parser.add_argument("file_path", help="Path to the file to validate")
    parsed = arg_parser.parse_args()

    # Perform validation
    ok = validate_tsdf_format(parsed.file_path)

    # Exit with error code 1 if the validation failed
    exit(0 if ok else 1)


if __name__ == "__main__":
    main()
+
+"""
+Module for writing binary files associated with TSDF.
+
+Reference: https://arxiv.org/abs/2211.11294
+"""
+
+import os
+from typing import Any, Dict, List
+import numpy as np
+import pandas as pd
+from tsdf import numpy_utils
+
+from tsdf.tsdfmetadata import TSDFMetadata
+
+
+
+[docs]
def write_dataframe_to_binaries(
    file_dir: str, df: pd.DataFrame, metadatas: List[TSDFMetadata]
) -> None:
    """
    Save binary file based on the provided pandas DataFrame.

    Each metadata object selects its own channel columns from `df`, which are
    dumped to its binary file; the metadata object is then updated in place
    with the properties derived from the written array.

    :param file_dir: path to the directory where the file will be saved.
    :param df: pandas DataFrame containing the data.
    :param metadatas: list of metadata objects to be saved, also contains
        channels to be retrieved from dataframe.
    """
    for metadata in metadatas:
        file_name = metadata.file_name
        path = os.path.join(file_dir, file_name)

        # Write
        data = df[metadata.channels].to_numpy() # TODO: derive channels from dataframe or use specified in metadata? Also for file_name?
        data.tofile(path)

        # Update metadata with data properties (data_type, bits, endianness, rows)
        data_props = _get_metadata_from_ndarray(data)
        for key in data_props:
            metadata.__setattr__(key, data_props[key])
+
+
+
+
+[docs]
def _get_metadata_from_ndarray(data: np.ndarray) -> Dict[str, Any]:
    """
    Retrieve metadata information encoded in the NumPy array.

    :param data: NumPy array containing the data.

    :return: dictionary with 'data_type', 'bits', 'endianness' and 'rows' entries.
    """
    # Each entry is derived from the array's dtype/shape by the numpy_utils helpers.
    return {
        "data_type": numpy_utils.data_type_numpy_to_tsdf(data),
        "bits": numpy_utils.bits_numpy_to_tsdf(data),
        "endianness": numpy_utils.endianness_numpy_to_tsdf(data),
        "rows": numpy_utils.rows_numpy_to_tsdf(data),
    }
+
+
+
+
+[docs]
def write_binary_file(
    file_dir: str, file_name: str, data: np.ndarray, metadata: dict
) -> TSDFMetadata:
    """
    Save binary file based on the provided NumPy array.

    :param file_dir: path to the directory where the file will be saved.
    :param file_name: name of the file to be saved.
    :param data: NumPy array containing the data.
    :param metadata: dictionary containing the metadata; updated in place with
        the array-derived properties and the file name.

    :return: TSDFMetadata object.
    """
    path = os.path.join(file_dir, file_name)
    # Raw dump of the array; its layout is recorded in the metadata fields below.
    data.tofile(path)
    metadata.update(_get_metadata_from_ndarray(data))
    metadata.update({"file_name": file_name})

    return TSDFMetadata(metadata, file_dir)
+
+
+"""
+Module for writing TSDF files.
+
+Reference: https://arxiv.org/abs/2211.11294
+"""
+
+from typing import Any, Dict, List
+from tsdf import file_utils
+from tsdf import parse_metadata
+from tsdf.tsdfmetadata import TSDFMetadata, TSDFMetadataFieldValueError
+
+
+
+[docs]
def write_metadata(metadatas: List[TSDFMetadata], file_name: str) -> None:
    """
    Combine and save the TSDF metadata objects as a json file.

    :param metadatas: List of TSDFMetadata objects to be saved.
    :param file_name: Name of the file to be saved. The file will be saved in
        the directory of the first TSDFMetadata object in the list.

    :raises TSDFMetadataFieldValueError: if the metadata files cannot be
        combined (e.g. they have no common fields) or if the list of
        TSDFMetadata objects is empty.
    """
    # Reject an empty list up front, before doing any per-object work.
    if len(metadatas) == 0:
        raise TSDFMetadataFieldValueError(
            "Metadata cannot be saved, as the list of TSDFMetadata objects is empty."
        )

    for meta in metadatas:
        meta.validate()

    # A single metadata object needs no merging; save it as-is.
    if len(metadatas) == 1:
        meta = metadatas[0]
        file_utils.write_to_file(
            meta.get_plain_tsdf_dict_copy(), meta.file_dir_path, file_name
        )
        return

    # Ensure that the metadata files can be combined
    parse_metadata.confirm_dir_of_metadata(metadatas)

    plain_meta = [meta.get_plain_tsdf_dict_copy() for meta in metadatas]
    overlap = _extract_common_fields(plain_meta)
    if not overlap:
        raise TSDFMetadataFieldValueError(
            "Metadata files must have at least one common field. Otherwise, they should be stored separately."
        )

    # At this point plain_meta has at least two entries, so the previous
    # `len(plain_meta) > 0` guard was redundant and has been dropped.
    overlap["sensors"] = _calculate_overlaps_rec(plain_meta)
    file_utils.write_to_file(overlap, metadatas[0].file_dir_path, file_name)
+
+
+
+
+[docs]
+def _extract_common_fields(metadatas: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """
+ Extract the fields that are the same for all the metadata files.
+ A new dict is created and the fields are removed from the original dictionaries.
+
+ :param metadatas: List of dictionaries containing the metadata.
+
+ :return: Dictionary containing the common fields.
+ """
+ meta_overlap: dict = {}
+
+ # Return empty dict if metadatas is empty
+ if len(metadatas) == 0:
+ return meta_overlap
+ if len(metadatas) == 1:
+ return metadatas.pop(0)
+ init_metadata = metadatas[0]
+ for key, value in init_metadata.items():
+ key_in_all = True
+ for curr_meta in metadatas[1:]:
+ if key not in curr_meta.keys() or curr_meta[key] != value:
+ key_in_all = False
+ if key_in_all:
+ meta_overlap[key] = value
+ for key, _ in meta_overlap.items():
+ for meta_dict in metadatas:
+ meta_dict.pop(key)
+ return meta_overlap
+
+
+
+
+[docs]
def _calculate_overlaps_rec(metadatas: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    A recursive call that optimises the structure of the TSDF metadata by
    grouping common values. The input is a list of "flat" metadata
    dictionaries; the output is a list of dictionaries (potentially of
    length 1) containing the metadata in a tree structure. The tree is built
    by grouping the common values in the metadata, recursively, until no
    more grouping is possible.

    :param metadatas: List of dictionaries containing the metadata.

    :return: List of dictionaries containing the metadata in a tree structure.
    """
    # Base cases: nothing to group.
    if not metadatas:
        return []
    if len(metadatas) == 1:
        return metadatas

    # For every key, find the largest group of dictionaries sharing a value.
    overlap_per_key: Dict[str, List[dict]] = {
        key: calculate_max_overlap(metadatas, key) for key in _get_all_keys(metadatas)
    }
    best_key = max_len_key(overlap_per_key)

    largest_group = overlap_per_key[best_key]
    remainder = [meta for meta in metadatas if meta not in largest_group]

    result: List[dict] = []

    # Factor the common fields out of the largest group and recurse on it.
    # NOTE: _extract_common_fields mutates largest_group (it may even empty
    # it for a single-element group), so the length check must come after.
    group_overlap = _extract_common_fields(largest_group)
    if len(largest_group) > 0:
        group_overlap["sensors"] = _calculate_overlaps_rec(largest_group)
        result.append(group_overlap)

    # Recurse on everything that was not part of the largest group.
    result.extend(_calculate_overlaps_rec(remainder))

    return result
+
+
+
+
+[docs]
+def _get_all_keys(metadatas: List[Dict[str, Any]]) -> List[str]:
+ """
+ Get all the keys from the metadata files.
+
+ :param metadatas: List of dictionaries containing the metadata.
+
+ :return: List of keys.
+ """
+ keys: List[str] = []
+ for meta in metadatas:
+ keys.extend(meta.keys())
+ return list(set(keys))
+
+
+
+
+[docs]
def calculate_max_overlap(
    meta_files: List[Dict[str, Any]], meta_key: str
) -> List[Dict[str, Any]]:
    """
    Calculate the maximum overlap between the metadata files, for a specific key.
    It returns the biggest group of dictionaries that contain the same value
    for the given meta_key.

    :param meta_files: List of dictionaries containing the metadata.
    :param meta_key: The key for which the overlap is calculated.

    :return: List of dictionaries containing the metadata.
    """
    # Map each encountered value (stringified) to the metadata files that
    # carry it for the given key.
    groups: Dict[str, List[Dict[str, Any]]] = {}
    for meta in meta_files:
        if meta_key in meta:
            groups.setdefault(str(meta[meta_key]), []).append(meta)

    # Pick the largest group; ties resolve to the first-seen value,
    # matching max() over insertion order.
    biggest_value = max(groups, key=lambda value: len(groups[value]))
    return groups[biggest_value]
+
+
+
+
+[docs]
def max_len_key(elements: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Return the key in a dictionary that has the longest list as a value.

    :param elements: Dictionary containing the elements.

    :return: The key that has the longest list as a value.
    """
    # max() over items keeps the first-seen key on ties, same as
    # iterating the keys directly.
    longest_key, _ = max(elements.items(), key=lambda item: len(item[1]))
    return longest_key
+
+
\n", + " | time | \n", + "acceleration_x | \n", + "acceleration_y | \n", + "acceleration_z | \n", + "rotation_x | \n", + "rotation_y | \n", + "rotation_z | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "0.374540 | \n", + "6 | \n", + "1 | \n", + "3 | \n", + "2 | \n", + "5 | \n", + "3 | \n", + "
1 | \n", + "0.950714 | \n", + "7 | \n", + "9 | \n", + "1 | \n", + "4 | \n", + "6 | \n", + "0 | \n", + "
2 | \n", + "0.731994 | \n", + "9 | \n", + "2 | \n", + "2 | \n", + "6 | \n", + "7 | \n", + "1 | \n", + "
3 | \n", + "0.598659 | \n", + "7 | \n", + "4 | \n", + "3 | \n", + "2 | \n", + "7 | \n", + "2 | \n", + "
4 | \n", + "0.156019 | \n", + "7 | \n", + "4 | \n", + "2 | \n", + "0 | \n", + "5 | \n", + "5 | \n", + "
\n", + " | time | \n", + "acceleration_x | \n", + "acceleration_y | \n", + "acceleration_z | \n", + "rotation_x | \n", + "rotation_y | \n", + "rotation_z | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "0.374540 | \n", + "0.028163 | \n", + "0.004694 | \n", + "0.014081 | \n", + "0.121951 | \n", + "0.304878 | \n", + "0.182927 | \n", + "
1 | \n", + "0.950714 | \n", + "0.032856 | \n", + "0.042244 | \n", + "0.004694 | \n", + "0.243902 | \n", + "0.365854 | \n", + "0.000000 | \n", + "
2 | \n", + "0.731994 | \n", + "0.042244 | \n", + "0.009388 | \n", + "0.009388 | \n", + "0.365854 | \n", + "0.426829 | \n", + "0.060976 | \n", + "
3 | \n", + "0.598659 | \n", + "0.032856 | \n", + "0.018775 | \n", + "0.014081 | \n", + "0.121951 | \n", + "0.426829 | \n", + "0.121951 | \n", + "
4 | \n", + "0.156019 | \n", + "0.032856 | \n", + "0.018775 | \n", + "0.009388 | \n", + "0.000000 | \n", + "0.304878 | \n", + "0.304878 | \n", + "
' + + '' + + _("Hide Search Matches") + + "
" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/autoapi/tsdf/constants/index.html b/autoapi/tsdf/constants/index.html new file mode 100644 index 0000000..6a039d0 --- /dev/null +++ b/autoapi/tsdf/constants/index.html @@ -0,0 +1,222 @@ + + + + + + ++ | List of currently supported versions. |
+
+ | Dictionary linking mandatory keys for different versions |
+
+ | Dictionary linking mandatory keys to their values |
+
+ | List of data types that are supported within the TSDF metadata file. |
+
+ | Naming convention for the metadata files. ** allows for any prefix, including additional directories. |
+
+ | + |
List of currently supported versions.
+Dictionary linking mandatory keys for different versions
+Dictionary linking mandatory keys to their values
+List of data types that are supported within the TSDF metadata file.
+Naming convention for the metadata files. ** allows for any prefix, including additional directories.
+
|
+Get all files matching the criteria in the directory and its subdirectories. |
+
|
+Write a dictionary to a json file. |
+
Get all files matching the criteria in the directory and its subdirectories.
+directory – directory to search in.
criteria – criteria to match (e.g., **meta.json).
list of files matching the criteria.
+Write a dictionary to a json file.
+dict – Dictionary to be written.
dir_path – Path to the directory where the file will be saved.
file_name – Name of the file to be saved.
+ | Structure that provides metadata needed for reading a data stream. |
+
|
+Loads a TSDF metadata file, returns a dictionary |
+
|
+Loads a TSDF metadata file, returns a dictionary |
+
|
+Loads all TSDF metadata files in a directory, returns a dictionary |
+
|
+Loads a TSDF metadata string, returns a dictionary. |
+
|
+Loads a TSDB metadata file, i.e., legacy format of the TSDF. It returns a dictionary representing the metadata. |
+
|
+Combine and save the TSDF metadata objects as a json file. |
+
|
+Save binary file based on the provided NumPy array. |
+
|
+Save binary file based on the provided pandas DataFrame. |
+
|
+Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved). |
+
|
+Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none). |
+
Loads a TSDF metadata file, returns a dictionary
+file – file object containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
+Loads a TSDF metadata file, returns a dictionary
+path – path to the TSDF metadata file.
+dictionary of TSDFMetadata objects.
+Loads all TSDF metadata files in a directory, returns a dictionary
+dir_path – path to the directory containing the TSDF metadata files.
naming_pattern – (optional) naming pattern of the TSDF metadata files .
dictionary of TSDFMetadata objects.
+Loads a TSDF metadata string, returns a dictionary.
+json_str – string containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
+Loads a TSDB metadata file, i.e., legacy format of the TSDF. It returns a dictionary representing the metadata.
+file – file object containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
+Combine and save the TSDF metadata objects as a json file.
+metadatas – List of TSDFMetadata objects to be saved.
file_name – Name of the file to be saved. The file will be saved in the directory of the first TSDFMetadata object in the list.
TSDFMetadataFieldValueError – if the metadata files cannot be combined (e.g. they have no common fields) or if the list of TSDFMetadata objects is empty.
+Save binary file based on the provided NumPy array.
+file_dir – path to the directory where the file will be saved.
file_name – name of the file to be saved.
data – NumPy array containing the data.
metadata – dictionary containing the metadata.
TSDFMetadata object.
+Save binary file based on the provided pandas DataFrame.
+file_dir – path to the directory where the file will be saved.
df – pandas DataFrame containing the data.
metadatas – list of metadata objects to be saved, also contains +channels to be retrieved from dataframe.
Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved).
+metadata – TSDFMetadata object.
start_row – (optional) first row to load.
end_row – (optional) last row to load. If -1, load all rows.
numpy array containing the data.
+Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
+metadatas – list of TSDFMetadata objects.
concatenation – concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
pandas DataFrame containing the combined data.
+Structure that provides metadata needed for reading a data stream.
+Version of the TSDF metadata file.
+Study ID.
+Subject ID.
+Device ID.
+Start time of the recording in ISO8601 format.
+End time of the recording in ISO8601 format.
+Name of the binary file containing the data.
+Number of rows in the binary file.
+List of channels in the binary file.
+List of units for each channel in the binary file.
+Data type of the binary file.
+Number of bits per sample in the binary file.
+Endianness of the binary file.
+A reference to the directory path, so we don’t need it again when reading associated binary files.
+A reference to the source path, so we don’t need it again when reading associated binary files.
+Method returns a copy of the dict containing fields needed for the TSDF file.
+a copy of the dict containing fields needed for the TSDF file.
+Sets the start date of the recording in ISO8601 format. +:param date_time: datetime object containing the start date.
+Returns the start date of the recording as a datetime object. +:return: datetime object containing the start date.
++ | + |
+ | + |
|
+This function renames the keys in a metadata file. |
+
|
+This function converts the value of a specified key in a dictionary to an array if it's not already an array. |
+
|
+Converts a data from TSDB (legacy) to TSDF (0.1) format. |
+
|
+This function creates a metadata file (JSON) file in TSDF (0.1) format from a TSDB (legacy) file. |
+
|
+This function converts a metadata file (JSON) from TSDB (legacy) to TSDF (0.1) format. It overwrites the original file. |
+
|
+This function converts all metadata files in a directory (and its subdirectories) from TSDB (legacy) to TSDF (0.1) format. |
+
This function renames the keys in a metadata file. +If a key in the metadata matches a key in the provided dictionary, it is renamed to the corresponding value in the dictionary. +It handles nested dictionaries and lists of dictionaries.
+old_dict – The metadata file (dictionary) with keys to rename
+The updated metadata file (dictionary)
+This function converts the value of a specified key in a dictionary to an array if it’s not already an array. +It handles nested dictionaries and lists of dictionaries.
+data – The dictionary with a value to convert
key – The key in the dictionary whose value to convert
The updated dictionary
+Converts a data from TSDB (legacy) to TSDF (0.1) format.
+data – The data in legacy (tsdb) format.
+The data in tsdf format.
+This function creates a metadata file (JSON) file in TSDF (0.1) format from a TSDB (legacy) file.
+filepath_existing – The path to the JSON file to process
filepath_new – The path to the new JSON file
This function converts a metadata file (JSON) from TSDB (legacy) to TSDF (0.1) format. It overwrites the original file.
+filepath – The path to the JSON file to process
+This function converts all metadata files in a directory (and its subdirectories) from TSDB (legacy) to TSDF (0.1) format. +It walks through all files in a directory (and its subdirectories), +and processes all files with a .json extension.
+directory – The directory to process files in
++ | Mapping of NumPy data types to their TSDF metadata annotations. |
+
+ | Mapping of data types that are supported by TSDF to |
+
+ | Supported endianness values. |
+
+ | Supported endianness values. |
+
|
+Compute the TSDF metadata 'data_type' value, based on the NumPy data. |
+
|
+Compute the the NumPy data type, based on the TSDF metadata 'data_type' value. |
+
|
+Compute TSDF metadata 'n_bits' value, based on the NumPy data. |
+
|
+Compute the the NumPy byte number, based on the TSDF metadata 'n_bits' value. |
+
|
+Compute TSDF metadata 'data_type' value, based on the NumPy data. |
+
|
+Compute TSDF metadata 'data_type' value, based on the NumPy data. |
+
|
+Compute TSDF metadata 'rows' value, based on the NumPy data. |
+
Mapping of NumPy data types to their TSDF metadata annotations.
+Compute the TSDF metadata ‘data_type’ value, based on the NumPy data.
+Mapping of data types that are supported by TSDF to +their NumPy representation used for parsing.
+Compute the NumPy data type, based on the TSDF metadata ‘data_type’ value.
+data_type – TSDF metadata ‘data_type’ value.
+NumPy data type (as a char).
+Compute TSDF metadata ‘n_bits’ value, based on the NumPy data.
+data – NumPy data.
+TSDF metadata ‘n_bits’ value.
+Compute the NumPy byte number, based on the TSDF metadata ‘n_bits’ value.
+n_bits – TSDF metadata ‘n_bits’ value.
+NumPy byte number.
+Supported endianness values.
+Compute TSDF metadata ‘data_type’ value, based on the NumPy data.
+data – NumPy data.
+TSDF metadata ‘data_type’ value (as a string).
+Supported endianness values.
+Module for parsing TSDF metadata files.
+Reference: https://arxiv.org/abs/2211.11294
+
|
+Function used to parse the JSON object containing TSDF metadata. It returns a |
+
|
+Recursive method used to parse the TSDF metadata in a hierarchical |
+
|
+Function returns True if the field that corresponds to the |
+
|
+Function return True if the data contains the "file_name" key, |
+
|
+Verifies that all the mandatory properties for TSDF metadata are provided, |
+
|
+Function checks whether the value of the mandatory TSDF field specified by the key |
+
+ | Returns the metadata object at the position defined by the index. |
+
|
+The method is used to confirm whether all the metadata files are expected in the same directory. |
+
|
+Checks if the given date string is in ISO8601 format. |
+
|
+Validates the start and end date format of the TSDFMetaData object. |
+
Function used to parse the JSON object containing TSDF metadata. It returns a +list of TSDFMetadata objects, where each object describes formatting of a binary file.
+data – JSON object containing TSDF metadata.
source_path – path to the metadata file.
list of TSDFMetadata objects.
+tsdf_metadata.TSDFMetadataFieldValueError – if the TSDF metadata file is missing a mandatory field.
+Recursive method used to parse the TSDF metadata in a hierarchical +order (from the root towards the leaves).
+data – JSON object containing TSDF metadata.
defined_properties – dictionary containing all the properties defined at the current level of the TSDF structure.
source_path – path to the metadata file.
version – version of the TSDF used within the file.
list of TSDFMetadata objects.
+tsdf_metadata.TSDFMetadataFieldError – if the TSDF metadata file is missing a mandatory field.
+Function returns True if the field that corresponds to the +key is mandatory for the given TSDF version, otherwise it returns False.
+key – key of the TSDF metadata field.
version – version of the TSDF used within the file.
True if the field is mandatory, otherwise False.
+Function return True if the data contains the “file_name” key, +and thus, represents nested data elements. +Otherwise it returns False.
+data – data to be checked.
+True if the data contains the “file_name” key, otherwise False.
+Verifies that all the mandatory properties for TSDF metadata are provided, +and are in the right format.
+dictionary – dictionary containing TSDF metadata.
+True if the metadata is well formatted.
+tsdf_metadata.TSDFMetadataFieldError – if the TSDF metadata file is missing a mandatory field.
tsdf_metadata.TSDFMetadataFieldValueError – if the TSDF metadata file contains an invalid value.
+Function checks whether the value of the mandatory TSDF field specified by the key
+is of the expected data format. Note: If the key is not mandatory the function does not perform any checks.
+key – key of the TSDF metadata field.
value – value of the TSDF metadata field.
version – version of the TSDF used within the file.
tsdf_metadata.TSDFMetadataFieldValueError – if the TSDF metadata file contains an invalid value.
+Returns the metadata object at the position defined by the index.
+metadata – dictionary containing TSDF metadata.
index – index of the metadata object to be returned.
metadata object at the position defined by the index.
+IndexError – if the index is out of range.
+The method is used to confirm whether all the metadata files are expected in the same directory.
+metadatas – list of metadata objects.
+tsdf_metadata.TSDFMetadataFieldValueError – if the metadata files are not in the same directory or describe the same binaries.
+Checks if the given date string is in ISO8601 format.
+date_string – date string to be checked.
+Validates the start and end date format of the TSDFMetaData object.
+Module for reading and writing binary files associated with TSDF.
+Reference: https://arxiv.org/abs/2211.11294
+
|
+Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none). |
+
|
+Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved). |
+
|
+Use provided parameters to load and return a numpy array from a binary file. |
+
Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
+metadatas – list of TSDFMetadata objects.
concatenation – concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
pandas DataFrame containing the combined data.
+Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved).
+metadata – TSDFMetadata object.
start_row – (optional) first row to load.
end_row – (optional) last row to load. If -1, load all rows.
numpy array containing the data.
+Use provided parameters to load and return a numpy array from a binary file.
+bin_file_path – path to the binary file.
data_type – data type of the binary file.
n_bits – number of bits per value.
endianness – endianness of the binary file.
n_rows – number of rows in the binary file.
n_columns – number of columns in the binary file.
start_row – (optional) first row to load.
end_row – (optional) last row to load. If -1, load all rows.
numpy array containing the data.
+Module for reading TSDF files.
+Reference: https://arxiv.org/abs/2211.11294
+
|
+Loads a TSDF metadata file, returns a dictionary |
+
|
+Loads a TSDB metadata file, i.e., legacy format of the TSDF. It returns a dictionary representing the metadata. |
+
|
+Loads all TSDF metadata files in a directory, returns a dictionary |
+
|
+Loads a TSDF metadata file, returns a dictionary |
+
|
+Loads a TSDF metadata string, returns a dictionary. |
+
Loads a TSDF metadata file, returns a dictionary
+file – file object containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
+Loads a TSDB metadata file, i.e., legacy format of the TSDF. It returns a dictionary representing the metadata.
+file – file object containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
+Loads all TSDF metadata files in a directory, returns a dictionary
+dir_path – path to the directory containing the TSDF metadata files.
naming_pattern – (optional) naming pattern of the TSDF metadata files .
dictionary of TSDFMetadata objects.
+Loads a TSDF metadata file, returns a dictionary
+path – path to the TSDF metadata file.
+dictionary of TSDFMetadata objects.
+Loads a TSDF metadata string, returns a dictionary.
+json_str – string containing the TSDF metadata.
+dictionary of TSDFMetadata objects.
++ | Raised when the TSDFMetadata is missing an obligatory field. |
+
+ | Raised when a TSDFMetadata field is wrongly annotated. |
+
+ | Structure that provides metadata needed for reading a data stream. |
+
Bases: Exception
Raised when the TSDFMetadata is missing an obligatory field.
+ + +Bases: Exception
Raised when a TSDFMetadata field is wrongly annotated.
+Structure that provides metadata needed for reading a data stream.
+Version of the TSDF metadata file.
+Study ID.
+Subject ID.
+Device ID.
+Start time of the recording in ISO8601 format.
+End time of the recording in ISO8601 format.
+Name of the binary file containing the data.
+Number of rows in the binary file.
+List of channels in the binary file.
+List of units for each channel in the binary file.
+Data type of the binary file.
+Number of bits per sample in the binary file.
+Endianness of the binary file.
+A reference to the directory path, so we don’t need it again when reading associated binary files.
+A reference to the source path, so we don’t need it again when reading associated binary files.
+Method returns a copy of the dict containing fields needed for the TSDF file.
+a copy of the dict containing fields needed for the TSDF file.
+Sets the start date of the recording in ISO8601 format. +:param date_time: datetime object containing the start date.
+Returns the start date of the recording as a datetime object. +:return: datetime object containing the start date.
+Module for writing binary files associated with TSDF.
+Reference: https://arxiv.org/abs/2211.11294
+
|
+Save binary file based on the provided pandas DataFrame. |
+
|
+Retrieve metadata information encoded in the NumPy array. |
+
|
+Save binary file based on the provided NumPy array. |
+
Save binary file based on the provided pandas DataFrame.
+file_dir – path to the directory where the file will be saved.
df – pandas DataFrame containing the data.
metadatas – list of metadata objects to be saved, also contains +channels to be retrieved from dataframe.
Retrieve metadata information encoded in the NumPy array.
+data – NumPy array containing the data.
+dictionary containing the metadata.
+Save binary file based on the provided NumPy array.
+file_dir – path to the directory where the file will be saved.
file_name – name of the file to be saved.
data – NumPy array containing the data.
metadata – dictionary containing the metadata.
TSDFMetadata object.
+Module for writing TSDF files.
+Reference: https://arxiv.org/abs/2211.11294
+
|
+Combine and save the TSDF metadata objects as a json file. |
+
|
+Extract the fields that are the same for all the metadata files. |
+
|
+A recursive call that optimises the structure of the TSDF metadata, by grouping common values. For the input the list of dictionaries |
+
|
+Get all the keys from the metadata files. |
+
|
+Calculate the maximum overlap between the metadata files, for a specific key. |
+
|
+Return the key in a dictionary that has the longest list as a value. |
+
Combine and save the TSDF metadata objects as a json file.
+metadatas – List of TSDFMetadata objects to be saved.
file_name – Name of the file to be saved. The file will be saved in the directory of the first TSDFMetadata object in the list.
TSDFMetadataFieldValueError – if the metadata files cannot be combined (e.g. they have no common fields) or if the list of TSDFMetadata objects is empty.
+Extract the fields that are the same for all the metadata files. +A new dict is created and the fields are removed from the original dictionaries.
+metadatas – List of dictionaries containing the metadata.
+Dictionary containing the common fields.
+A recursive call that optimises the structure of the TSDF metadata, by grouping common values. For the input the list of dictionaries +corresponds to a list of “flat” metadata dictionaries. The output is a list of dictionaries (potentially of length 1) that contain +the metadata in a tree structure. The tree structure is created by grouping the common values in the metadata. +The grouping is done recursively, until no more grouping is possible.
+metadatas – List of dictionaries containing the metadata.
+List of dictionaries containing the metadata in a tree structure.
+Get all the keys from the metadata files.
+metadatas – List of dictionaries containing the metadata.
+List of keys.
+Calculate the maximum overlap between the metadata files, for a specific key. +It returns the biggest group of dictionaries that contain the same value for the given meta_key.
+meta_files – List of dictionaries containing the metadata.
meta_key – The key for which the overlap is calculated.
List of dictionaries containing the metadata.
+These are some examples on how to read and write TSDF data into and from a numpy array, using the tsdf
library.
import tsdf
+
# The file names
+metadata_path = "data/example_meta.json"
+binary_filename = "example_binary.bin"
+
+# Multiple metadata files (one for each binary) are loaded into a dictionary
+# mapping the binary file name to the metadata object
+metadata_dict = tsdf.load_metadata_from_path(metadata_path)
+
+# Retrieve the metadata object we want, using the name of the binary as key
+metadata = metadata_dict[binary_filename]
+
+# Load the data
+data = tsdf.load_ndarray_from_binary(metadata)
+
+# Print some info
+print(f"Data type:\t {data.dtype}")
+print(f"Data shape:\t {data.shape}")
+
Data type: int16
+Data shape: (10, 3)
+
# Perform an operation, resulting in a different data type
+processed_data_1 = (data / 10).astype('float32')
+
+# Print some info
+print(f"Processed data type:\t {processed_data_1.dtype}")
+print(f"Data shape:\t\t {processed_data_1.shape}")
+
Processed data type: float32
+Data shape: (10, 3)
+
Write the processed data in binary format. The call returns the corresponding metadata object.
+# The new name of the file
+output_bin_filename = "tmp_example_processed.bin"
+
+# Write the data to a new binary file
+processed_metadata_1 = tsdf.write_binary_file(
+ "data",
+ output_bin_filename,
+ processed_data_1,
+ metadata.get_plain_tsdf_dict_copy(),
+ )
+
+print(f"File written to data/{output_bin_filename}")
+
File written to data/tmp_example_processed.bin
+
# Write new metadata file
+output_meta_filename = "tmp_example_processed_meta.json"
+tsdf.write_metadata([processed_metadata_1], output_meta_filename)
+print(f"File written to data/{output_meta_filename}")
+
File written to data/tmp_example_processed_meta.json
+
# Preprocess the original data to generate another data source
+processed_data_2 = (data * 1000).astype("int32")
+
+# Adjust the metadata slightly
+updated_metadata = metadata.get_plain_tsdf_dict_copy()
+updated_metadata.pop("scale_factors") # remove the 'scale_factors'
+
+# Save the new binary file
+output_bin_filename_2 = "tmp_example_processed_2.bin"
+processed_metadata_2 = tsdf.write_binary_file(
+ "data",
+ output_bin_filename_2,
+ processed_data_2,
+ updated_metadata,
+)
+print(f"File written to data/{output_bin_filename_2}")
+
+# Write a metadata file that combines the two binary files
+output_meta_filename_2 = "tmp_example_processed_2_meta.json"
+tsdf.write_metadata([processed_metadata_1, processed_metadata_2],
+ output_meta_filename_2)
+print(f"File written to data/{output_meta_filename_2}")
+
File written to data/tmp_example_processed_2.bin
+File written to data/tmp_example_processed_2_meta.json
+
These are some examples on how to read and write TSDF data into and from a numpy array, using the tsdf
library.
import tsdf
-
# The file names
-metadata_path = "data/example_meta.json"
-binary_filename = "example_binary.bin"
-
-# Multiple metadata files (one for each binary) are loaded into a dictionary
-# mapping the binary file name to the metadata object
-metadata_dict = tsdf.load_metadata_from_path(metadata_path)
-
-# Retrieve the metadata object we want, using the name of the binary as key
-metadata = metadata_dict[binary_filename]
-
-# Load the data
-data = tsdf.load_ndarray_from_binary(metadata)
-
-# Print some info
-print(f"Data type:\t {data.dtype}")
-print(f"Data shape:\t {data.shape}")
-
Data type: int16 -Data shape: (10, 3) --
# Perform an operation, resulting in a different data type
-processed_data_1 = (data / 10).astype('float32')
-
-# Print some info
-print(f"Processed data type:\t {processed_data_1.dtype}")
-print(f"Data shape:\t\t {processed_data_1.shape}")
-
Processed data type: float32 -Data shape: (10, 3) --
Write the processed data in binary format. The call returns the corresponding metadata object.
- -# The new name of the file
-output_bin_filename = "tmp_example_processed.bin"
-
-# Write the data to a new binary file
-processed_metadata_1 = tsdf.write_binary_file(
- "data",
- output_bin_filename,
- processed_data_1,
- metadata.get_plain_tsdf_dict_copy(),
- )
-
-print(f"File written to data/{output_bin_filename}")
-
File written to data/tmp_example_processed.bin --
# Write new metadata file
-output_meta_filename = "tmp_example_processed_meta.json"
-tsdf.write_metadata([processed_metadata_1], output_meta_filename)
-print(f"File written to data/{output_meta_filename}")
-
File written to data/tmp_example_processed_meta.json --
# Preprocess the original data to generate another data source
-processed_data_2 = (data * 1000).astype("int32")
-
-# Adjust the metadata slightly
-updated_metadata = metadata.get_plain_tsdf_dict_copy()
-updated_metadata.pop("scale_factors") # remove the 'scale_factors'
-
-# Save the new binary file
-output_bin_filename_2 = "tmp_example_processed_2.bin"
-processed_metadata_2 = tsdf.write_binary_file(
- "data",
- output_bin_filename_2,
- processed_data_2,
- updated_metadata,
-)
-print(f"File written to data/{output_bin_filename_2}")
-
-# Write a metadata file that combines the two binary files
-output_meta_filename_2 = "tmp_example_processed_2_meta.json"
-tsdf.write_metadata([processed_metadata_1, processed_metadata_2],
- output_meta_filename_2)
-print(f"File written to data/{output_meta_filename_2}")
-
File written to data/tmp_example_processed_2.bin -File written to data/tmp_example_processed_2_meta.json --
These are some examples on how to read and write TSDF data into and from a numpy array, using the tsdf
library.
import tsdf
+
# Load the metadata
+metadata_path = "data/ppp_format_meta.json"
+metadata_dict = tsdf.load_metadata_from_path(metadata_path)
+metadata_time = metadata_dict["ppp_format_time.bin"]
+metadata_samples = metadata_dict["ppp_format_samples.bin"]
+
+# Load the data
+df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+# Print some info
+print(f"Data shape:\t {df.shape}")
+df.head()
+
Data shape: (17, 7)
+
+ | time | +acceleration_x | +acceleration_y | +acceleration_z | +rotation_x | +rotation_y | +rotation_z | +
---|---|---|---|---|---|---|---|
0 | +0.374540 | +6 | +1 | +3 | +2 | +5 | +3 | +
1 | +0.950714 | +7 | +9 | +1 | +4 | +6 | +0 | +
2 | +0.731994 | +9 | +2 | +2 | +6 | +7 | +1 | +
3 | +0.598659 | +7 | +4 | +3 | +2 | +7 | +2 | +
4 | +0.156019 | +7 | +4 | +2 | +0 | +5 | +5 | +
scale_factors = getattr(metadata_samples, "scale_factors")
+print(f"Scale factors: {scale_factors}")
+
+# Perform scaling
+df_scaled = df.copy()
+df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+
+# Print some info
+print(f"Data shape:\t\t {df.shape}")
+df_scaled.head()
+
Scale factors: [0.00469378, 0.00469378, 0.00469378, 0.06097561, 0.06097561, 0.06097561]
+Data shape: (17, 7)
+
/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.028163
+1 0.032856
+2 0.042244
+3 0.032856
+4 0.032856
+5 0.018775
+6 0.023469
+7 0.000000
+8 0.028163
+9 0.009388
+10 0.042244
+11 0.042244
+12 0.037550
+13 0.037550
+14 0.014081
+15 0.037550
+16 0.042244
+Name: acceleration_x, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.004694
+1 0.042244
+2 0.009388
+3 0.018775
+4 0.018775
+5 0.000000
+6 0.004694
+7 0.014081
+8 0.014081
+9 0.042244
+10 0.028163
+11 0.028163
+12 0.009388
+13 0.028163
+14 0.028163
+15 0.018775
+16 0.037550
+Name: acceleration_y, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.014081
+1 0.004694
+2 0.009388
+3 0.014081
+4 0.009388
+5 0.004694
+6 0.014081
+7 0.009388
+8 0.037550
+9 0.023469
+10 0.004694
+11 0.018775
+12 0.018775
+13 0.037550
+14 0.032856
+15 0.014081
+16 0.042244
+Name: acceleration_z, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.121951
+1 0.243902
+2 0.365854
+3 0.121951
+4 0.000000
+5 0.426829
+6 0.060976
+7 0.548780
+8 0.426829
+9 0.121951
+10 0.060976
+11 0.182927
+12 0.243902
+13 0.243902
+14 0.060976
+15 0.182927
+16 0.365854
+Name: rotation_x, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.304878
+1 0.365854
+2 0.426829
+3 0.426829
+4 0.304878
+5 0.243902
+6 0.243902
+7 0.304878
+8 0.000000
+9 0.548780
+10 0.060976
+11 0.304878
+12 0.304878
+13 0.426829
+14 0.304878
+15 0.060976
+16 0.243902
+Name: rotation_y, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+/tmp/ipykernel_2035/4007094660.py:6: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0 0.182927
+1 0.000000
+2 0.060976
+3 0.121951
+4 0.304878
+5 0.243902
+6 0.182927
+7 0.304878
+8 0.426829
+9 0.487805
+10 0.121951
+11 0.487805
+12 0.121951
+13 0.487805
+14 0.182927
+15 0.182927
+16 0.060976
+Name: rotation_z, dtype: float64' has dtype incompatible with int16, please explicitly cast to a compatible dtype first.
+ df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
+
+ | time | +acceleration_x | +acceleration_y | +acceleration_z | +rotation_x | +rotation_y | +rotation_z | +
---|---|---|---|---|---|---|---|
0 | +0.374540 | +0.028163 | +0.004694 | +0.014081 | +0.121951 | +0.304878 | +0.182927 | +
1 | +0.950714 | +0.032856 | +0.042244 | +0.004694 | +0.243902 | +0.365854 | +0.000000 | +
2 | +0.731994 | +0.042244 | +0.009388 | +0.009388 | +0.365854 | +0.426829 | +0.060976 | +
3 | +0.598659 | +0.032856 | +0.018775 | +0.014081 | +0.121951 | +0.426829 | +0.121951 | +
4 | +0.156019 | +0.032856 | +0.018775 | +0.009388 | +0.000000 | +0.304878 | +0.304878 | +
Write the processed data in binary format. The call updates the metadata object with the data attributes of the dataframe.
+# The new name of the file
+output_bin_filename = "tmp_pandas_example_processed.bin"
+metadata_samples.__setattr__("file_name", output_bin_filename)
+
+# Write the data to a new binary file
+tsdf.write_dataframe_to_binaries("data", df_scaled, [metadata_time, metadata_samples])
+
+print(f"File written to data/{output_bin_filename}")
+
File written to data/tmp_pandas_example_processed.bin
+
# Write new metadata file
+output_meta_filename = "tmp_pandas_example_processed_meta.json"
+tsdf.write_metadata([metadata_time, metadata_samples], output_meta_filename)
+print(f"File written to data/{output_meta_filename}")
+
File written to data/tmp_pandas_example_processed_meta.json
+
These are some examples on how to read and write TSDF data into and from a numpy array, using the tsdf
library.
import tsdf
-
# Load the metadata
-metadata_path = "data/ppp_format_meta.json"
-metadata_dict = tsdf.load_metadata_from_path(metadata_path)
-metadata_time = metadata_dict["ppp_format_time.bin"]
-metadata_samples = metadata_dict["ppp_format_samples.bin"]
-
-# Load the data
-df = tsdf.load_binaries_to_dataframe([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
-# Print some info
-print(f"Data shape:\t {df.shape}")
-df.head()
-
Data shape: (17, 7) --
- | time | -acceleration_x | -acceleration_y | -acceleration_z | -rotation_x | -rotation_y | -rotation_z | -
---|---|---|---|---|---|---|---|
0 | -0.374540 | -6 | -1 | -3 | -2 | -5 | -3 | -
1 | -0.950714 | -7 | -9 | -1 | -4 | -6 | -0 | -
2 | -0.731994 | -9 | -2 | -2 | -6 | -7 | -1 | -
3 | -0.598659 | -7 | -4 | -3 | -2 | -7 | -2 | -
4 | -0.156019 | -7 | -4 | -2 | -0 | -5 | -5 | -
scale_factors = getattr(metadata_samples, "scale_factors")
-print(f"Scale factors: {scale_factors}")
-
-# Perform scaling
-df_scaled = df.copy()
-df_scaled.iloc[:, 1:] = df_scaled.iloc[:, 1:].multiply(scale_factors, axis=1)
-
-# Print some info
-print(f"Data shape:\t\t {df.shape}")
-df_scaled.head()
-
Scale factors: [0.00469378, 0.00469378, 0.00469378, 0.06097561, 0.06097561, 0.06097561] -Data shape: (17, 7) --
- | time | -acceleration_x | -acceleration_y | -acceleration_z | -rotation_x | -rotation_y | -rotation_z | -
---|---|---|---|---|---|---|---|
0 | -0.374540 | -0.028163 | -0.004694 | -0.014081 | -0.121951 | -0.304878 | -0.182927 | -
1 | -0.950714 | -0.032856 | -0.042244 | -0.004694 | -0.243902 | -0.365854 | -0.000000 | -
2 | -0.731994 | -0.042244 | -0.009388 | -0.009388 | -0.365854 | -0.426829 | -0.060976 | -
3 | -0.598659 | -0.032856 | -0.018775 | -0.014081 | -0.121951 | -0.426829 | -0.121951 | -
4 | -0.156019 | -0.032856 | -0.018775 | -0.009388 | -0.000000 | -0.304878 | -0.304878 | -
Write the processed data in binary format. The call updates the metadata object with the data attributes of the dataframe.
- -# The new name of the file
-output_bin_filename = "tmp_pandas_example_processed.bin"
-metadata_samples.__setattr__("file_name", output_bin_filename)
-
-# Write the data to a new binary file
-tsdf.write_dataframe_to_binaries("data", df_scaled, [metadata_time, metadata_samples])
-
-print(f"File written to data/{output_bin_filename}")
-
File written to data/tmp_pandas_example_processed.bin --
# Write new metadata file
-output_meta_filename = "tmp_pandas_example_processed_meta.json"
-tsdf.write_metadata([metadata_time, metadata_samples], output_meta_filename)
-print(f"File written to data/{output_meta_filename}")
-
File written to data/tmp_pandas_example_processed_meta.json --
More information about the TSDF format can be found in the TSDF preprint.
+This package has been written by engineers from the Netherlands eScience Center. +It is maintained by:
+Peter Kok (p.kok@esciencecenter.nl).
Vedran Kasalica (v.kasalica@esciencecenter.nl).
Pablo Rodríguez-Sánchez (p.rodriguez-sanchez@esciencecenter.nl).
More information about the TSDF format can be found in the TSDF preprint.
-This package has been written by engineers from the Netherlands eScience Center. -It is maintained by:
-Transform one file (or all files within the given directory) from legacy TSDB to the current TSDF format (v0.1).
+import os
+from tsdf.legacy_tsdf_utils import (
+ generate_tsdf_metadata_from_tsdb,
+ convert_file_tsdb_to_tsdf,
+ convert_files_tsdb_to_tsdf,
+)
+
+data_dir = 'data'
+
# Path to the metadata file
+path_to_file = os.path.join(data_dir, "ppp_format_meta_legacy.json")
+path_to_new_file = os.path.join(data_dir, "tmp_ppp_format_meta.json")
+
+# Generate a TSDF metadata file from TSDB
+generate_tsdf_metadata_from_tsdb(path_to_file, path_to_new_file)
+
+# Convert a TSDB metadata file to TSDB format
+# convert_metadata_tsdb_to_tsdf(path_to_file)
+
+# Convert all metadata files in the directory from TSDB to TSDF format
+# convert_metadatas_tsdb_to_tsdf(path_to_dir)
+
Transform one file (or all files within the given directory) from TSDB to TSDF format.
- -import os
-from tsdf.legacy_tsdf_utils import (
- generate_tsdf_metadata_from_tsdb,
- convert_file_tsdb_to_tsdf,
- convert_files_tsdb_to_tsdf,
-)
-
-data_dir = 'data'
-
# Path to the metadata file
-path_to_file = os.path.join(data_dir, "ppp_format_meta_legacy.json")
-path_to_new_file = os.path.join(data_dir, "tmp_ppp_format_meta.json")
-
-# Generate a TSDF metadata file from TSDB
-generate_tsdf_metadata_from_tsdb(path_to_file, path_to_new_file)
-
-# Convert a TSDB metadata file to TSDB format
-# convert_metadata_tsdb_to_tsdf(path_to_file)
-
-# Convert all metadata files in the directory from TSDB to TSDF format
-# convert_metadatas_tsdb_to_tsdf(path_to_dir)
-
+ | + |
+ | + |
+ | + |
+ | + |
+ | + |
+ | + |
+ |
+ | + |
+ | + |
+ | + |
|
+
|
+
+ |
+ | + |
+ | + |