Skip to content

Commit

Permalink
Merge pull request #154 from pomfort/dev/windowsPathHandling
Browse files Browse the repository at this point in the history
Make sure to properly read and write POSIX-style relative paths to the XML files on Windows
  • Loading branch information
ptrpfn authored Dec 16, 2024
2 parents ac0188f + cb1174b commit e73462c
Show file tree
Hide file tree
Showing 25 changed files with 491 additions and 250 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Make sure the scenario output files are not converted to different line endings when checked out by git on Windows;
# otherwise the scenario tests will fail and the files can't be verified with the ascmhl tool.
/examples/scenarios/Output/** binary
4 changes: 2 additions & 2 deletions .github/workflows/build+test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: ascmhl-build-test

on:
push:
branches: [ master ]
branches: [ master, dev/windowsPathHandling ]
pull_request:
branches: [ master ]

Expand All @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.11", "3.12"]
steps:
- uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ docs/build
docs/docenv
.coverage
htmlcov
/.vscode/
13 changes: 6 additions & 7 deletions ascmhl/chain_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@
__email__ = "[email protected]"
"""

from . import logger
from .__version__ import ascmhl_reference_hash_format
from .chain import MHLChain, MHLChainGeneration
from .hashlist import MHLHashList
import os
import textwrap

from lxml import etree
Expand All @@ -20,6 +16,7 @@
from .hashlist import *
from .__version__ import ascmhl_supported_hashformats
from .hashlist import MHLHashList
from .utils import convert_local_path_to_posix, convert_posix_to_local_path


def parse(file_path):
Expand Down Expand Up @@ -51,14 +48,15 @@ def parse(file_path):

if type(current_object) is MHLChainGeneration:
if tag == "path":
current_object.ascmhl_filename = element.text
current_object.ascmhl_filename = convert_posix_to_local_path(element.text)
elif tag in ascmhl_supported_hashformats:
current_object.hash_format = tag
current_object.hash_string = element.text
elif tag == "hashlist":
current_object.generation_number = element.attrib.get("sequencenr")
chain.append_generation(current_object)
current_object = None
file.close()

return chain

Expand Down Expand Up @@ -86,6 +84,7 @@ def write_chain(chain: MHLChain, new_hash_list: MHLHashList):
current_indent = current_indent[:-2]
_write_xml_string_to_file(file, "</ascmhldirectory>\n", current_indent)
file.flush()
file.close()


def _write_xml_element_to_file(file, xml_element, indent: str):
Expand All @@ -102,7 +101,7 @@ def _hashlist_xml_element_from_hashlist(hash_list: MHLHashList):
"""builds and returns one <hashlist> element for a given HashList object"""

hash_list_element = E.hashlist(
E.path(os.path.basename(hash_list.file_path)),
E.path(convert_local_path_to_posix(os.path.basename(hash_list.file_path))),
E.c4(hash_list.generate_reference_hash()),
)
hash_list_element.attrib["sequencenr"] = str(hash_list.generation_number)
Expand All @@ -115,7 +114,7 @@ def _hashlist_xml_element_from_chaingeneration(generation: MHLChainGeneration):

if generation.hash_format == "c4":
hash_list_element = E.hashlist(
E.path(generation.ascmhl_filename),
E.path(convert_local_path_to_posix(generation.ascmhl_filename)),
E.c4(generation.hash_string),
)
hash_list_element.attrib["sequencenr"] = str(generation.generation_number)
Expand Down
11 changes: 9 additions & 2 deletions ascmhl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,12 +1263,18 @@ def info(verbose, single_file, root_path):
if single_file is not None and len(single_file) > 0:
if root_path == None:
current_dir = os.path.dirname(os.path.abspath(single_file[0]))
while current_dir != "/" and current_dir != "":
while os.path.isdir(current_dir):
asc_mhl_folder_path = os.path.join(current_dir, ascmhl_folder_name)
if os.path.exists(asc_mhl_folder_path):
root_path = current_dir
break
current_dir = os.path.dirname(current_dir)
parent_dir = os.path.dirname(current_dir)
# in case we get the same path again, we seem to be at the root
# this works both on windows and unix
if parent_dir == current_dir:
break
current_dir = parent_dir

if root_path is None:
raise errors.NoMHLHistoryException(single_file[0])
else:
Expand Down Expand Up @@ -1405,6 +1411,7 @@ def xsd_schema_check(file_path, directory_file, xsd_file):
# pass a file handle to support the fake file system used in the tests
file = open(file_path, "rb")
result = xsd.validate(etree.parse(file))
file.close()

if result:
logger.info(f"validated: {file_path}")
Expand Down
2 changes: 1 addition & 1 deletion ascmhl/hashlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ class MHLCreatorInfo:

host_name: Optional[str]
tool: Optional[MHLTool]
creation_date: Optional[datetime]
creation_date: Optional[str]
authors: List[MHLAuthor]
location: Optional[str]
comment: Optional[str]
Expand Down
31 changes: 19 additions & 12 deletions ascmhl/hashlist_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
MHLTool,
)
from .ignore import MHLIgnoreSpec
from .utils import datetime_isostring
from .utils import datetime_isostring, convert_local_path_to_posix, convert_posix_to_local_path


def parse(file_path):
Expand Down Expand Up @@ -146,7 +146,7 @@ def parse(file_path):

elif type(current_object) is MHLMediaHash:
if tag == "path":
current_object.path = element.text
current_object.path = convert_posix_to_local_path(element.text)
file_size = element.attrib.get("size")
current_object.file_size = int(file_size) if file_size else None
# TODO: parse date
Expand All @@ -159,7 +159,12 @@ def parse(file_path):
hash_date = dateutil.parser.parse(hash_date_string)
if current_object.is_directory:
if is_directory_structure == False:
entry = MHLHashEntry(tag, element.text, element.attrib.get("action"), hash_date)
entry = MHLHashEntry(
tag,
convert_posix_to_local_path(element.text),
element.attrib.get("action"),
hash_date,
)
current_object.append_hash_entry(entry)
else:
# find right hash entry and set structure hash
Expand All @@ -182,11 +187,11 @@ def parse(file_path):
current_object.root_media_hash = root_media_hash

elif tag == "previousPath":
current_object.previous_path = element.text
current_object.previous_path = convert_posix_to_local_path(element.text)

elif type(current_object) is MHLHashListReference:
if tag == "path":
current_object.path = element.text
current_object.path = convert_posix_to_local_path(element.text)
elif tag == "c4":
current_object.reference_hash = element.text
elif tag == "hashlistreference":
Expand All @@ -203,6 +208,7 @@ def parse(file_path):

hash_list.process_info.ignore_spec = MHLIgnoreSpec(existing_ignore_patterns)
logger.debug(f"parsing took: {timer() - start}")
file.close()

return hash_list

Expand Down Expand Up @@ -255,6 +261,7 @@ def write_hash_list(hash_list: MHLHashList, file_path: str):
current_indent = current_indent[:-2]
_write_xml_string_to_file(file, "</hashlist>\n", current_indent)
file.flush()
file.close()


def _write_xml_element_to_file(file, xml_element, indent: str):
Expand All @@ -270,7 +277,7 @@ def _write_xml_string_to_file(file, xml_string: str, indent: str):
def _media_hash_xml_element(media_hash: MHLMediaHash):
"""builds and returns one <hash> element for a given MediaHash object"""

path_element = E.path(media_hash.path)
path_element = E.path(convert_local_path_to_posix(media_hash.path))
if media_hash.file_size:
path_element.attrib["size"] = str(media_hash.file_size)
if media_hash.last_modification_date:
Expand All @@ -288,8 +295,8 @@ def _media_hash_xml_element(media_hash: MHLMediaHash):
hash_element.append(entry_element)

if media_hash.previous_path:
previous_path_element = E.previousPath(media_hash.previous_path)
previous_path_element.text = media_hash.previous_path
previous_path_element = E.previousPath(convert_local_path_to_posix(media_hash.previous_path))
previous_path_element.text = convert_local_path_to_posix(media_hash.previous_path)
hash_element.append(previous_path_element)

return hash_element
Expand Down Expand Up @@ -321,7 +328,7 @@ def _directory_hash_xml_element(media_hash: MHLMediaHash, skipPath=False):
hash_element = E.directoryhash()

if skipPath == False:
path_element = E.path(media_hash.path)
path_element = E.path(convert_local_path_to_posix(media_hash.path))
if media_hash.file_size:
path_element.attrib["size"] = str(media_hash.file_size)
if media_hash.last_modification_date:
Expand All @@ -332,8 +339,8 @@ def _directory_hash_xml_element(media_hash: MHLMediaHash, skipPath=False):
hash_element.append(structure_element)

if media_hash.previous_path:
previous_path_element = E.previousPath(media_hash.previous_path)
previous_path_element.text = media_hash.previous_path
previous_path_element = E.previousPath(convert_local_path_to_posix(media_hash.previous_path))
previous_path_element.text = convert_local_path_to_posix(media_hash.previous_path)
hash_element.append(previous_path_element)

return hash_element
Expand All @@ -344,7 +351,7 @@ def _ascmhlreference_xml_element(hash_list: MHLHashList, file_path: str):

root_path = os.path.dirname(os.path.dirname(file_path))
hash_element = E.hashlistreference(
E.path(os.path.relpath(hash_list.file_path, root_path)),
E.path(convert_local_path_to_posix(os.path.relpath(hash_list.file_path, root_path))),
E.c4(hash_list.generate_reference_hash()),
)

Expand Down
34 changes: 18 additions & 16 deletions ascmhl/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,22 +241,24 @@ def load_from_path(cls, root_path):
hash_lists = []
for root, directories, filenames in os.walk(asc_mhl_folder_path):
for filename in filenames:
if filename.endswith(ascmhl_file_extension):
# file name example: 0001_root_2020-01-15_130000.mhl
filename_no_extension, _ = os.path.splitext(filename)
parts = re.findall(MHLHistory.history_file_name_regex, filename_no_extension)
if len(parts) == 1 and len(parts[0]) == 2:
file_path = os.path.join(asc_mhl_folder_path, filename)
hash_list = hashlist_xml_parser.parse(file_path)
generation_number = int(parts[0][0])
hash_list.generation_number = generation_number
# FIXME is there a better way of accessing the generation from a hash entry?
if hash_list.process_info.root_media_hash is not None:
for hash_entry in hash_list.process_info.root_media_hash.hash_entries:
hash_entry.temp_generation_number = hash_list.generation_number
hash_lists.append(hash_list)
else:
logger.error(f"name of ascmhl file {filename} does not conform to naming convention")
# file name example: 0001_root_2020-01-15_130000.mhl
# ignore ._ variants of mhl files that can happen when moving data from macOS to Windows and back
if (len(filename) > 2 and filename[:2] == "._") or not filename.endswith(ascmhl_file_extension):
continue
filename_no_extension, _ = os.path.splitext(filename)
parts = re.findall(MHLHistory.history_file_name_regex, filename_no_extension)
if len(parts) == 1 and len(parts[0]) == 2:
file_path = os.path.join(asc_mhl_folder_path, filename)
hash_list = hashlist_xml_parser.parse(file_path)
generation_number = int(parts[0][0])
hash_list.generation_number = generation_number
# FIXME is there a better way of accessing the generation from a hash entry?
if hash_list.process_info.root_media_hash is not None:
for hash_entry in hash_list.process_info.root_media_hash.hash_entries:
hash_entry.temp_generation_number = hash_list.generation_number
hash_lists.append(hash_list)
else:
logger.error(f"name of ascmhl file {filename} does not conform to naming convention")
# sort all found hash lists by generation number first to make sure we add them to the history in order
hash_lists.sort(key=lambda x: x.generation_number)
for hash_list in hash_lists:
Expand Down
16 changes: 14 additions & 2 deletions ascmhl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import datetime
import time
import os
from pathlib import Path, PurePosixPath, PureWindowsPath


def matches_prefixes(text: str, prefixes: list):
Expand All @@ -26,7 +28,7 @@ def datetime_isostring(date, keep_microseconds=False):
date -- date object
keep_microseconds -- include microseconds in iso
"""
utc_offset_sec = time.altzone if time.localtime().tm_isdst else time.timezone
utc_offset_sec = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
utc_offset = datetime.timedelta(seconds=-utc_offset_sec)

if keep_microseconds:
Expand All @@ -43,8 +45,18 @@ def datetime_now_isostring():

def datetime_now_filename_string():
    """create a string representation for now() for use as part of the MHL filename

    The current time is taken in UTC and formatted as ``%Y-%m-%d_%H%M%SZ``.
    """
    # datetime.timezone.utc is available on every Python 3 release, whereas the
    # datetime.UTC alias only exists on Python 3.11 and newer
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return now_utc.strftime("%Y-%m-%d_%H%M%SZ")


def datetime_now_isostring_with_microseconds():
    """create the iso string for datetime.now() with microseconds kept"""
    now = datetime.datetime.now()
    return datetime_isostring(now, keep_microseconds=True)


def convert_local_path_to_posix(path: str) -> str:
    """Return *path* written with forward slashes as separators.

    The string is parsed with ``pathlib.Path`` (the path flavour of the
    running platform) and serialised again through ``as_posix()``.
    """
    local_path = Path(path)
    return local_path.as_posix()


def convert_posix_to_local_path(path: str) -> str:
    """Translate a POSIX-style path string into the local platform's notation.

    When ``os.name`` is ``"nt"`` the path is re-expressed through
    ``PureWindowsPath``; on every other platform the input is returned
    unchanged.
    """
    if os.name != "nt":
        return path
    windows_path = PureWindowsPath(PurePosixPath(path))
    return str(windows_path)
20 changes: 15 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import pytest
from freezegun import freeze_time
from click.testing import CliRunner
from os.path import abspath
from pathlib import Path
import ascmhl.commands
import os
import time
Expand All @@ -18,6 +20,14 @@
# this file is automatically loaded by pytest we setup various shared fixtures here


def abspath_conversion_tests(path):
    """Return the absolute form of ``path`` as computed by ``os.path.abspath``."""
    absolute_path = abspath(path)
    return absolute_path


def path_conversion_tests(path):
    """Wrap ``path`` in a ``pathlib.Path`` object and return it."""
    converted = Path(path)
    return converted


@pytest.fixture(scope="session", autouse=True)
def set_timezone():
"""Fakes the host timezone to UTC so we don't get different mhl files if the tests run on different time zones
Expand All @@ -44,21 +54,21 @@ def nested_mhl_histories(fs):
# create mhl histories on different directly levels
fs.create_file("/root/Stuff.txt", contents="stuff\n")
runner = CliRunner()
result = runner.invoke(ascmhl.commands.create, ["/root", "-h", "xxh64"])
result = runner.invoke(ascmhl.commands.create, [abspath_conversion_tests("/root"), "-h", "xxh64"])
assert result.exit_code == 0

fs.create_file("/root/A/AA/AA1.txt", contents="AA1\n")
fs.create_file("/root/A/AB/AB1.txt", contents="AB1\n")
result = runner.invoke(ascmhl.commands.create, ["/root/A/AA", "-h", "xxh64"])
result = runner.invoke(ascmhl.commands.create, [abspath_conversion_tests("/root/A/AA"), "-h", "xxh64"])
assert result.exit_code == 0

fs.create_file("/root/B/B1.txt", contents="B1\n")
result = runner.invoke(ascmhl.commands.create, ["/root/B", "-h", "xxh64"])
result = runner.invoke(ascmhl.commands.create, [abspath_conversion_tests("/root/B"), "-h", "xxh64"])
assert result.exit_code == 0

fs.create_file("/root/B/BA/BA1.txt", contents="BA1\n")
fs.create_file("/root/B/BB/BB1.txt", contents="BB1\n")
result = runner.invoke(ascmhl.commands.create, ["/root/B/BB", "-h", "xxh64"])
result = runner.invoke(ascmhl.commands.create, [abspath_conversion_tests("/root/B/BB"), "-h", "xxh64"])
assert result.exit_code == 0


Expand All @@ -70,7 +80,7 @@ def simple_mhl_history(fs):
fs.create_file("/root/A/A1.txt", contents="A1\n")

runner = CliRunner()
result = runner.invoke(ascmhl.commands.create, ["/root", "-h", "xxh64"])
result = runner.invoke(ascmhl.commands.create, [abspath_conversion_tests("/root"), "-h", "xxh64"])
assert result.exit_code == 0


Expand Down
Loading

0 comments on commit e73462c

Please sign in to comment.