153-End to End build test. (#266)

* Updated functions in cli.py to accept an optional argv argument (Sequence[str] | None) instead of taking no args - much easier to test * Added end to end build test & workflow trigger * Added additional test config to handle temporary directories, etc. for tests. * Updated docs to include instructions on running end to end build tests. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc White <[email protected]>
ACCESS-NRI · Nov 26, 2024 · 39bea88 · 39bea88
1 parent 8d18b19
commit 39bea88
Show file tree

Hide file tree

Showing 12 changed files with 2,695 additions and 118 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -40,7 +40,7 @@ jobs:
 
       - name: Run tests
         shell: bash -l {0}
-        run: coverage run -m --source=access_nri_intake pytest
+        run: coverage run -m --source=access_nri_intake pytest tests
 
       - name: Generate coverage report
         shell: bash -l {0}

diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
@@ -0,0 +1,23 @@
+name: Run end-to-end tests on Gadi
+on:
+  workflow_dispatch:
+    inputs:
+      release_version:
+        description: 'Release version'     
+        required: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Run end-to-end tests  # submits the PBS job that runs `pytest --e2e`
+        uses: appleboy/[email protected]
+        with:
+          host: gadi.nci.org.au
+          username: ${{secrets.GADI_USER}}
+          key: ${{secrets.DEPLOY_KEY}}
+          script: |
+            cd ${{secrets.GADI_REPO_PATH}}
+            qsub bin/test_end_to_end.sh
+
+            
diff --git a/bin/test_end_to_end.sh b/bin/test_end_to_end.sh
@@ -0,0 +1,27 @@
+#!/bin/bash -l
+
+#PBS -P iq82
+#PBS -l storage=gdata/xp65+gdata/ik11+gdata/cj50+gdata/hh5+gdata/p73+gdata/dk92+gdata/al33+gdata/rr3+gdata/fs38+gdata/oi10
+#PBS -q normal
+#PBS -W block=true
+#PBS -l walltime=00:30:00
+#PBS -l mem=32gb
+#PBS -l ncpus=12
+#PBS -l wd
+#PBS -j oe
+
+########################################################################################### 
+# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Description:
+#   Run the access-nri-intake end-to-end test suite (pytest --e2e) as a PBS job
+
+###########################################################################################
+
+set -e
+
+module use /g/data/xp65/public/modules
+module load conda/access-med-0.6
+
+pytest -s --e2e tests
diff --git a/docs/contributing/code.rst b/docs/contributing/code.rst
@@ -60,5 +60,10 @@ contributions and submitting a pull request.
 
       pytest .
 
+   This project has both unit tests and integration tests. Integration tests are disabled by default due to computational
+   expense, and can only be run on Gadi. To run the full test suite, including integration tests, run::
+
+      pytest --e2e .
+
 #. Once you are happy with your contribution, go `here <https://github.com/ACCESS-NRI/access-nri-intake-catalog/pulls>`_ 
    and open a new pull request to merge your branch of your fork with the main branch of the base.
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,6 +30,10 @@ test = [
     "pytest",
     "tox",
 ] 
+e2e = [
+    "pytest",
+    "pytest-ordering",
+]
 
 [project.scripts]
 catalog-build = "access_nri_intake.cli:build"

diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py
@@ -8,6 +8,7 @@
 import logging
 import os
 import re
+from collections.abc import Sequence
 from pathlib import Path
 
 import jsonschema
@@ -27,7 +28,9 @@ class MetadataCheckError(Exception):
     pass
 
 
-def _parse_build_inputs(config_yamls, build_path, data_base_path):
+def _parse_build_inputs(
+    config_yamls: list[str], build_path: str, data_base_path: str
+) -> list[tuple[str, dict]]:
     """
     Parse build inputs into a list of tuples of CatalogManager methods and args to
     pass to the methods
@@ -78,9 +81,12 @@ def _parse_build_inputs(config_yamls, build_path, data_base_path):
     return args
 
 
-def _check_build_args(args_list):
+def _check_build_args(args_list: list[dict]) -> None:
     """
     Run some checks on the parsed build arguments to be passed to the CatalogManager
+
+    Raises:
+        MetadataCheckError: If there are experiments with the same name or experiment_uuid
     """
 
     names = []
@@ -91,18 +97,18 @@ def _check_build_args(args_list):
 
     if len(names) != len(set(names)):
         seen = set()
-        dupes = [name for name in names if name in seen or seen.add(name)]
+        dupes = [name for name in names if name in seen or seen.add(name)]  # type: ignore
         raise MetadataCheckError(f"There are experiments with the same name: {dupes}")
     if len(uuids) != len(set(uuids)):
         seen = set()
-        dupes = [uuid for uuid in uuids if uuid in seen or seen.add(uuid)]
+        dupes = [uuid for uuid in uuids if uuid in seen or seen.add(uuid)]  # type: ignore
         dupes = [name for name, uuid in zip(names, uuids) if uuid in dupes]
         raise MetadataCheckError(
             f"There are experiments with the same experiment_uuid: {dupes}"
         )
 
 
-def build():
+def build(argv: Sequence[str] | None = None):
     """
     Build an intake-dataframe-catalog from YAML configuration file(s).
     """
@@ -180,7 +186,7 @@ def build():
         ),
     )
 
-    args = parser.parse_args()
+    args = parser.parse_args(argv)
     config_yamls = args.config_yaml
     build_base_path = args.build_base_path
     catalog_base_path = args.catalog_base_path
@@ -212,21 +218,21 @@ def _get_project(path):
         return match.groups()[0] if match else None
 
     project = set()
-    for method, args in parsed_sources:
+    for method, src_args in parsed_sources:
         if method == "load":
             # This is a hack but I don't know how else to get the storage from pre-built datastores
-            esm_ds = open_esm_datastore(args["path"][0])
+            esm_ds = open_esm_datastore(src_args["path"][0])
             project |= set(esm_ds.df["path"].map(_get_project))
 
-        project |= {_get_project(path) for path in args["path"]}
+        project |= {_get_project(path) for path in src_args["path"]}
     project |= {_get_project(build_base_path)}
     storage_flags = "+".join(sorted([f"gdata/{proj}" for proj in project]))
 
     # Build the catalog
     cm = CatalogManager(path=metacatalog_path)
-    for method, args in parsed_sources:
-        logger.info(f"Adding '{args['name']}' to metacatalog '{metacatalog_path}'")
-        getattr(cm, method)(**args)
+    for method, src_args in parsed_sources:
+        logger.info(f"Adding '{src_args['name']}' to metacatalog '{metacatalog_path}'")
+        getattr(cm, method)(**src_args)
 
     # Write catalog yaml file
     cat = cm.dfcat
@@ -367,7 +373,7 @@ def _combine_storage_flags(a: str, b: str) -> str:
     return "+".join(sorted(list(set(aflags + bflags))))
 
 
-def metadata_validate():
+def metadata_validate(argv: Sequence[str] | None = None):
     """
     Check provided metadata.yaml file(s) against the experiment schema
     """
@@ -379,7 +385,7 @@ def metadata_validate():
         help="The path to the metadata.yaml file. Multiple file paths can be passed.",
     )
 
-    args = parser.parse_args()
+    args = parser.parse_args(argv)
     files = args.file
 
     for f in files:

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -3,11 +3,42 @@
 
 import os
 import warnings
+from datetime import datetime
 from pathlib import Path
 
 from pytest import fixture
 
-here = os.path.abspath(os.path.dirname(__file__))
+here = Path(__file__).parent
+
+
+@fixture(scope="session")
+def test_data():
+    return Path(here / "data")
+
+
+@fixture(scope="session")
+def config_dir():
+    return Path(here / "e2e/configs")
+
+
+@fixture(scope="session")
+def BASE_DIR(tmp_path_factory):
+    yield tmp_path_factory.mktemp("catalog-dir")
+
+
+@fixture(scope="session")
+def v_num():
+    return datetime.now().strftime("v%Y-%m-%d")
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--e2e",
+        action="store_true",
+        default=False,
+        help="Run end-to-end tests",
+        dest="e2e",
+    )
 
 
 def _get_xfail():
@@ -34,11 +65,6 @@ def _get_xfail():
 _add_xfail = _get_xfail()
 
 
-@fixture(scope="session")
-def test_data():
-    return Path(os.path.join(here, "data"))
-
-
 def pytest_collection_modifyitems(config, items):
     """
     This function is called by pytest to modify the items collected during test

diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py
diff --git a/tests/e2e/configs/access-om2.yaml b/tests/e2e/configs/access-om2.yaml
@@ -0,0 +1,9 @@
+builder: AccessOm2Builder
+
+translator: DefaultTranslator
+
+sources:
+
+  - metadata_yaml: /g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/metadata.yaml
+    path:
+      - /g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi
diff --git a/tests/e2e/configs/cmip5.yaml b/tests/e2e/configs/cmip5.yaml
@@ -0,0 +1,9 @@
+builder: null
+
+translator: Cmip5Translator
+
+sources:
+
+  - metadata_yaml: /g/data/xp65/admin/access-nri-intake-catalog/config/metadata_sources/cmip5-al33/metadata.yaml
+    path:
+      - /g/data/al33/catalog/v2/esm/catalog.json