From bec67a076e93c91c2de5d47bfd3943e345d63f81 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Mon, 10 Jun 2024 14:26:45 -0700 Subject: [PATCH 1/8] (HP-1483): add healdata-utils to pyproject.toml and re-lock --- vlmd-submission-tools/poetry.lock | 302 +++++++++++++++++- vlmd-submission-tools/pyproject.toml | 2 + .../template_submission_bad_format.csv | 7 - .../tests/test_mapping_utils.py | 68 ---- .../vlmd_submission_tools/common/fields.json | 209 ------------ .../common/mapping_utils.py | 163 ---------- .../vlmd_submission_tools/common/schemas.py | 29 -- 7 files changed, 302 insertions(+), 478 deletions(-) delete mode 100644 vlmd-submission-tools/tests/templates/template_submission_bad_format.csv delete mode 100644 vlmd-submission-tools/tests/test_mapping_utils.py delete mode 100644 vlmd-submission-tools/vlmd_submission_tools/common/fields.json delete mode 100644 vlmd-submission-tools/vlmd_submission_tools/common/mapping_utils.py delete mode 100644 vlmd-submission-tools/vlmd_submission_tools/common/schemas.py diff --git a/vlmd-submission-tools/poetry.lock b/vlmd-submission-tools/poetry.lock index adfb0dc2..9b63f028 100644 --- a/vlmd-submission-tools/poetry.lock +++ b/vlmd-submission-tools/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "attrs" @@ -276,6 +276,17 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "exceptiongroup" version = "1.1.3" @@ -374,6 +385,29 @@ pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0dev)"] +[[package]] +name = "healdata-utils" +version = "0.5.1" +description = "Data packaging tools for the HEAL data ecosystem" +optional = false +python-versions = "*" +files = [ + {file = "healdata_utils-0.5.1-py3-none-any.whl", hash = "sha256:76c1dc5c7855e990c3891a18c3a2c8365f1d79221d1d1c46a69380ca09dd91af"}, + {file = "healdata_utils-0.5.1.tar.gz", hash = "sha256:681b815a1a1b53ad107027352a7abd9d14fa805141d8942f1bfef3d6957c0a57"}, +] + +[package.dependencies] +charset-normalizer = ">=2.1" +click = ">=8.1.3" +jsonschema = ">=4.17.3" +openpyxl = "*" +pandas = ">=1.4" +petl = "1.7.12" +pyreadstat = ">=1.2.0" +python-slugify = "*" +PyYaml = ">=6.0" +visions = ">=0.7.5" + [[package]] name = "humanize" version = "4.6.0" @@ -606,6 +640,80 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "multimethod" +version = "1.11.2" +description = "Multiple argument dispatching." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "multimethod-1.11.2-py3-none-any.whl", hash = "sha256:cb338f09395c0ee87d36c7691cdd794d13d8864358082cf1205f812edd5ce05a"}, + {file = "multimethod-1.11.2.tar.gz", hash = "sha256:7f2a4863967142e6db68632fef9cd79053c09670ba0c5f113301e245140bba5c"}, +] + +[[package]] +name = "networkx" +version = "3.2.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.9" +files = [ + {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, + {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, +] + +[package.extras] +default = ["matplotlib (>=3.5)", "numpy (>=1.22)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.4)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + 
{file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -622,6 +730,20 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "openpyxl" +version = "3.1.3" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false 
+python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.3-py2.py3-none-any.whl", hash = "sha256:25071b558db709de9e8782c3d3e058af3b23ffb2fc6f40c8f0c45a154eced2c3"}, + {file = "openpyxl-3.1.3.tar.gz", hash = "sha256:8dd482e5350125b2388070bb2477927be2e8ebc27df61178709bc8c8751da2f9"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "23.1" @@ -633,6 +755,79 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +[[package]] +name = "pandas" +version = "2.2.2" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr 
(>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "parameterized" version = "0.9.0" @@ -779,6 +974,44 @@ files = [ [package.extras] plugins = ["importlib-metadata"] +[[package]] +name = "pyreadstat" +version = "1.2.7" +description = "Reads and Writes SAS, SPSS and Stata files into/from pandas data frames." +optional = false +python-versions = "*" +files = [ + {file = "pyreadstat-1.2.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8b53853d05527a44a5dca33df309b3a7d5c2ca4a513ee9056ffc1b0bf6cbf917"}, + {file = "pyreadstat-1.2.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb79cfcad497a90ae116dde05ad45bdeab26c85915493b8e29a474c449ab55cf"}, + {file = "pyreadstat-1.2.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34693684338acd6d0dcf02cc2dc6bc6fe70a4a7bc2d52fea4a67d6f7bfd8f648"}, + {file = "pyreadstat-1.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:200096fa11562723f2c53e13e5e76b9ab72ae395329de1ada32ccb743b9c1752"}, + {file = "pyreadstat-1.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:5c6852a28b6ee9b5eae4c7a6a29aaeb2072a3b9f0a2c8084b96d6e84eff95990"}, + {file = "pyreadstat-1.2.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:603d7117dfdb6ca7231f9e15fa8496cf4187dc6358768547e66bbfff0b4ceda6"}, + {file = "pyreadstat-1.2.7-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:177ca4500b4f1c471297b8041437487fcc83d9fb39f8a45493bdf37a62a3f965"}, + {file = "pyreadstat-1.2.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61fcf55f2db2306c984e488c4fbd22786e01a06a4c0e297b52b23c8e0a59eefb"}, + {file = "pyreadstat-1.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd2100cbe74ecbc13d84556aeb8d4ffc2e2a448f2fa0056cb00d66cea79d55aa"}, + {file = "pyreadstat-1.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:c801cbb4ec6aa07346347fcafeda1e08af50508f7b1a1850be5dc369e2d12e6f"}, + {file = "pyreadstat-1.2.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3b2867a76ced9286942f8f75d50e8dcd2fd3601c12d1d1ed5c62d55f99747920"}, + {file = "pyreadstat-1.2.7-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:6303c8acfd54607bd5d6334149a0051d4f5ce0458089e22e7ad876ad0c12d354"}, + {file = "pyreadstat-1.2.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6314a0fc91a0e56f29c91da5ac9502429006800487793f82eabc797205d4e224"}, + {file = "pyreadstat-1.2.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7656c368711e356babece8fba6a8780b800c7e1379cfda6f22d97081b60a7fb9"}, + {file = "pyreadstat-1.2.7-cp312-cp312-win_amd64.whl", hash = "sha256:d9a8ec8bde0e82f5e45a5906499deb3615d874e80bd36756c42c8a43bb5a59c9"}, + {file = "pyreadstat-1.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a4f0dfb117183986c5ccf74e45e7a356cdefe839483d60ebb7a19ae7da820ac"}, + {file = "pyreadstat-1.2.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0e21c94514d02ac46d8e9f9008f99b0760478df90ca59f95be8705ba1a48a514"}, + {file = "pyreadstat-1.2.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e1d2c516e5233fb0f632a082748ca694fc8fff7ab88622b6c6585a35c6701a0"}, 
+ {file = "pyreadstat-1.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b1823ee4aacefe92f6a419b6b5f30a6632a0c1a6785dc352a05fb259697304a"}, + {file = "pyreadstat-1.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:41ff6a79eea517f83958f0d4d2ba0dd17996d11028f3dbdaac2fb119e5381076"}, + {file = "pyreadstat-1.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82152e0df092cd0fdb0d43c466210f3cf75d8e67854ecd6bd93a9ac05e218312"}, + {file = "pyreadstat-1.2.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0fe67f8f8c92f1861a0e044bc275644de552231e9a28960d54c6c5ca4cb285cf"}, + {file = "pyreadstat-1.2.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8985b9f76dbcc9fa687b0b64090ec821a5c5af2d1f57410635e2d5bdfc0b20b"}, + {file = "pyreadstat-1.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:259797c81d96e0c179f224565b75902d2bac02dd731a0a865ab04731ffdfc682"}, + {file = "pyreadstat-1.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:91015a4b9e853544f8737c818a88881a36de4b8dbcf80387773aacbb6f0d678b"}, + {file = "pyreadstat-1.2.7.tar.gz", hash = "sha256:bc2142be4773a9e7ff844068d0b48c413f9f46ba9511408bcd5dbec9b20aab6d"}, +] + +[package.dependencies] +pandas = ">=1.2.0" + [[package]] name = "pyrsistent" version = "0.19.3" @@ -886,6 +1119,17 @@ text-unidecode = ">=1.3" [package.extras] unidecode = ["Unidecode (>=1.1.1)"] +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pyyaml" version = "6.0" @@ -956,6 +1200,23 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-mock" +version = "1.12.1" +description = "Mock out responses from the requests package" +optional = false +python-versions = ">=3.5" +files = [ + {file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"}, + {file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"}, +] + +[package.dependencies] +requests = ">=2.22,<3" + +[package.extras] +fixture = ["fixtures"] + [[package]] name = "requests-oauthlib" version = "1.3.1" @@ -1167,6 +1428,17 @@ files = [ {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, ] +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + [[package]] name = "urllib3" version = "1.26.9" @@ -1199,6 +1471,32 @@ decorator = ">=3.4.0" [package.extras] test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] +[[package]] +name = "visions" +version = "0.7.6" +description = "Visions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "visions-0.7.6-py3-none-any.whl", hash = 
"sha256:72b7f8dbc374e9d6055e938c8c67b0b8da52f3bcb8320f25d86b1a57457e7aa6"}, + {file = "visions-0.7.6.tar.gz", hash = "sha256:00f494a7f78917db2292e11ea832c6e026b64783e688b11da24f4c271ef1631d"}, +] + +[package.dependencies] +attrs = ">=19.3.0" +multimethod = ">=1.4" +networkx = ">=2.4" +numpy = ">=1.23.2" +pandas = ">=2.0.0" + +[package.extras] +all = ["Pillow", "attrs (>=19.3.0)", "imagehash", "matplotlib", "multimethod (>=1.4)", "networkx (>=2.4)", "numpy (>=1.23.2)", "pandas (>=2.0.0)", "pydot", "pygraphviz", "shapely"] +dev = ["IPython", "Sphinx-copybutton", "black (>=20.8b1)", "isort (>=5.0.9)", "mypy (>=0.770)", "nbsphinx", "recommonmark (>=0.6.0)", "setuptools (>=46.1.3)", "sphinx-autodoc-typehints (>=1.10.3)", "sphinx-rtd-theme (>=0.4.3)", "wheel (>=0.34.2)"] +plotting = ["matplotlib", "pydot", "pygraphviz"] +test = ["Pillow", "big-o (>=0.10.1)", "black (>=19.10b0)", "check-manifest (>=0.41)", "imagehash", "isort (>=5.0.9)", "matplotlib", "mypy (>=0.800)", "numba", "pandas", "pre-commit", "pyarrow (>=1.0.1)", "pydot", "pyspark", "pytest (>=5.2.0)", "pytest-spark (>=0.6.0)", "shapely", "twine (>=3.1.1)"] +type-geometry = ["shapely"] +type-image-path = ["Pillow", "imagehash"] + [[package]] name = "websocket-client" version = "1.5.3" @@ -1218,4 +1516,4 @@ test = ["websockets"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "329f221b6b67c43ae8ff4cedd7ecca5f22acc58a01506300f244a69dadac3776" +content-hash = "f873068e454e91ce3abec04042338c0f0a2704f8a56157d2b3f26e52bf275ce0" diff --git a/vlmd-submission-tools/pyproject.toml b/vlmd-submission-tools/pyproject.toml index fb28cc18..e8dbd442 100644 --- a/vlmd-submission-tools/pyproject.toml +++ b/vlmd-submission-tools/pyproject.toml @@ -28,6 +28,8 @@ frictionless = "^5.12.1" parameterized = "^0.9.0" pytest = "^7.4.2" pytest-cov = "^4.1.0" +healdata-utils = "^0.5.1" +requests-mock = "^1.12.1" [tool.poetry.dev-dependencies] diff --git a/vlmd-submission-tools/tests/templates/template_submission_bad_format.csv b/vlmd-submission-tools/tests/templates/template_submission_bad_format.csv deleted file mode 100644 index 02eb5f8d..00000000 --- a/vlmd-submission-tools/tests/templates/template_submission_bad_format.csv +++ /dev/null @@ -1,7 +0,0 @@ -name,title,description,type,format,constraints.maxLength,constraints.pattern,constraints.minimum,constraints.maximum,ordered,missingValues,trueValues,falseValues,repo_link,cde_id,ontology_id,encoding,constraints.enum -participant_id,Participant Id,Unique identifier for participant,string,9999,,[A-Z][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9],,,,,,,,,,, -race,Race,Self-reported race,integer,,,,,,,99,,,,NLM=Fakc6Jy2x|NLM=m1_atF7L7U,,1=White|2=Black or African American|3=American Indian or Alaska Native|4=Native| 5=Hawaiian or Other Pacific Islander|6=Asian|7=Some other race|8=Multiracial|99=Not reported,1|2|3|4|5|6|7|8 -age,Age,What is your age? (age at enrollment),integer,years,,,0,90,,,,,,,,, -hispanic,"Hispanic, Latino, or Spanish Origin","Are you of Hispanic, Latino, or Spanish origin?",boolean,,,,,,,Not reported,No,Yes,,,,, -sex_at_birth,Sex at Birth,The self-reported sex of the participant/subject at birth,string,,,,,,,Prefer not to answer|Unknown,,,,NLM=ezelurehr2,,,Male|Female|Intersex|None of these describe me|Prefer not to answer|Unknown -SU4,Heroin Days Used,During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? 
] [Write 0 days if no use],integer,days,,,,,,,,,,,is=CHEBI=27808|is=RXNORM=3304,, diff --git a/vlmd-submission-tools/tests/test_mapping_utils.py b/vlmd-submission-tools/tests/test_mapping_utils.py deleted file mode 100644 index 27bd802a..00000000 --- a/vlmd-submission-tools/tests/test_mapping_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -from vlmd_submission_tools.common import mapping_utils - -class TestMappingUtils: - - - def test_split_str_array(self): - string="foo|bar" - expected=["foo", "bar"] - assert mapping_utils.split_str_array(string) == expected - sep="#" - string="foo#bar" - expected=["foo", "bar"] - assert mapping_utils.split_str_array(string,sep) == expected - - - def test_map_keys_vals(self): - keys=['key1', 'key2', 'key3'] - vals=['val1', 'val2', 'val3'] - expected={'key1': 'val1', 'key2': 'val2', 'key3': 'val3'} - assert mapping_utils.map_keys_vals(keys,vals) == expected - - - def test_split_and_map(self): - string="1=foo|2=bar|3=flim" - prop = { - 'items': { - 'properties': { - 'key1': 'val1', - 'key2': 'val2', - 'key3': 'val3' - } - } - } - expected=[ - {'key1': '1', 'key2': 'foo'}, - {'key1': '2', 'key2': 'bar'}, - {'key1': '3', 'key2': 'flim'} - ] - assert mapping_utils.split_and_map(string,prop) == expected - - - def test_loads_dict(self): - string="1=foo|2=bar|3=flim" - expected={'1': 'foo', '2': 'bar', '3': 'flim'} - assert mapping_utils.loads_dict(string)== expected - string2="1_foo#2_bar#3_flim" - result = mapping_utils.loads_dict(string2,item_sep='#',key_val_sep='_') - assert result == expected - - - def test_to_bool(self): - test_vals=['True', 'true', '1', 'Yes', 'Y', 'Required'] - for val in test_vals: - assert mapping_utils.to_bool(val) - test_vals=['False', 'false', '0', 'No', 'N', 'Not Required'] - for val in test_vals: - assert not mapping_utils.to_bool(val) - test_vals=['Foo', 'Bar'] - expected_empty = "" - for val in test_vals: - assert mapping_utils.to_bool(val) == expected_empty - - - def test_join_dict_vals(self): - sep="|" - dict={'key1': 'val1', 'key2': 'val2', 'key3': 'val3'} - expected="val1|val2|val3" - assert mapping_utils.join_dictvals(dict,sep) == expected diff --git a/vlmd-submission-tools/vlmd_submission_tools/common/fields.json b/vlmd-submission-tools/vlmd_submission_tools/common/fields.json deleted file mode 100644 index 9dbcfe9d..00000000 --- a/vlmd-submission-tools/vlmd_submission_tools/common/fields.json +++ /dev/null @@ -1,209 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "$id": "vlmd-fields", - "title": "HEAL Variable Level Metadata Fields", - "description": "Variable level metadata individual fields integrated into the variable level metadata object within the HEAL platform metadata service.\n", - "type": "object", - "required": [ - "name", - "description" - ], - "properties": { - "name": { - "type": "string", - "title": "Variable Name", - "description": "The name of a variable (i.e., field) as it appears in the data.\n" - }, - "title": { - "type": "string", - "title": "Variable Label (ie Title)", - "description": "The human-readable title of the variable." 
- }, - "description": { - "type": "string", - "title": "Variable Description", - "description": "An extended description of the variable.", - "examples": [ - "Definition", - "Question text (if a survey)" - ] - }, - "type": { - "type": "string", - "title": "Variable Type", - "description": "A classification allowing the user (analyst, researcher or computer) to know how to use the variable\n" - }, - "format": { - "type": "string", - "title": "Variable Format", - "description": "Indicates the format of the type specified in the `type` property. This may describe the type of unit (such as for time fields like year or month) or the format of a date field (such as %y%m%d).\n" - }, - "constraints": { - "type": "object", - "properties": { - "maxLength": { - "type": "integer", - "title": "Maximum Length", - "description": "Indicates the maximum length of an iterable (e.g., array, string, or object). For example, if 'Hello World' is the longest value of a categorical variable, this would be a maxLength of 11.\n" - }, - "enum": { - "type": "array", - "title": "Variable Possible Values", - "description": "Constrains possible values to a set of values.\n" - }, - "pattern": { - "type": "string", - "title": "Regular Expression Pattern", - "description": "A regular expression pattern the data MUST conform to.\n" - }, - "maximum": { - "type": "integer", - "title": "Maximum Value", - "description": "Specifies the maximum value of a field (e.g., maximum -- or most recent -- date, maximum integer etc). Note, this is different then maxLength property.\n" - }, - "minimum": { - "type": "integer", - "title": "Minimum Value", - "description": "Specifies the minimum value of a field (e.g., miniimum -- or oldest -- date, minimum integer etc).\n" - } - } - }, - "encoding": { - "type": "object", - "title": "Variable Value Encodings (i.e., mappings; value labels)", - "description": "Encodings (and mappings) allow categorical values to be stored as numerical values. IMPORTANT: the ==key should be the value represented IN the data== and the ==value should be the to-be-mapped label==. Many analytic software programs use numerical encodings and some algorithms only support numerical values. Additionally, this field provides a way to store categoricals that are stored as \"short\" labels (such as abbreviations)\n", - "examples": [ - "{0:'No',1:'Yes'}", - "{'HW':'Hello world','GBW':'Good bye world'}" - ] - }, - "ordered": { - "type": "boolean", - "title": "An ordered variable", - "description": "Indicates whether a categorical variable is ordered. This variable is relevant for variables that have an ordered relationship but not necessarily a numerical relationship (e.g., Strongly disagree < Disagree < Neutral < Agree).\n" - }, - "missingValues": { - "type": "array", - "title": "Missing Values", - "description": "A list of missing values specific to a variable." 
- }, - "trueValues": { - "type": "array", - "title": "Boolean True Value Labels", - "description": "For boolean (true) variable (as defined in type field), this field allows a physical string representation to be cast as true (increasing readability of the field)\n", - "items": { - "type": "string" - }, - "examples": [ - "Yes", - "1", - "True", - "true", - "Correct" - ] - }, - "falseValues": { - "type": "array", - "title": "Boolean False Value Labels", - "description": "For boolean (false) variable (as defined in type field), this field allows a physical string representation to be cast as false (increasing readability of the field)\n", - "items": { - "type": "string" - }, - "examples": [ - "No", - "0", - "False", - "false", - "Incorrect" - ] - }, - "repo_link": { - "type": "string", - "title": "Variable Repository Link", - "description": "A link to the variable as it exists on the home repository, if applicable\n" - }, - "cde_id": { - "type": "array", - "title": "Common Data Element Id", - "description": "The source and id for the NIH Common Data Elements program.", - "items": { - "type": "object", - "properties": { - "source": { - "type": "string" - }, - "id": { - "type": "string" - } - } - } - }, - "ontology_id": { - "type": "array", - "title": "Ontology ID", - "description": "Ontological information for the given variable as indicated by the source, id, and relation to the specified classification. One or more ontology classifications can be specified. \n", - "items": { - "type": "object", - "properties": { - "relation": { - "type": "string" - }, - "source": { - "type": "string" - }, - "id": { - "type": "string" - } - - } - } - }, - "univar_stats": { - "type": "object", - "properties": { - "median": { - "type": "number" - }, - "mean": { - "type": "number" - }, - "std": { - "type": "number" - }, - "min": { - "type": "number" - }, - "max": { - "type": "number" - }, - "mode": { - "type": "number" - }, - "count": { - "type": "integer", - "minimum": 0 - }, - "twenty_five_percentile": { - "type": "number" - }, - "seventy_five_percentile": { - "type": "number" - }, - "cat_marginals": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "count": { - "type": "integer" - } - } - } - } - } - } - } -} diff --git a/vlmd-submission-tools/vlmd_submission_tools/common/mapping_utils.py b/vlmd-submission-tools/vlmd_submission_tools/common/mapping_utils.py deleted file mode 100644 index 3b97e658..00000000 --- a/vlmd-submission-tools/vlmd_submission_tools/common/mapping_utils.py +++ /dev/null @@ -1,163 +0,0 @@ -''' -contains mappings (both lambda functions or column mappings) -''' -from vlmd_submission_tools.common import schemas - -# split array columns -def split_str_array(string,sep='|'): - if string: - return [s.strip() for s in string.split(sep)] - else: - return None - -# if object within array, assign to properties -def map_keys_vals(keys,vals): - ''' zips two lists of the same size as - a dictionary - ''' - return dict(zip(keys,vals)) - - -def split_and_map(string,prop): - ''' - splits nested stringified delimited lists - (delimiters being | for outer and = for inner) - and zips/maps each of the inner lists to a set - of values (right now keys of a dictionary) - TODO: rename function split_and_map_to_keys - TODO: generalize to more than keys - - ''' - if string: - keys = prop['items']['properties'].keys() - return [ - map_keys_vals(keys,split_str_array(x,sep='=')) - for x in split_str_array(string,sep='|') - ] - else: - return None - - -def 
loads_dict(string,item_sep='|',key_val_sep='='): - if string: - return dict([split_str_array(s,key_val_sep) - for s in split_str_array(string,item_sep)]) - - -def convert_rec_to_json(field): - ''' - converts a flattened dictionary to a nested dictionary - based on JSON path dot notation indicating nesting - ''' - # print(f"Working on field {field}") - field_json = {} - for prop_path,prop in field.items(): - if prop: - # initiate the prop to be added with the entire - # field - prop_json = field_json - # get the inner most dictionary item of the jsonpath - nested_names = prop_path.split('.') - for i,prop_name in enumerate(nested_names): - is_last_nested = i+1==len(nested_names) - if prop_json.get(prop_name) and not is_last_nested: - prop_json = prop_json[prop_name] - # if no object currently - elif not is_last_nested: - prop_json[prop_name] = {} - prop_json = prop_json[prop_name] - #assign property to inner most item - else: - prop_json[prop_name] = prop - - return field_json - - -def mapval(v,mapping): - v = str(v) - if v in mapping: - return mapping[v] - else: - return v - - -def to_bool(v): - if v.lower() in true_values: - return True - elif v.lower() in false_values: - return False - else: - return "" - - -typemap = { - #from bacpac - 'text':'string', - 'float':'number', - #from hemo - 'NUM':'number', - 'CHAR':'string' -} - - -formatmap = { - 'ISO8601':'' # NOTE: this is the default date format for frictionless so not necessary to specify -} - - -props = schemas.heal['data_dictionary']['properties'] - #mappings for array of dicts, arrays, and dicts - - -true_values = ["true","1","yes","required","y"] -false_values = ["false","0","no","not required","n"] - - -fieldmap = { - 'constraints.enum': lambda v: split_str_array(v), - # 'constraints.maximum':int, - # 'constraints.minimum':int, #TODO:need to add to schema - # 'constraints.maxLength':int, - 'cde_id': lambda v: split_and_map(v, props['cde_id']), - 'ontology_id': lambda v: split_and_map(v, props['ontology_id']), - 'encoding':lambda v: loads_dict(v), - 'format': lambda v: mapval(v,formatmap), - 'type':lambda v: mapval(v,typemap), - #'univar_stats.cat_marginals':lambda v: split_and_map(v, prop['univar_stats']['cat_marginals']), - 'missingValues':lambda v: split_str_array(v), - 'trueValues': lambda v: split_str_array(v), - 'falseValues':lambda v: split_str_array(v), - # 'constraints.required': lambda v: to_bool(v), - # TODO: add stats -} - - -# join mappings for json to csv - -def join_iter(iterable,sep_list="|"): - return sep_list.join([str(p) for p in iterable]) - - -def join_dictvals(dictionary:dict,sep:str): - return sep.join(dictionary.values()) - - -def join_dictitems(dictionary:dict,sep_keyval='=',sep_items='|'): - dict_list = [key+sep_keyval+val for key,val in dictionary.items()] - return sep_items.join(dict_list) - - -joinmap = { - 'constraints.enum': join_iter, - 'cde_id': join_dictvals, - 'ontology_id': join_dictvals, - 'encodings': join_dictitems, - 'missingValues':join_iter, - 'trueValues': join_iter, - 'falseValues':join_iter, - # TODO: add stats -} - - -def join_prop(propname,prop): - return joinmap[propname](prop) if propname in joinmap else prop diff --git a/vlmd-submission-tools/vlmd_submission_tools/common/schemas.py b/vlmd-submission-tools/vlmd_submission_tools/common/schemas.py deleted file mode 100644 index 50c1e393..00000000 --- a/vlmd-submission-tools/vlmd_submission_tools/common/schemas.py +++ /dev/null @@ -1,29 +0,0 @@ -from pathlib import Path -from frictionless import Schema -import json -import jsonschema - 
-# TODO: use data_dictionary.json -# TODO: output informative error messages in validation -# NOTE: would it be good to also have a frictionless CSV template with regexs?...may be easier to spot text errors? - -# can change to request.get(github) -with open('vlmd_submission_tools/common/fields.json') as f: - data = json.load(f) - -heal = { - 'data_dictionary': data -} - -schema = { - 'type':'object', - 'required':[ - 'title', - 'data_dictionary' - ], - 'properties':{ - 'title':{'type':'string'}, - 'description':{'type':'string'}, - 'data_dictionary':{'type':'array','items':heal['data_dictionary']} - } -} From df3181219145bba98b6aca268c2a394f591db700 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Mon, 10 Jun 2024 14:28:19 -0700 Subject: [PATCH 2/8] (hp-1483): add new test templates --- .../templates/template_submission_invalid.csv | 8 ++++++++ .../templates/template_submission_invalid.json | 16 ++++++++++++++++ .../templates/template_submission_invalid.tsv | 7 +++++++ .../templates/template_submission_small.csv | 3 +++ .../templates/template_submission_small.json | 18 ++++++++++++++++++ .../templates/template_submission_small.tsv | 3 +++ 6 files changed, 55 insertions(+) create mode 100644 vlmd-submission-tools/tests/templates/template_submission_invalid.csv create mode 100644 vlmd-submission-tools/tests/templates/template_submission_invalid.json create mode 100644 vlmd-submission-tools/tests/templates/template_submission_invalid.tsv create mode 100644 vlmd-submission-tools/tests/templates/template_submission_small.csv create mode 100644 vlmd-submission-tools/tests/templates/template_submission_small.json create mode 100644 vlmd-submission-tools/tests/templates/template_submission_small.tsv diff --git a/vlmd-submission-tools/tests/templates/template_submission_invalid.csv b/vlmd-submission-tools/tests/templates/template_submission_invalid.csv new file mode 100644 index 00000000..436dc924 --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_invalid.csv @@ -0,0 +1,8 @@ +name,description,type +participant_id,Unique identifier for participant,character +race,Self-reported race,integer +,,integer +hispanic,"Are you of Hispanic, Latino, or Spanish origin?",boolean +sex_at_birth,The self-reported sex of the participant/subject at birth, +SU4,During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? 
] [Write 0 days if no use],integer +pulse_rate,Heart rate measured at systemic artery,number diff --git a/vlmd-submission-tools/tests/templates/template_submission_invalid.json b/vlmd-submission-tools/tests/templates/template_submission_invalid.json new file mode 100644 index 00000000..198cd029 --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_invalid.json @@ -0,0 +1,16 @@ +{ + "title": "Example VLMD", + "description": "This is an example", + "fields": [ + { + "name": "participant_id", + "description": "Unique identifier for participant", + "type": "character" + }, + { + "name": "race", + "description": "Self-reported race", + "type": "integer" + } + ] +} diff --git a/vlmd-submission-tools/tests/templates/template_submission_invalid.tsv b/vlmd-submission-tools/tests/templates/template_submission_invalid.tsv new file mode 100644 index 00000000..1129cf95 --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_invalid.tsv @@ -0,0 +1,7 @@ +name title description type +participant_id Participant Id Unique identifier for participant character +race Race Self-reported race integer +age Age What is your age? (age at enrollment) integer +hispanic "Hispanic Latino or Spanish Origin" "Are you of Hispanic Latino or Spanish origin?" boolean +sex_at_birth Sex at Birth The self-reported sex of the participant/subject at birth string +SU4 Heroin Days Used During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use] integer diff --git a/vlmd-submission-tools/tests/templates/template_submission_small.csv b/vlmd-submission-tools/tests/templates/template_submission_small.csv new file mode 100644 index 00000000..370e1a41 --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_small.csv @@ -0,0 +1,3 @@ +module,name,title,description,type,format, +,participant_id,Participant Id,Unique identifier for participant,string,, +,race,Race,Self-reported race,integer,, diff --git a/vlmd-submission-tools/tests/templates/template_submission_small.json b/vlmd-submission-tools/tests/templates/template_submission_small.json new file mode 100644 index 00000000..4a7d0430 --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_small.json @@ -0,0 +1,18 @@ +{ + "title": "Example VLMD", + "description": "This is an example", + "fields": [ + { + "name": "participant_id", + "title": "Participant Id", + "description": "Unique identifier for participant", + "type": "string" + }, + { + "name": "race", + "title": "Race", + "description": "Self-reported race", + "type": "integer" + } + ] +} diff --git a/vlmd-submission-tools/tests/templates/template_submission_small.tsv b/vlmd-submission-tools/tests/templates/template_submission_small.tsv new file mode 100644 index 00000000..b47a98eb --- /dev/null +++ b/vlmd-submission-tools/tests/templates/template_submission_small.tsv @@ -0,0 +1,3 @@ +name title description type +participant_id Participant Id Unique identifier for participant string +race Race Self-reported race integer From a29e937d7bf9f259d6796d80c04df154f7be54af Mon Sep 17 00:00:00 2001 From: George Thomas Date: Mon, 10 Jun 2024 14:29:30 -0700 Subject: [PATCH 3/8] (HP-1483): update submission tools --- .../vlmd_submission_tools/common/utils.py | 4 +- .../read_and_validate_dictionary.py | 188 ++++++++++-------- .../subcommands/upload_dictionary_to_mds.py | 22 +- 3 files changed, 118 insertions(+), 96 deletions(-) diff --git a/vlmd-submission-tools/vlmd_submission_tools/common/utils.py 
b/vlmd-submission-tools/vlmd_submission_tools/common/utils.py index 794aecf8..62fb1280 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/common/utils.py +++ b/vlmd-submission-tools/vlmd_submission_tools/common/utils.py @@ -59,9 +59,9 @@ def check_mds_study_id(study_id, hostname=config.HOST_NAME): if response.json().get("_guid_type") != "discovery_metadata": raise ValueError("Study ID is not dicovery metadata") - existing_data_dictionaries = response.json().get("data_dictionaries", {}) + existing_vlmd = response.json().get("variable_level_metadata", {}) - return existing_data_dictionaries + return existing_vlmd def get_client_token(hostname: str, client_id: str, client_secret: str): diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/read_and_validate_dictionary.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/read_and_validate_dictionary.py index 38bcb763..2e76b124 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/read_and_validate_dictionary.py +++ b/vlmd-submission-tools/vlmd_submission_tools/subcommands/read_and_validate_dictionary.py @@ -7,20 +7,14 @@ from argparse import ArgumentParser, Namespace import json -import jsonschema -import os import traceback from urllib.parse import unquote -from frictionless import Resource, FrictionlessException -import petl as etl import requests +from healdata_utils import validate_vlmd_csv, validate_vlmd_json +from healdata_utils.conversion import convert_to_vlmd from vlmd_submission_tools.common.logger import Logger -from vlmd_submission_tools.common import config -from vlmd_submission_tools.common import mapping_utils -from vlmd_submission_tools.common import utils -from vlmd_submission_tools.common import schemas from vlmd_submission_tools.subcommands import Subcommand @@ -76,9 +70,8 @@ def __get_description__(cls) -> str: """ return ( "Takes a presigned url and fetches the data dictionary. " - "Converts any csv/tsv to json and saves to local file system. " - "Validates the dictionary against the provided schema. " - "Writes JSON output with json_local_path and is_valid_dictionary." + "Validates the dictionary against the healdata-utils schema. " + "Writes JSON output with json_local_path and validation report. 
" ) @classmethod @@ -95,89 +88,107 @@ def main(cls, options: Namespace) -> None: file_type = cls._get_file_type_from_filename(options.file_name) json_local_path = options.json_local_path + local_path = None + is_valid_dictionary = None + errors_list = None - # pull in schema - schema = schemas.heal['data_dictionary'] - data_dictionary_props = schema['properties'] - data_dictionary = {"title": "dictionary title"} - mappings = mapping_utils.fieldmap + # download from url and save local copy + try: + local_path = cls._download_from_url(file_type, dictionary_url, json_local_path) + if local_path: + logger.info(f"Data dictionary saved in {local_path}") + + except Exception as e: + logger.error(f"Could not read dictionary from url {dictionary_url}") + logger.error(e) + logger.error(f"Exception type = {type(e)}") + return + + # get validation report with healdata-utils.validate_vlmd + logger.info(f"Getting validation report for {local_path}") + try: + if file_type == 'json': + result = validate_vlmd_json(local_path) + elif file_type == 'csv' or file_type == 'tsv': + result = validate_vlmd_csv(local_path) + validation_report = result.get('report') + + is_valid_dictionary = validation_report.get('valid') + errors_list = validation_report.get('errors') + except Exception as e: + logger.error(f"Error in validation: {e}") + + logger.info(f"Valid dictionary = {is_valid_dictionary}") + logger.info(f"Errors from validation report = {errors_list}") - logger.info(f"Fetching dictionary from s3 url.") + # convert csv to json for uploading to MDS. if file_type == 'csv' or file_type == 'tsv': - try: - source = Resource(dictionary_url) - source = source.to_petl() - - logger.info(f"Converting {file_type} file to json") - logger.info(f"Column names in petl: {source.fieldnames()}") - fields_to_add = [ - (field,'') - for field in mappings.keys() - if not field in source.fieldnames() - ] - template_tbl = ( - source - .addfields(fields_to_add) # add fields from mappings not in the csv template to allow convert fxns to work - .convert(mappings) - .convertnumbers() - .cut(source.fieldnames()) # want to include only fields in csv - ) - except FrictionlessException: - is_valid_dictionary = False - traceback.print_exc() - raise FrictionlessException(f"Frictionless could not read dictionary from url {dictionary_url}") - except: - is_valid_dictionary = False - traceback.print_exc() - raise Exception(f"Could not read dictionary from url {dictionary_url}") - - try: - data_dictionary['data_dictionary'] = [mapping_utils.convert_rec_to_json(rec) for rec in etl.dicts(template_tbl)] - except: - is_valid_dictionary = False - traceback.print_exc() - raise Exception(f"Could not convert {file_type} to json") - else: - # JSON format is read directly without conversion - try: - response = requests.get(dictionary_url) - data_dictionary_json = response.text - data_dictionary = json.loads(data_dictionary_json) - except: - is_valid_dictionary = False - traceback.print_exc() - raise Exception(f"Could not read dictionary from url {dictionary_url}") - - logger.info("Reading schema into schema_array") - schema_array = { - "$schema": "http://json-schema.org/draft-04/schema#", - "$id": "vlmd", - "title":"Variable Level Metadata (Data Dictionaries)", - "description": "This schema defines the variable level metadata for one data dictionary for a given study.Note a given study can have multiple data dictionaries", - "type": "array", - "items": schema + logger.info(f"Converting {file_type} to JSON") + props = { + "description": f"Json dictionary 
converted from {file_type}", + "title": "HEAL compliant variable level metadata dictionary" + } + + vlmd_dict = convert_to_vlmd( + input_filepath = local_path, + data_dictionary_props = props, + inputtype = "csv-data-dict", + ) + converted_json = vlmd_dict.get('jsontemplate') + + # logger.info(f"Converted JSON is valid dictionary = convertis_valid_dictionary}") + # logger.info(f"Errors from validation report = {errors_list}") + logger.info(f"Errors = {vlmd_dict.get('errors')}") + + with open(json_local_path, 'w', encoding='utf-8') as o: + json.dump(converted_json, o, ensure_ascii=False, indent=4) + logger.info(f"Converted JSON data dictionary saved in {json_local_path}") + + + report_json = { + "json_local_path": json_local_path, + "is_valid_dictionary": is_valid_dictionary, + "errors": errors_list } + # save the validation report artifact + with open(options.output, 'w', encoding='utf-8') as o: + json.dump(report_json, o, ensure_ascii=False, indent=4) + logger.info(f"Validation report saved in {options.output}") + - logger.info("Validating dictionary.") - is_valid_dictionary = True + @classmethod + def _download_from_url(cls, file_type: str, url: str, json_local_path: str) -> str: + """ + Sends a request to the url and saves data in the local_path + + Args: + file_type (str): 'csv', 'tsv', 'json' + url (str): the url for the data dictionary + json_local_path (str): the path to the local copy, eg, '/tmp/vlmd/dict.json' + + Returns: + path of saved contents, None if error in downloading. + """ + local_path = None try: - jsonschema.validate(data_dictionary['data_dictionary'],schema=schema_array) - except: - is_valid_dictionary = False - traceback.print_exc() - raise Exception("Not a valid dictionary") - logger.info(f"Valid={is_valid_dictionary}") - - # save the data dictionary - with open(json_local_path, 'w', encoding='utf-8') as o: - json.dump(data_dictionary, o, ensure_ascii=False, indent=4) - logger.info(f"JSON data dictionary saved in {json_local_path}") - - # save the json_local_path and is_valid_dictionary output parameters - record_json = {"json_local_path": json_local_path, "is_valid_dictionary": is_valid_dictionary} - with open(options.output, 'w', encoding='utf-8') as o: - json.dump(record_json, o, ensure_ascii=False, indent=4) - logger.info(f"JSON response saved in {options.output}") + response = requests.get(url) + data_dictionary = response.text + if file_type == 'json': + data_dictionary = response.text + data_dictionary = json.loads(data_dictionary) + with open(json_local_path, 'w', encoding='utf-8') as f: + json.dump(data_dictionary, f, ensure_ascii=False, indent=4) + return json_local_path + elif file_type == 'csv' or file_type == 'tsv': + data_dictionary = response.content + csv_local_path = json_local_path.replace('json', f"{file_type}") + with open(csv_local_path, 'wb') as f: + f.write(data_dictionary) + return csv_local_path + except Exception as exc: + raise(exc) + + return local_path @classmethod @@ -190,4 +201,5 @@ def _get_file_type_from_filename(cls, file_name: str): file_type = 'tsv' else: raise Exception("Could not get file type suffix from filename") + return file_type diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py index a368c273..afe17453 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py @@ 
-89,9 +89,15 @@ def main(cls, options: Namespace) -> None: except: raise Exception("Could not read local json dictionary.") + # verify that the submitted study-id exists in mds db + # TODO: decide if we want to handle exception here logger.info(f"Checking for study ID {options.study_id} in MDS") - existing_data_dictionaries = utils.check_mds_study_id(options.study_id, config.HOST_NAME) + vlmd_for_study = utils.check_mds_study_id(options.study_id, config.HOST_NAME) + logger.info(f"Existing vlmd = {vlmd_for_study}") + # if empty then fill in required key: 'data_dictionaries' + if vlmd_for_study.get('data_dictionaries') == None: + vlmd_for_study['data_dictionaries'] = {} # test the client token - maybe put this in a try statement. # get token for mds api call @@ -107,7 +113,8 @@ def main(cls, options: Namespace) -> None: try: guid = str(uuid.uuid4()) data = { "_guid_type": "data_dictionary", - "data_dictionary": data_dictionary['data_dictionary']} + "title": options.dictionary_name, + "data_dictionary": data_dictionary} url = f"https://{config.HOST_NAME}/mds/metadata/{guid}" headers = {"Authorization": "bearer " + token, "content-type": "application/json"} response = requests.post(url, headers=headers, json=data) @@ -122,14 +129,17 @@ def main(cls, options: Namespace) -> None: if response.status_code != 200 and response.status_code != 201: logger.error("Error in uploading dictionary to MDS") - # add this name and guid to the study ID metadata + # add this name and guid to the study ID variable level metadata logger.info(f"Adding dictionary_name '{options.dictionary_name}' to study ID = {options.study_id}") try: - existing_data_dictionaries[options.dictionary_name] = f"{guid}" - data = {"data_dictionaries": existing_data_dictionaries} + vlmd_for_study['data_dictionaries'][options.dictionary_name] = f"{guid}" + json_data = { + "variable_level_metadata": vlmd_for_study + } + # data = {"data_dictionaries": existing_data_dictionaries} url = f"https://{config.HOST_NAME}/mds/metadata/{options.study_id}?merge=True" - response = requests.put(url, headers=headers, json=data) + response = requests.put(url, headers=headers, json=json_data) response.raise_for_status() logger.info("Success") except: From 1a0f702565fcad0f7c58700ba8653e14cafbc9a5 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Mon, 10 Jun 2024 14:48:05 -0700 Subject: [PATCH 4/8] (HP-1483): update tests --- .secrets.baseline | 20 +- .../tests/test_common_utils.py | 8 +- ...subcommand_read_and_validate_dictionary.py | 439 ++++++++++++++---- ...est_subcommand_upload_dictionary_to_mds.py | 11 +- 4 files changed, 368 insertions(+), 110 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 72559e6f..38d485e5 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,9 +1,9 @@ { "exclude": { - "files": "^.secrets.baseline$", + "files": null, "lines": null }, - "generated_at": "2023-09-28T19:27:35Z", + "generated_at": "2024-06-10T21:46:21Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -442,14 +442,6 @@ "type": "Base64 High Entropy String" } ], - "azlinux-jupyter-scipy/start.sh": [ - { - "hashed_secret": "f98370d81077aed0aa3500bfc8de3f3e1dac52f6", - "is_verified": false, - "line_number": 133, - "type": "Secret Keyword" - } - ], "jupyter-geo/start.sh": [ { "hashed_secret": "f98370d81077aed0aa3500bfc8de3f3e1dac52f6", @@ -468,9 +460,9 @@ ], "vlmd-submission-tools/poetry.lock": [ { - "hashed_secret": "5b240644452ed40dfe194673b7db6b641971c720", + "hashed_secret": "e1df343623dcc5d44e3a7da0e09ee4b0c980b52f", "is_verified": false, - 
"line_number": 1221, + "line_number": 1519, "type": "Hex High Entropy String" } ], @@ -478,7 +470,7 @@ { "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 100, + "line_number": 102, "type": "Secret Keyword" } ], @@ -494,7 +486,7 @@ { "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 129, + "line_number": 134, "type": "Secret Keyword" } ] diff --git a/vlmd-submission-tools/tests/test_common_utils.py b/vlmd-submission-tools/tests/test_common_utils.py index 80adc927..bbb5ff3f 100644 --- a/vlmd-submission-tools/tests/test_common_utils.py +++ b/vlmd-submission-tools/tests/test_common_utils.py @@ -43,14 +43,16 @@ def test_check_mds_study_id(self, mocked_post): hostname = "mycommons.planx-pla.net" study_id = "my_study_id" expected_data_dictionaries = { - "my first dictionary": "guid1", - "my second dictionary": "guid2" + "data_dictionaries" : { + "my first dictionary": "guid1", + "my second dictionary": "guid2" + } } mock_mds_response = MagicMock(requests.Response) mock_mds_response.status_code = 200 mock_mds_response.json.return_value = { "_guid_type": "discovery_metadata", - "data_dictionaries": expected_data_dictionaries + "variable_level_metadata": expected_data_dictionaries } mocked_post.return_value = mock_mds_response diff --git a/vlmd-submission-tools/tests/test_subcommand_read_and_validate_dictionary.py b/vlmd-submission-tools/tests/test_subcommand_read_and_validate_dictionary.py index 4cdb24bf..2c39f4c2 100644 --- a/vlmd-submission-tools/tests/test_subcommand_read_and_validate_dictionary.py +++ b/vlmd-submission-tools/tests/test_subcommand_read_and_validate_dictionary.py @@ -1,20 +1,22 @@ """Tests for the ``vlmd_submission_tools.subcommands.ReadAndValidateDictionary`` subcommand""" import os +import re from typing import NamedTuple +from unittest import mock from unittest.mock import MagicMock, patch -from frictionless import FrictionlessException import json from parameterized import parameterized from pathlib import Path import pytest import requests - - +import requests_mock from vlmd_submission_tools.subcommands import ReadAndValidateDictionary from utils import cleanup_files +DIR = Path(__file__).resolve().parent + class MockArgs(NamedTuple): file_name: str json_local_path: str @@ -22,15 +24,60 @@ class MockArgs(NamedTuple): output: str -class TestReadAndValidateDictionarySubcommand: +@pytest.fixture(scope="session") +def download_dir(tmpdir_factory): + path = tmpdir_factory.mktemp("vlmd_download_dir") + return path + + +@pytest.fixture +def template_submission_json(): + with open(Path(DIR, "templates/template_submission_small.json")) as f: + return json.load(f) + + +@pytest.fixture +def template_submission_invalid_json(): + with open(Path(DIR, "templates/template_submission_invalid.json")) as f: + return json.load(f) + + +@pytest.fixture +def template_submission_csv(): + with open(Path(DIR, "templates/template_submission_small.csv")) as f: + # return re.escape(f.read()) + # return bytes(f.read(), 'utf-8') + return f.read() + + +@pytest.fixture +def template_submission_invalid_csv(): + with open(Path(DIR, "templates/template_submission_invalid.csv")) as f: + return f.read() - def get_mock_args(self, file_name, dictionary_url): +@pytest.fixture +def template_submission_tsv(): + with open(Path(DIR, "templates/template_submission_small.tsv")) as f: + # return re.escape(f.read()) + # return bytes(f.read(), 'utf-8') + return f.read() + + +@pytest.fixture +def 
template_submission_invalid_tsv(): + with open(Path(DIR, "templates/template_submission_invalid.tsv")) as f: + return f.read() + + +class TestReadAndValidateDictionarySubcommand: + + def get_mock_args(self, file_name, json_local_path, dictionary_url, output): return MockArgs( file_name=file_name, - json_local_path="test_dictionary.json", + json_local_path=json_local_path, dictionary_url=dictionary_url, - output="validate.json", + output=output, ) @@ -49,114 +96,326 @@ def test_get_file_type_from_filename_exception(self): ReadAndValidateDictionary._get_file_type_from_filename(bad_filename) - @parameterized.expand(["csv", "tsv"]) - def test_read_and_validate_dictionary_csv(self, suffix): - # read valid csv/tsv dictionaries directly from file - args = self.get_mock_args(f"template_submission.{suffix}",f"tests/templates/template_submission.{suffix}") - expected_json = { + def test_download_from_url( + self, + template_submission_json, + template_submission_csv, + template_submission_tsv, + download_dir + ): + + url = "https://some.url" + json_local_path = f"{download_dir}/test_dict.json" + with requests_mock.Mocker() as m: + + file_type = "json" + # good data from url + m.get(url, json=template_submission_json) + result = ReadAndValidateDictionary._download_from_url( + file_type, url, json_local_path + ) + assert result == json_local_path + assert os.path.exists(json_local_path) + # TODO: read file and assert that is equal to mock_data + with open(json_local_path, 'r') as f: + downloaded_json = json.load(f) + assert downloaded_json == template_submission_json + + file_type = "csv" + expected_local_path = json_local_path.replace('json', f"{file_type}") + m.get( + url, + content = bytes(template_submission_csv, 'utf-8') + ) + + result = ReadAndValidateDictionary._download_from_url( + file_type, url, json_local_path + ) + assert result == expected_local_path + assert os.path.exists(expected_local_path) + with open(expected_local_path, 'r') as f: + downloaded_csv = f.read() + assert downloaded_csv == template_submission_csv + + file_type = "tsv" + expected_local_path = json_local_path.replace('json', f"{file_type}") + m.get( + url, + content = bytes(template_submission_tsv, 'utf-8') + ) + + result = ReadAndValidateDictionary._download_from_url( + file_type, url, json_local_path + ) + assert result == expected_local_path + assert os.path.exists(expected_local_path) + with open(expected_local_path, 'r') as f: + downloaded_tsv = f.read() + assert downloaded_tsv == template_submission_tsv + + + def test_download_from_url_failures(self, download_dir): + + file_type = "json" + url = "https://some.url" + json_local_path = f"{download_dir}/test_dict.json" + csv_local_path = json_local_path.replace('json', 'csv') + if os.path.exists(json_local_path): + Path(json_local_path).unlink() + + with requests_mock.Mocker() as m: + # bad url - request throws exception + output_path = None + m.get(url, exc=requests.HTTPError('Mocked HTTP Error')) + expected_error=f"Mocked HTTP Error" + with pytest.raises(Exception, match=expected_error): + output_path = ReadAndValidateDictionary._download_from_url( + file_type, url, json_local_path + ) + assert output_path == None + assert os.path.exists(json_local_path) == False + + # have a good url but a bad json_local_path + output_path = None + bad_local_path = "/does/not/exist.json" + csv_local_path = bad_local_path.replace('json', 'csv') + mock_data = {"title": "test json data"} + m.get(url, json=mock_data) + expected_error = re.escape(f"[Errno 2] No such file or directory: 
'{bad_local_path}'")
+            with pytest.raises(Exception, match=expected_error):
+                output_path = ReadAndValidateDictionary._download_from_url(
+                    file_type, url, bad_local_path
+                )
+            # no output file, no converted json, no original csv
+            assert output_path == None
+            assert os.path.exists(json_local_path) == False
+            assert os.path.exists(csv_local_path) == False
+
+
+    def test_read_and_validate_dictionary_json(self, template_submission_json, download_dir):
+        # read valid json dictionary
+        json_file_name = "template_submission_small.json"
+        path_to_input_dict = f"tests/templates/{json_file_name}"
+        args = self.get_mock_args(
+            file_name=json_file_name,
+            json_local_path=f"{download_dir}/test_dictionary.json",
+            dictionary_url="https://some.url",
+            output=f"{download_dir}/validate_artifact.json",
+        )
+        expected_validation_report = {
             "json_local_path": args.json_local_path,
-            "is_valid_dictionary": True
+            "is_valid_dictionary": True,
+            "errors": []
         }
+
         try:
-            ReadAndValidateDictionary.main(options=args)
-
-            # The converted json dictionary
-            assert Path(args.json_local_path).resolve().is_file()
-            with open(args.json_local_path, 'r') as fh:
-                converted_json = json.load(fh)
-            assert "title" in converted_json
-            assert "data_dictionary" in converted_json
-
-            # The output json for subcommand
-            with open(args.output, 'r') as fh:
-                result_json = json.load(fh)
-            assert json.dumps(result_json) == json.dumps(expected_json)
+            with requests_mock.Mocker() as m:
+                m.get(
+                    args.dictionary_url,
+                    text=json.dumps(template_submission_json)
+                )
+
+                ReadAndValidateDictionary.main(options=args)
+
+                # downloaded json dict is saved in 'json_local_path'.
+                assert Path(args.json_local_path).resolve().is_file()
+                with open(args.json_local_path, 'r') as fh:
+                    downloaded_json = json.load(fh)
+                assert "fields" in downloaded_json
+                assert downloaded_json == template_submission_json
+
+                # The output validation report json for subcommand
+                with open(args.output, 'r') as fh:
+                    validation_report = json.load(fh)
+                assert validation_report == expected_validation_report
 
         finally:
             cleanup_files([args.json_local_path, args.output])
 
 
-    @parameterized.expand(["csv"])
-    def test_read_and_validate_dictionary_csv_invalid_dictionary(self, suffix):
-        # read valid csv/tsv dictionaries directly from file
-        args = self.get_mock_args(f"template_submission_bad_format.{suffix}",f"tests/templates/template_submission_bad_format.{suffix}")
+    def test_read_and_validate_dictionary_bad_url(self, download_dir):
+        args = self.get_mock_args(
+            file_name="some_template.json",
+            json_local_path=f"{download_dir}/test_dictionary.json",
+            dictionary_url="https://some.url",
+            output=f"{download_dir}/validate_artifact.json",
+        )
+        with requests_mock.Mocker() as m:
+            m.get(
+                args.dictionary_url,
+                text="404 file not found",
+                status_code = 404
+            )
 
-        # Exception from bad input file
-        expected_error="Not a valid dictionary"
-        with pytest.raises(Exception, match=expected_error):
             ReadAndValidateDictionary.main(args)
-        assert os.path.exists(args.output) == False
+            assert os.path.exists(args.json_local_path) == False
+            assert os.path.exists(args.output) == False
 
 
-    @parameterized.expand(["csv", "tsv"])
-    def test_read_and_validate_dictionary_csv_does_not_exist(self, suffix):
-        # read valid csv/tsv dictionaries directly from file
-        args = self.get_mock_args(f"dict_does_not_exist.{suffix}",f"tests/templates/dict_does_not_exist.{suffix}")
+    def test_read_and_validate_dictionary_bad_local_path(
+        self, template_submission_json, download_dir
+    ):
+        args = self.get_mock_args(
+            
file_name="some_template.json", + json_local_path="/does/not/exist", + dictionary_url="https://some.url", + output=f"{download_dir}/validate_artifact.json", + ) + with requests_mock.Mocker() as m: + m.get( + args.dictionary_url, + text=json.dumps(template_submission_json) + ) - try: - # Exception from bad input file - expected_error=f"Frictionless could not read dictionary from url {args.dictionary_url}" - with pytest.raises(FrictionlessException, match=expected_error): - ReadAndValidateDictionary.main(args) + ReadAndValidateDictionary.main(args) assert os.path.exists(args.json_local_path) == False assert os.path.exists(args.output) == False - finally: - cleanup_files([args.json_local_path, args.output]) - - # JSON test will need mock of requests.get so that is reads the file from disk. - @patch('requests.get') - def test_read_and_validate_dictionary_json(self, mocked_request): - # read valid json dictionary - json_file_name = "template_submission_minimal.json" - path_to_input_dict = f"tests/templates/{json_file_name}" - args = self.get_mock_args(json_file_name,path_to_input_dict) - expected_json = { + def test_read_and_validate_dictionary_json_invalid_dictionary( + self, template_submission_invalid_json, download_dir + ): + args = self.get_mock_args( + file_name=f"template_submission_invalid.json", + json_local_path=f"{download_dir}/test_dict.json", + dictionary_url="https://some.url", + output=f"{download_dir}/validate_artifact.json", + ) + expected_validation_report = { "json_local_path": args.json_local_path, - "is_valid_dictionary": True + "is_valid_dictionary": False, + "errors": [{ + 'json_path': '$.fields[0].type', + 'message': "'character' is not one of ['number', 'integer', 'string', 'any', 'boolean', 'date', 'datetime', 'time', 'year', 'yearmonth', 'duration', 'geopoint']" + }] } - # mock pre-signed url response by reading from local test file - with open(path_to_input_dict, 'r') as fh: - input_dict_json = json.load(fh) - mocked_request.return_value.text = json.dumps(input_dict_json) try: - ReadAndValidateDictionary.main(options=args) - - # when input dict is json then converted json is the same. 
- assert Path(args.json_local_path).resolve().is_file() - with open(args.json_local_path, 'r') as fh: - converted_json = json.load(fh) - assert "title" in converted_json - assert "data_dictionary" in converted_json - assert json.dumps(converted_json) == json.dumps(input_dict_json) - - # The output json for subcommand - with open(args.output, 'r') as fh: - result_json = json.load(fh) - assert json.dumps(result_json) == json.dumps(expected_json) + with requests_mock.Mocker() as m: + m.get( + args.dictionary_url, + text=json.dumps(template_submission_invalid_json) + ) + ReadAndValidateDictionary.main(options=args) + # we should have our downloaded json file + assert Path(args.json_local_path).resolve().is_file() + # the validation report should show errors + assert Path(args.output).resolve().is_file() + with open(args.output, 'r') as fh: + validation_report = json.load(fh) + assert validation_report == expected_validation_report finally: cleanup_files([args.json_local_path, args.output]) - @patch('requests.get') - def test_read_and_validate_dictionary_json_does_not_exist(self, mocked_request): - # read valid csv/tsv dictionaries directly from file - args = self.get_mock_args(f"dict_does_not_exist.json",f"https://tests/templates/dict_does_not_exist.json") + @pytest.mark.parametrize( + "suffix", + ["csv", "tsv"] + ) + def test_read_and_validate_dictionary_csv( + self, + template_submission_csv, + template_submission_tsv, + template_submission_json, + download_dir, + suffix + ): + + args = self.get_mock_args( + file_name=f"template_submission.{suffix}", + json_local_path=f"{download_dir}/test_dictionary.json", + dictionary_url="https://some.url", + output=f"{download_dir}/validate_artifact.json", + ) + expected_validation_report = { + "json_local_path": args.json_local_path, + "is_valid_dictionary": True, + "errors": [] + } + expected_converted_json = template_submission_json + try: + with requests_mock.Mocker() as m: + + if suffix == 'csv': + m.get( + args.dictionary_url, + content = bytes(template_submission_csv, 'utf-8') + ) + elif suffix == 'tsv': + m.get( + args.dictionary_url, + content = bytes(template_submission_tsv, 'utf-8') + ) + + ReadAndValidateDictionary.main(options=args) + + # we should have a file for unconverted data + expected_local_path = args.json_local_path.replace('json', f'{suffix}') + assert Path(expected_local_path).resolve().is_file() + # we should have a converted json dictionary + assert Path(args.json_local_path).resolve().is_file() + with open(args.json_local_path, 'r') as f: + converted_json = json.load(f) + assert converted_json.get('fields') == expected_converted_json.get('fields') + # output validation report json artifact + with open(args.output, 'r') as f: + validation_report = json.load(f) + assert validation_report == expected_validation_report - # pre-signed url returns 404 - mocked_response = MagicMock(requests.Response) - mocked_response.status_code = 404 - mocked_response.json.return_value = { - "error": "no record found", + finally: + cleanup_files([expected_local_path, args.json_local_path, args.output]) + + + @pytest.mark.parametrize( + "suffix", + ["csv", "tsv"] + ) + def test_read_and_validate_dictionary_csv_invalid_dictionary( + self, + template_submission_invalid_csv, + template_submission_invalid_tsv, + download_dir, + suffix + ): + args = self.get_mock_args( + file_name=f"template_submission_invalid.{suffix}", + json_local_path=f"{download_dir}/test_dict.json", + dictionary_url="https://some.url", + 
output=f"{download_dir}/validate_artifact.json", + ) + expected_validation_report = { + "json_local_path": args.json_local_path, + "is_valid_dictionary": False, + "errors": [{ + 'json_path': '$[0].type', + 'message': "'character' is not valid under any of the given schemas" + }] } - mocked_request.return_value = mocked_response - mocked_request.return_value.text = "404 file not found" - # Exception from bad input file - expected_error=f"Could not read dictionary from url {args.dictionary_url}" - with pytest.raises(Exception, match=expected_error): - ReadAndValidateDictionary.main(args) - assert os.path.exists(args.json_local_path) == False - assert os.path.exists(args.output) == False + try: + with requests_mock.Mocker() as m: + if suffix == 'csv': + m.get( + args.dictionary_url, + content=bytes(template_submission_invalid_csv, 'utf-8') + ) + elif suffix == 'tsv': + m.get( + args.dictionary_url, + content=bytes(template_submission_invalid_tsv, 'utf-8') + ) + + ReadAndValidateDictionary.main(options=args) + + # csv file should have been downloaded + csv_local_path = args.json_local_path.replace('json',f'{suffix}') + assert Path(csv_local_path).resolve().is_file() + assert Path(args.json_local_path).resolve().is_file() + # validation report should show errors + assert Path(args.output).resolve().is_file() + with open(args.output, 'r') as fh: + validation_report = json.load(fh) + assert validation_report == expected_validation_report + + finally: + cleanup_files([csv_local_path, args.json_local_path, args.output]) diff --git a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py index 52d3528b..b767afb9 100644 --- a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py @@ -39,8 +39,10 @@ def test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_ args = self.get_mock_args() existing_data_dictionaries = { - "CVS baseline": "guid-1", - "JSON followup": "guid-2" + "data_dictionaries": { + "CVS baseline": "guid-1", + "JSON followup": "guid-2" + } } mocked_check_mds.return_value = existing_data_dictionaries @@ -54,6 +56,7 @@ def test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_ mocked_post_response.status_code = 200 mocked_post_response.json.return_value = { "_guid_type": "data_dictionary", + "title": args.dictionary_name, "data_dictionary": json_dictionary } mocked_mds_post.return_value = mocked_post_response @@ -66,7 +69,9 @@ def test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_ new_metadata = { "_guid_type": "discovery_metadata", "gen3_discovery": "discovery_metadata", - "data_dictionaries": new_data_dictionaries + "variable_level_metadata": { + "data_dictionaries": new_data_dictionaries + } } mocked_put_response = MagicMock(requests.Response) mocked_put_response.status_code = 200 From 46357b74b1d32f45d4306467c5f1eb72147fb682 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Fri, 14 Jun 2024 15:07:20 -0700 Subject: [PATCH 5/8] (HP-1483): update secrets.baseline --- .secrets.baseline | 4 ++-- .../test_subcommand_upload_dictionary_to_mds.py | 12 +++++++++++- .../subcommands/upload_dictionary_to_mds.py | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 38d485e5..0f6a468a 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": null, 
"lines": null }, - "generated_at": "2024-06-10T21:46:21Z", + "generated_at": "2024-06-14T21:59:54Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -486,7 +486,7 @@ { "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 134, + "line_number": 136, "type": "Secret Keyword" } ] diff --git a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py index b767afb9..d995aec9 100644 --- a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py @@ -15,16 +15,18 @@ class MockArgs(NamedTuple): json_local_path: str dictionary_name: str + is_valid_dictionary: str study_id: str output: str class TestGetDictionaryUrlSubcommand: - def get_mock_args(self): + def get_mock_args(self,is_valid_dictionary='True'): return MockArgs( json_local_path="tests/templates/template_submission_minimal.json", dictionary_name="Minimal_json_dict", + is_valid_dictionary=is_valid_dictionary, study_id="my_study_id", output="upload_output.json", ) @@ -151,3 +153,11 @@ def test_upload_dictionary_to_mds_failed_update(self, mocked_mds_put, mocked_mds with pytest.raises(Exception, match=expected_error): UploadDictionaryToMds.main(options=args) assert os.path.exists(args.output) == False + + + def test_upload_dictionary_to_mds_invalid_dict(self): + + args = self.get_mock_args(is_valid_dictionary='False') + + UploadDictionaryToMds.main(options=args) + assert os.path.exists(args.output) == False diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py index afe17453..11b2562c 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py @@ -41,6 +41,17 @@ def __add_arguments__(cls, parser: ArgumentParser) -> None: ), ) + parser.add_argument( + "-v", + "--is_valid_dictionary", + required=False, + type=str, + default="True", + help=( + "Skip the upload if not True" + ), + ) + parser.add_argument( "-s", "--study_id", @@ -81,6 +92,10 @@ def main(cls, options: Namespace) -> None: logger = Logger.get_logger(cls.__tool_name__()) logger.info(cls.__get_description__()) + if options.is_valid_dictionary.lower() != 'true': + logger.info("Skipping MDS upload. 
Dictionary is not valid.") + return + # Read json dictionary from local path logger.info("Reading dictionary from local file system.") try: From c1825e43216903b8272c6f2069edc44d02ef4a54 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Fri, 14 Jun 2024 17:36:01 -0700 Subject: [PATCH 6/8] (HP-1483): write output artifact when skipping upload --- .../tests/test_subcommand_upload_dictionary_to_mds.py | 10 +++++++++- .../subcommands/upload_dictionary_to_mds.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py index d995aec9..dbdbfbc3 100644 --- a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py @@ -158,6 +158,14 @@ def test_upload_dictionary_to_mds_failed_update(self, mocked_mds_put, mocked_mds def test_upload_dictionary_to_mds_invalid_dict(self): args = self.get_mock_args(is_valid_dictionary='False') + expected_output = { + "upload_status": None, + "dictionary_name": args.dictionary_name, + "mds_guid": None + } UploadDictionaryToMds.main(options=args) - assert os.path.exists(args.output) == False + assert os.path.exists(args.output) + with open(args.output, 'r') as fh: + result_json = json.load(fh) + assert result_json == expected_output diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py index 11b2562c..d826c48e 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py @@ -94,6 +94,15 @@ def main(cls, options: Namespace) -> None: if options.is_valid_dictionary.lower() != 'true': logger.info("Skipping MDS upload. 
Dictionary is not valid.") + # save the upload_status, dictionary_name and MDS guid output parameters + record_json = { + "upload_status": None, + "dictionary_name": options.dictionary_name, + "mds_guid": None + } + with open(options.output, 'w', encoding='utf-8') as o: + json.dump(record_json, o, ensure_ascii=False, indent=4) + logger.info(f"JSON response saved in {options.output}") return # Read json dictionary from local path @@ -104,7 +113,6 @@ def main(cls, options: Namespace) -> None: except: raise Exception("Could not read local json dictionary.") - # verify that the submitted study-id exists in mds db # TODO: decide if we want to handle exception here logger.info(f"Checking for study ID {options.study_id} in MDS") From 144a9169fb596ad64802da281fafe941fbe03572 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Fri, 14 Jun 2024 17:38:35 -0700 Subject: [PATCH 7/8] (HP-1483): remove comment line --- .../subcommands/upload_dictionary_to_mds.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py index d826c48e..e389415d 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py @@ -114,7 +114,6 @@ def main(cls, options: Namespace) -> None: raise Exception("Could not read local json dictionary.") # verify that the submitted study-id exists in mds db - # TODO: decide if we want to handle exception here logger.info(f"Checking for study ID {options.study_id} in MDS") vlmd_for_study = utils.check_mds_study_id(options.study_id, config.HOST_NAME) logger.info(f"Existing vlmd = {vlmd_for_study}") From dcc2faf5234698ee9cf5b73c3ca691f27345e341 Mon Sep 17 00:00:00 2001 From: George Thomas Date: Mon, 17 Jun 2024 15:03:39 -0700 Subject: [PATCH 8/8] (HP-1483): update secrets.baseline --- .secrets.baseline | 4 +- ...est_subcommand_upload_dictionary_to_mds.py | 57 ++++++++++++++----- .../subcommands/upload_dictionary_to_mds.py | 1 - 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 0f6a468a..244d80ac 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": null, "lines": null }, - "generated_at": "2024-06-14T21:59:54Z", + "generated_at": "2024-06-17T22:02:51Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -486,7 +486,7 @@ { "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 136, + "line_number": 163, "type": "Secret Keyword" } ] diff --git a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py index dbdbfbc3..e9d8c12c 100644 --- a/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py +++ b/vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py @@ -9,6 +9,7 @@ import uuid from utils import cleanup_files +from vlmd_submission_tools.common import config from vlmd_submission_tools.subcommands import UploadDictionaryToMds @@ -31,22 +32,31 @@ def get_mock_args(self,is_valid_dictionary='True'): output="upload_output.json", ) - @patch('vlmd_submission_tools.common.utils.check_mds_study_id') @patch('vlmd_submission_tools.common.utils.get_client_secret') @patch('vlmd_submission_tools.common.utils.get_client_token') @patch('requests.post') @patch('requests.put') - def 
test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_client_token, mocked_client_secret, mocked_check_mds): + def test_upload_dictionary_to_mds( + self, + mocked_mds_put, + mocked_mds_post, + mocked_client_token, + mocked_client_secret, + mocked_check_mds, + ): args = self.get_mock_args() - existing_data_dictionaries = { + + mock_new_guid = "bbf91e87-837d-4f36-88b8-96e83bd77e9a" + vlmd_for_study = { "data_dictionaries": { "CVS baseline": "guid-1", "JSON followup": "guid-2" - } + }, + "common_data_elements" : {"foo": "bar"} } - mocked_check_mds.return_value = existing_data_dictionaries + mocked_check_mds.return_value = vlmd_for_study mocked_client_secret.return_value = {"client_id": "client_id", "client_secret": "client_secret"} mocked_client_token.return_value = "my_client_token" @@ -56,24 +66,20 @@ def test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_ json_dictionary = json.load(fh) mocked_post_response = MagicMock(requests.Response) mocked_post_response.status_code = 200 - mocked_post_response.json.return_value = { + post_request_data = { "_guid_type": "data_dictionary", "title": args.dictionary_name, "data_dictionary": json_dictionary } + mocked_post_response.json.return_value = post_request_data mocked_mds_post.return_value = mocked_post_response - new_data_dictionaries = { - "CVS baseline": "guid-1", - "JSON followup": "guid-2", - args.dictionary_name: "guid-3" - } + # use a mock guid for a placeholder for the mock response + vlmd_for_study['data_dictionaries'][args.dictionary_name] = mock_new_guid new_metadata = { "_guid_type": "discovery_metadata", "gen3_discovery": "discovery_metadata", - "variable_level_metadata": { - "data_dictionaries": new_data_dictionaries - } + "variable_level_metadata": vlmd_for_study, } mocked_put_response = MagicMock(requests.Response) mocked_put_response.status_code = 200 @@ -82,15 +88,36 @@ def test_upload_dictionary_to_mds(self, mocked_mds_put, mocked_mds_post, mocked_ try: UploadDictionaryToMds.main(options=args) + + # output artifact should have the dictionary name and guid with open(args.output, 'r') as fh: result_json = json.load(fh) assert result_json.get("upload_status") == "ok" assert result_json.get("dictionary_name") == args.dictionary_name + new_guid = result_json.get("mds_guid") try: - uuid.UUID(result_json.get("mds_guid")) + uuid.UUID(new_guid) assert True except ValueError: assert False + + # check the post request with the data dictionary + mocked_mds_post.assert_called_with( + f'https://{config.HOST_NAME}/mds/metadata/{new_guid}', + headers={'Authorization': 'bearer my_client_token', 'content-type': 'application/json'}, + json=post_request_data + ) + + # get the actual guid generated by update + vlmd_for_study['data_dictionaries'][args.dictionary_name] = new_guid + + # check that request to update the study VLMD includes the updated VLMD + mocked_mds_put.assert_called_with( + f'https://{config.HOST_NAME}/mds/metadata/my_study_id?merge=True', + headers={'Authorization': 'bearer my_client_token', 'content-type': 'application/json'}, + json={'variable_level_metadata': vlmd_for_study} + ) + finally: cleanup_files([args.output]) diff --git a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py index e389415d..f9e120a9 100644 --- a/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py +++ 
b/vlmd-submission-tools/vlmd_submission_tools/subcommands/upload_dictionary_to_mds.py @@ -159,7 +159,6 @@ def main(cls, options: Namespace) -> None: json_data = { "variable_level_metadata": vlmd_for_study } - # data = {"data_dictionaries": existing_data_dictionaries} url = f"https://{config.HOST_NAME}/mds/metadata/{options.study_id}?merge=True" response = requests.put(url, headers=headers, json=json_data) response.raise_for_status()