Skip to content

Commit

Permalink
fix bug in Q2 ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
juansensio committed Nov 28, 2024
1 parent 01cfe4f commit 8b52d4e
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 45 deletions.
1 change: 1 addition & 0 deletions api/api/src/usecases/datasets/ingest_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import geopandas as gpd
import json
import shutil
import pystac

from .retrieve_dataset import retrieve_owned_dataset
from ...errors import DatasetVersionDoesNotExistError
Expand Down
10 changes: 9 additions & 1 deletion api/api/src/usecases/datasets/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,12 @@ def calculate(cls, catalog: Union[pystac.Catalog, str]) -> None:
# f"MLDatasetExtension does not apply to type '{type(catalog).__name__}'"
# )

print("hyola1")

catalog.make_all_asset_hrefs_relative()

print("hyola2")

try:
catalog.add_metric(cls._search_spatial_duplicates(catalog))
# catalog.add_metric(cls._get_classes_balance(catalog))
Expand All @@ -366,6 +372,8 @@ def calculate(cls, catalog: Union[pystac.Catalog, str]) -> None:
finally:
catalog.make_all_asset_hrefs_relative()

print("hyola")

try:
print("Validating and saving...")
catalog.validate()
Expand All @@ -374,7 +382,7 @@ def calculate(cls, catalog: Union[pystac.Catalog, str]) -> None:
destination
) # Remove the old catalog and replace it with the new one
catalog.set_root(catalog)
catalog.normalize_and_save(root_href=destination)
catalog.normalize_and_save(root_href=destination, catalog_type=pystac.CatalogType.SELF_CONTAINED)
except STACValidationError:
# Return full callback
traceback.print_exc()
Expand Down
2 changes: 1 addition & 1 deletion eotdl/eotdl/curation/stac/extensions/ml_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def calculate(cls, catalog: Union[pystac.Catalog, str]) -> None:
destination
) # Remove the old catalog and replace it with the new one
catalog.set_root(catalog)
catalog.normalize_and_save(root_href=destination)
catalog.normalize_and_save(root_href=destination, catalog_type=pystac.CatalogType.SELF_CONTAINED)
print("Success!")
except STACValidationError:
# Return full callback
Expand Down
3 changes: 2 additions & 1 deletion eotdl/eotdl/datasets/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from tqdm import tqdm
import json
import frontmatter
import pystac

from ..auth import with_auth
from .metadata import Metadata
Expand Down Expand Up @@ -129,7 +130,7 @@ def ingest_stac(stac_catalog, logger=None, user=None):
repo, files_repo = DatasetsAPIRepo(), FilesAPIRepo()
# load catalog
logger("Loading STAC catalog...")
df = STACDataFrame.from_stac_file(stac_catalog)
df = STACDataFrame.from_stac_file(stac_catalog) # assets are absolute for file ingestion
catalog = df[df["type"] == "Catalog"]
assert len(catalog) == 1, "STAC catalog must have exactly one root catalog"
dataset_name = catalog.id.iloc[0]
Expand Down
4 changes: 2 additions & 2 deletions eotdl/eotdl/repos/APIRepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

class APIRepo:
def __init__(self, url=None):
default_url = "https://api.eotdl.com/"
# default_url = "http://localhost:8010/"
# default_url = "https://api.eotdl.com/"
default_url = "http://localhost:8001/"
self.url = url if url else os.getenv("EOTDL_API_URL", default_url)

def format_response(self, response):
Expand Down
89 changes: 49 additions & 40 deletions tutorials/notebooks/04_q2_datasets.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -34,14 +44,13 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['EuroSAT-small-STAC',\n",
" 'EuroSAT-RGB-small-STAC',\n",
"['EuroSAT-RGB-small-STAC',\n",
" 'jaca_dataset_stac',\n",
" 'eurosat_rgb_dataset',\n",
" 'jaca_dataset_q2',\n",
Expand All @@ -56,7 +65,7 @@
" 'eurosat_rgb_stac_labels']"
]
},
"execution_count": 15,
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -69,26 +78,26 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating splits...\n",
"Total size: 27000\n",
"Train size: 21600\n",
"Test size: 2700\n",
"Validation size: 2700\n",
"Total size: 100\n",
"Train size: 80\n",
"Test size: 10\n",
"Validation size: 10\n",
"Generating Training split...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 21600/21600 [18:47<00:00, 19.16it/s]\n"
"100%|██████████| 80/80 [00:00<00:00, 4559.65it/s]\n"
]
},
{
Expand All @@ -102,7 +111,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2700/2700 [02:27<00:00, 18.33it/s]\n"
"100%|██████████| 10/10 [00:00<00:00, 3770.16it/s]\n"
]
},
{
Expand All @@ -116,15 +125,28 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2700/2700 [02:24<00:00, 18.63it/s]\n"
"100%|██████████| 10/10 [00:00<00:00, 3216.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success on splits generation!\n",
"Validating and saving...\n",
"Validating and saving...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success!\n"
]
}
Expand Down Expand Up @@ -156,30 +178,30 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Looking for spatial duplicates...: 400it [00:00, 6598.45it/s]\n",
"Calculating classes balance...: 400it [00:00, 230551.27it/s]"
"Looking for spatial duplicates...: 0it [00:00, ?it/s]"
]
},
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"Validating and saving...\n",
"Success!\n"
"Looking for spatial duplicates...: 400it [00:00, 4398.40it/s]\n",
"Calculating classes balance...: 400it [00:00, 168260.11it/s]\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
"Validating and saving...\n",
"Success!\n"
]
}
],
Expand Down Expand Up @@ -216,43 +238,30 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading STAC catalog...\n",
"New version created, version: 1\n"
"New version created, version: 29\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 400/400 [01:36<00:00, 4.16it/s]\n"
"100%|██████████| 400/400 [00:05<00:00, 78.97it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ingesting STAC catalog...\n"
]
},
{
"ename": "Exception",
"evalue": "[Errno 2] No such file or directory: '/home/juan/Desktop/eotdl/tutorials/notebooks/data/EuroSAT-Q2-small/catalog.json'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[23], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01meotdl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ingest_dataset\n\u001b[0;32m----> 3\u001b[0m \u001b[43mingest_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdata/EuroSAT-Q2-small\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/lib/python3.11/site-packages/eotdl/datasets/ingest.py:29\u001b[0m, in \u001b[0;36mingest_dataset\u001b[0;34m(path, verbose, logger, force_metadata_update, sync_metadata)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPath must be a folder\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcatalog.json\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m [f\u001b[38;5;241m.\u001b[39mname \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m path\u001b[38;5;241m.\u001b[39miterdir()]:\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mingest_stac\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcatalog.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogger\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)\n",
"File \u001b[0;32m~/miniconda3/lib/python3.11/site-packages/eotdl/auth/auth.py:61\u001b[0m, in \u001b[0;36mwith_auth.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 60\u001b[0m user \u001b[38;5;241m=\u001b[39m auth()\n\u001b[0;32m---> 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/lib/python3.11/site-packages/eotdl/datasets/ingest.py:167\u001b[0m, in \u001b[0;36mingest_stac\u001b[0;34m(stac_catalog, logger, user)\u001b[0m\n\u001b[1;32m 164\u001b[0m data, error \u001b[38;5;241m=\u001b[39m repo\u001b[38;5;241m.\u001b[39mingest_stac(json\u001b[38;5;241m.\u001b[39mloads(df\u001b[38;5;241m.\u001b[39mto_json()), dataset_id, user)\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m error:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;66;03m# TODO: delete all assets that were uploaded\u001b[39;00m\n\u001b[0;32m--> 167\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(error)\n\u001b[1;32m 168\u001b[0m logger(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDone\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n",
"\u001b[0;31mException\u001b[0m: [Errno 2] No such file or directory: '/home/juan/Desktop/eotdl/tutorials/notebooks/data/EuroSAT-Q2-small/catalog.json'"
"Ingesting STAC catalog...\n",
"Done\n"
]
}
],
Expand All @@ -279,7 +288,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.8.19"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 8b52d4e

Please sign in to comment.