-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
da5b346
commit e72d9a1
Showing
11 changed files
with
855 additions
and
21 deletions.
There are no files selected for viewing
30 changes: 30 additions & 0 deletions
30
apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""[taxref] Taxref v18 | ||
Revision ID: da3172cecdb1 | ||
Revises: 2c68a907f74c | ||
Create Date: 2025-01-14 11:44:12.356028 | ||
""" | ||
from alembic import op | ||
import sqlalchemy as sa | ||
|
||
|
||
# Revision identifiers, used by Alembic to order this migration in the history.
revision = "da3172cecdb1"
down_revision = "2c68a907f74c"
branch_labels = None
depends_on = None
|
||
|
||
def upgrade():
    """Add the ``cd_ba`` and ``nomenclatural_comment`` columns to ``taxonomie.taxref``."""
    new_columns = (
        sa.Column("cd_ba", sa.Integer()),
        sa.Column("nomenclatural_comment", sa.String(500)),
    )
    # Columns are added one by one, in the same order as they are declared above.
    for new_column in new_columns:
        op.add_column(table_name="taxref", column=new_column, schema="taxonomie")
|
||
def downgrade():
    """Drop the ``cd_ba`` and ``nomenclatural_comment`` columns from ``taxonomie.taxref``."""
    for dropped_name in ("cd_ba", "nomenclatural_comment"):
        op.drop_column(table_name="taxref", column_name=dropped_name, schema="taxonomie")
|
199 changes: 199 additions & 0 deletions
199
apptax/taxonomie/commands/migrate_taxref/commands_v18.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
import importlib | ||
import click | ||
from zipfile import ZipFile | ||
from sqlalchemy import text | ||
from flask.cli import with_appcontext | ||
|
||
from utils_flask_sqla.migrations.utils import open_remote_file | ||
|
||
from apptax.database import db | ||
from apptax.taxonomie.commands.utils import ( | ||
copy_from_csv, | ||
truncate_bdc_statuts, | ||
refresh_taxref_vm, | ||
insert_taxref_numversion, | ||
) | ||
from apptax.taxonomie.commands.taxref_v18 import import_bdc_statuts_v18 | ||
from .utils import save_data, analyse_taxref_changes | ||
from . import logger | ||
|
||
|
||
# Base URL passed to open_remote_file() when fetching the TaxRef archive.
base_url = "http://geonature.fr/data/inpn/taxonomie/"
|
||
|
||
@click.group(help="Migrate to TaxRef v18.")
def migrate_to_v18():
    """Click command group gathering the TaxRef v18 migration sub-commands."""
|
||
|
||
@migrate_to_v18.command()
@with_appcontext
def import_taxref_v18():
    """
    Migration procedure of taxref to version 18.

    Tests the disappearance of cd_noms.
    """
    # Prerequisite: deps_test_fk_dependencies_cd_nom
    detection_sql = importlib.resources.read_text(
        "apptax.taxonomie.commands.migrate_taxref.data.changes_detection",
        "0.2_taxref_detection_repercussion_disparition_cd_nom.sql",
    )
    db.session.execute(text(detection_sql))

    # Import the raw taxref v18 data, then persist it.
    import_data_taxref_v18()
    db.session.commit()

    # Analyse the upcoming changes.
    analyse_taxref_changes()
|
||
|
||
@migrate_to_v18.command()
@click.option("--keep-cdnom", is_flag=True)
@with_appcontext
def test_changes_detection(keep_cdnom):
    """Analyse the repercussions of a taxref change.

    :param keep-cdnom: Whether the missing cd_noms should be kept instead of being deleted
    :type keep-cdnom: boolean

    3 steps:
        - Detection of the missing cd_noms
        - Creation of a working copy of bib_noms
        - Analysis of the taxonomic modifications (split, merge, ...) and
            of their repercussions on the attributes and medias of taxhub
    """
    # The flag is forwarded unchanged to the shared analysis routine.
    analyse_taxref_changes(keep_missing_cd_nom=keep_cdnom)
|
||
|
||
@migrate_to_v18.command()
@click.option("--keep-oldtaxref", is_flag=True)
@click.option("--keep-oldbdc", is_flag=True)
@click.option("--keep-cdnom", is_flag=True)
@click.option("--taxref-region", type=str)
@click.option("--script_predetection", type=click.Path(exists=True))
@click.option("--script_postdetection", type=click.Path(exists=True))
@with_appcontext
def apply_changes(
    keep_oldtaxref,
    keep_oldbdc,
    keep_cdnom,
    taxref_region,
    script_predetection,
    script_postdetection,
):
    """Migration procedure of taxref to version 18.

    Applies the changes and imports the data into the taxref and bdc_status tables.

    :param keep-oldtaxref: Whether the previous version of the taxref referential should be kept
    :type keep-oldtaxref: boolean
    :param keep-oldbdc: Whether the previous version of the bdc_status referential should be kept
    :type keep-oldbdc: boolean
    :param keep-cdnom: Whether the missing cd_noms should be kept instead of being deleted
    :type keep-cdnom: boolean
    :param taxref-region: Value bound to the ``taxref_region`` parameter of the taxref update script
    :type taxref-region: str
    :param script_predetection: Location of a correction sql file run before the detection of changes
    :type script_predetection: Path
    :param script_postdetection: Location of a correction sql file run after the detection of changes
    :type script_postdetection: Path
    :raises Exception: Re-raises any error hit while updating taxref, after rolling back the session
    """

    # Analyse the upcoming changes.
    analyse_taxref_changes(
        keep_missing_cd_nom=keep_cdnom,
        script_predetection=script_predetection,
        script_postdetection=script_postdetection,
    )

    # Save taxref and bdc_status data
    save_data(17, keep_oldtaxref, keep_oldbdc)

    # Update taxref v18
    logger.info("Migration of taxref ...")
    try:
        query = text(
            importlib.resources.read_text(
                "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18",
                "3.2_alter_taxref_data.sql",
            )
        )
        db.session.execute(query, {"keep_cd_nom": keep_cdnom, "taxref_region": taxref_region})
        db.session.commit()
        logger.info("it's done")
    except Exception as e:
        logger.error(str(e))
        # Do not carry on after a failed update: the following steps would clean the
        # database and record version 18 although taxref was not migrated.
        db.session.rollback()
        raise

    # Import bdc status data and insert into taxhub tables
    import_and_format_dbc_status()

    # Clean DB
    logger.info("Clean DB")
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data", "5_clean_db.sql"
        )
    )
    db.session.execute(query)

    logger.info("Refresh materialized views…")
    refresh_taxref_vm()

    # Record the new taxref version number and persist the remaining changes.
    insert_taxref_numversion(18)
    db.session.commit()
|
||
|
||
def import_data_taxref_v18():
    """
    Import the raw taxref v18 data into the database, before it is processed.

    Runs the sql script creating the import tables, then downloads the
    taxref archive and copies three of its files into these tables.
    """
    logger.info("Import TAXREFv18 into tmp table…")

    # Create the temporary tables used to import taxref.
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18",
            "0_taxref_import_data.sql",
        )
    )
    db.session.execute(query)
    db.session.commit()

    with open_remote_file(base_url, "TAXREF_v18_2025.zip", open_fct=ZipFile) as archive:
        with archive.open("TAXREFv18.txt") as f:
            logger.info("Insert TAXREFv18 into taxonomie.import_taxref table…")
            copy_from_csv(
                f,
                table_name="import_taxref",
                delimiter="\t",
            )
        with archive.open("CDNOM_DISPARUS.txt") as f:
            logger.info("Insert missing cd_nom into taxonomie.cdnom_disparu table…")
            copy_from_csv(
                f,
                table_name="cdnom_disparu",
                delimiter="\t",
            )

        # Unlike the two files above, this one is read as WIN1252 with ";" as delimiter.
        with archive.open("rangs_note.csv") as f:
            logger.info("Insert rangs_note tmp table…")
            copy_from_csv(
                f,
                table_name="import_taxref_rangs",
                encoding="WIN1252",
                delimiter=";",
            )
|
||
|
||
def import_and_format_dbc_status():
    """
    Import the raw data of the bdc_status base into the database,
    then process it so that it is dispatched into the different tables.
    """
    # NOTE(review): both steps below are commented out, so this function
    # currently does nothing - confirm whether this is intentional.
    # truncate_bdc_statuts()
    # import_bdc_statuts_v18(logger)
72 changes: 72 additions & 0 deletions
72
apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/0_taxref_import_data.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
-- Create the import_taxref table, which receives the raw taxref file.

DROP TABLE IF EXISTS taxonomie.import_taxref;
CREATE TABLE taxonomie.import_taxref
(
    regne                 varchar(20),
    phylum                varchar(50),
    classe                varchar(50),
    ordre                 varchar(50),
    famille               varchar(50),
    sous_famille          varchar(50),
    tribu                 varchar(50),
    group1_inpn           varchar(50),
    group2_inpn           varchar(50),
    group3_inpn           varchar(50),
    cd_nom                integer NOT NULL,
    cd_taxsup             integer,
    cd_sup                integer,
    cd_ref                integer,
    cd_ba                 integer,
    rang                  varchar(10),
    lb_nom                varchar(100),
    lb_auteur             varchar(500),
    nomenclatural_comment varchar(500),
    nom_complet           varchar(500),
    nom_complet_html      varchar(500),
    nom_valide            varchar(500),
    nom_vern              text,
    nom_vern_eng          varchar(500),
    habitat               varchar(10),
    fr                    varchar(10),
    gf                    varchar(10),
    mar                   varchar(10),
    gua                   varchar(10),
    sm                    varchar(10),
    sb                    varchar(10),
    spm                   varchar(10),
    may                   varchar(10),
    epa                   varchar(10),
    reu                   varchar(10),
    sa                    varchar(10),
    ta                    varchar(10),
    taaf                  varchar(10),
    pf                    varchar(10),
    nc                    varchar(10),
    wf                    varchar(10),
    cli                   varchar(10),
    url                   text,
    url_inpn              text,
    -- Primary key on cd_nom, declared with the table instead of a separate ALTER TABLE.
    CONSTRAINT pk_import_taxref PRIMARY KEY (cd_nom)
);
|
||
-- Create the cdnom_disparu table, which receives the list of removed cd_nom.
DROP TABLE IF EXISTS taxonomie.cdnom_disparu;
CREATE TABLE taxonomie.cdnom_disparu
(
    cd_nom                 integer,
    plus_recente_diffusion varchar(50),
    cd_nom_remplacement    integer,
    cd_raison_suppression  integer,
    raison_suppression     text
);
|
||
|
||
-- Create the import_taxref_rangs table, which receives the rangs_note file.
DROP TABLE IF EXISTS taxonomie.import_taxref_rangs;
CREATE TABLE taxonomie.import_taxref_rangs
(
    level     integer               NOT NULL,
    rang      character varying(20) NOT NULL,
    detail_fr character varying(50) NOT NULL,
    detail_en character varying(50) NOT NULL
);
Oops, something went wrong.