Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove redundant import steps for easier debugging #825

Merged
merged 7 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ PG_HOST=localhost
PG_USER=datamade
PG_PASSWORD=

SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
DATA_ARCHIVE_PATH=data/wwic_download/countries

.PHONY : import_directory import_db flush_db recreate_db

Expand Down Expand Up @@ -43,4 +45,4 @@ recreate_db : import_directory flush_db import_docket_import data_archive
clean :
rm auth_models.json *errors.csv

include docket.mk
include docket.mk download.mk
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ appropriate directory, and fire the recipe to build a fresh database:
```
tmux new -s fresh-import
sudo su - datamade
workon sfm
source ~/.virtualenvs/sfm/bin/activate
cd ~/sfm-importer
make recreate_db
```
Expand All @@ -376,7 +376,7 @@ Finally, switch the `sfm` and `importer` databases:

```
# Renames the databases in a transaction -- the app doesn't need to stop
psql postgres < sfm_pc/management/commands/flush/rename.sql
psql -U postgres < sfm_pc/management/commands/flush/rename.sql
```

Presto! A fresh import, with no server downtime.
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ services:
- .:/app
environment:
- IMPORT_DIRECTORY=${IMPORT_DIRECTORY}
- PG_HOST=${PG_HOST}
- PG_USER=${PG_USER}
- PG_PASSWORD=${PG_PASSWORD}
- PG_HOST=postgres
- PG_USER=sfm
- PG_PASSWORD=postgres
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
command: python manage.py runserver 0.0.0.0:8000
Expand Down
66 changes: 11 additions & 55 deletions docket.mk
Original file line number Diff line number Diff line change
@@ -1,67 +1,23 @@
# Path variable for the source data
SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
# Remove every downloaded source file under $(SOURCE_DATA_PATH).
# Guarded: if SOURCE_DATA_PATH is unset or empty the command would expand to
# `rm -rf /*`, so make aborts before any recipe line is executed.
clean_import :
	$(if $(strip $(SOURCE_DATA_PATH)),,$(error SOURCE_DATA_PATH is empty - refusing to run rm -rf))
	rm -rf $(SOURCE_DATA_PATH)/*

# Variables for the archive data
DATA_ARCHIVE_PATH=data/wwic_download/countries
COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv
.PHONY: $(SOURCE_DATA_PATH) source_import clean_import

.PHONY: $(SOURCE_DATA_PATH) data/wwic_download/countries data_archive wwic_download.zip directories data/wwic_download/metadata/sfm_research_handbook.pdf


# Create the data archive and upload it to S3
data_archive : wwic_download.zip
aws s3 cp $< s3://$(shell cat configs/s3_config.json | jq -r '.data_archive_bucket')/
rm $<

wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
cd data/wwic_download && zip -r ../../$@ .

filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
echo "filtered csvs for entities"

directories :
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))

define filter_entity_data
$(shell csvgrep --columns $(1):status:admin --match 3 $< | \
python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@)
endef

%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
$(call filter_entity_data,unit)

%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
$(call filter_entity_data,person)

%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
$(call filter_entity_data,incident)

%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

data/wwic_download/metadata/sfm_research_handbook.pdf :
curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/


# Download the source data and load it into the database
%_import : %.csv $(SOURCE_DATA_PATH)
%_import : %.csv $(SOURCE_DATA_PATH) source_import
perl -pe "s/,/ /g" $< | \
xargs -L1 bash -c ' \
echo "Loading data for country code $$3" && (\
echo "Loading data for country code $$3 from $(SOURCE_DATA_PATH)/$$4" && (\
python -u manage.py import_country_data \
--country_code $$3 \
--country_path $(word 2, $^)/$$4 \
--sources_path $(word 2, $^)/sources.csv || \
--country_path $(SOURCE_DATA_PATH)/$$4 || \
exit 255 \
)'

source_import : $(SOURCE_DATA_PATH)
echo "Loading source data" && \
python -u manage.py import_source_data \
--sources_path $(SOURCE_DATA_PATH)/sources.csv

$(SOURCE_DATA_PATH) : import_docket.csv
perl -pe "s/,/ /g" $< | \
xargs -L1 bash -c ' \
Expand Down
49 changes: 49 additions & 0 deletions download.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Variables for the archive data
#
# COUNTRY_NAMES: the fifth field of every row of import_docket.csv (commas are
# turned into spaces, then `cut` picks field 5). Simply expanded (:=) so the
# perl/cut pipeline runs once when the makefile is parsed instead of on every
# reference to the variable.
# NOTE(review): a header row in import_docket.csv, or a field containing a
# space, would leak into this list - confirm the docket format.
COUNTRY_NAMES:=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
# ENTITIES: file names combined with each country as <country>_<entity> to form
# the prerequisites of filtered_data.
ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv

# Remove everything under the archive directory.
# Guarded: if DATA_ARCHIVE_PATH is unset or empty the command would expand to
# `rm -rf /*`, so make aborts before any recipe line is executed.
clean_archive :
	$(if $(strip $(DATA_ARCHIVE_PATH)),,$(error DATA_ARCHIVE_PATH is empty - refusing to run rm -rf))
	rm -rf $(DATA_ARCHIVE_PATH)/*

# Targets that are remade on every invocation regardless of files on disk.
# filtered_data is listed because it names a step, not a file it creates.
# wwic_download.zip and the handbook PDF are real files that are deliberately
# listed here, so the archive is re-zipped and the PDF re-downloaded each run.
.PHONY : $(DATA_ARCHIVE_PATH) data_archive wwic_download.zip directories filtered_data \
	data/wwic_download/metadata/sfm_research_handbook.pdf clean_archive

# Create the data archive and upload it to S3
#
# The bucket name is read from configs/s3_config.json (.data_archive_bucket)
# by the recipe's own shell, so a missing file, a jq failure, or a missing or
# null key stops the recipe instead of producing an upload to "s3:///".
# The local zip is removed only if the upload line succeeded.
data_archive : wwic_download.zip
	bucket=$$(jq -r '.data_archive_bucket // empty' configs/s3_config.json) && \
	test -n "$$bucket" && \
	aws s3 cp $< "s3://$$bucket/"
	rm $<

# Zip the contents of data/wwic_download into wwic_download.zip.
# Runs after filtered_data and the handbook PDF target have been made.
# The recipe changes into data/wwic_download so paths inside the archive are
# relative to it; ../../$@ puts the zip back in the directory make runs from.
wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
cd data/wwic_download && zip -r ../../$@ .

# Aggregate target for the per-country archive files.
# The foreach/patsubst expands to one <country>_<entity> prerequisite for every
# pairing of COUNTRY_NAMES and ENTITIES (e.g. <country>_units.csv); those are
# built by the %_*.csv / %_locations.geojson pattern rules in this file.
# The recipe itself only prints a message.
filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
echo "filtered csvs for entities"

# Create one output directory per country under $(DATA_ARCHIVE_PATH).
# mkdir -p makes missing parents and does not fail when a directory exists.
directories :
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))

# filter_entity_data: recipe text for the filtered entity rules.
#   $(1)  entity name (unit, person, incident); used both in the csvgrep
#         column name "$(1):status:admin" and as the --entity argument.
# Keeps the rows csvgrep matches on "3" in that column, pipes them through
# data/processors/blank_columns.py and writes the result to
# $(DATA_ARCHIVE_PATH)/<stem>/<target>.
# Expanded as ordinary recipe text (not via the shell function) so the command
# is echoed, is skipped under `make -n`, and a failing final command fails the
# rule instead of being silently ignored.
define filter_entity_data
csvgrep --columns $(1):status:admin --match 3 $< | \
	python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@
endef

# Filtered entity files: <country>_units.csv, <country>_persons.csv and
# <country>_incidents.csv are each derived from the matching file under
# $(SOURCE_DATA_PATH)/<country>/ via filter_entity_data.
# The stem (%) is the country directory name.
# NOTE(review): filter_entity_data redirects its output to
# $(DATA_ARCHIVE_PATH)/<country>/<target>, not to the target name in the
# working directory, so these rules do not create the file they are named after.
%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
$(call filter_entity_data,unit)

%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
$(call filter_entity_data,person)

%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
$(call filter_entity_data,incident)

# Unfiltered files: sources, locations (CSV) and locations (GeoJSON) are copied
# as-is from $(SOURCE_DATA_PATH)/<country>/ into
# $(DATA_ARCHIVE_PATH)/<country>/ under the name <country>_<file>.
# The stem (%) is the country directory name.
# NOTE(review): the copy destination is inside the archive directory, so the
# target name itself is never created in the working directory.
%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

# Download the research handbook PDF into the archive's metadata directory.
# mkdir -p $(@D) creates the metadata directory if it is missing.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the PDF; --location follows redirects from the download URL.
data/wwic_download/metadata/sfm_research_handbook.pdf :
	mkdir -p $(@D)
	curl --fail --location -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/
26 changes: 1 addition & 25 deletions fixtures/import_docket.csv
Original file line number Diff line number Diff line change
@@ -1,26 +1,2 @@
source_document_id,location_document_id,entitity_document_id,sfm:iso,sfm:country_name
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,1Ck11zLFVP6iJZFAR0_Xsq0UaeEJrmFl7ysbFX9mGu7c,ae,united-arab-emirates
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1EqAi59wjE1v-bYX3cC1qdl6zkThpWJ8YcvSPUC-RGHc,bd,bangladesh
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1wBmSuTkoEhosDzfHtyvZqd9SKez-sWoPoJ9oPonWsSo,bf,burkina-faso
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1c0O2XlwSpTAtB0AdhkkdgevWbsBUxvsmsETUwPPVIlk,bh,bahrain
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1cZVy2PUAzeq2xOoLRLwL9z9mqbry32zv_XY7sjEih2c,eg,egypt
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1CKkNsXwRdwXDiOldwT-6baw9DayXA2Vsn4ttpwP9SuM,jo,jordan
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1Y6-9-9kai-YyK1pXvcv_W6fqUn9lORltUhuFc2YUu1I,kw,kuwait
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1aGbMvFHzGn9ZlKKcFhiQ2c9egsoGDH11QBgyqmhS-IM,lr,liberia
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,16962grIJlisFbh2Zp9kBAhv6jVnZz6bHgb6RGBUHd3o,ma,morocco
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1UcgoJ_ytS-WSWl2_5OuV9h92wSCBWRFBoDtr4Ztqt14,ml,mali
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1vwb7ENaOeVRJIc5iCDBbF8K0Oql4SscENmLEdUT77Hg,mm,myanmar
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1cUtCEUuZRMqcxlRqFyoEM9eAdiDdWy2DUocroYivCx4,mr,mauritania
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,168KuHwUr9565zWaQVZ5au3qtGOb-qyJx_WOwNzqt_Eo,mx,mexico
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1_Pj5BryFXUPQPmMigII8G2HBUrpsnkK5V-Zu_9LCdGw,ne,niger
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1f3W3qJATCzVjZGw239Wy3D25THs8ThnvoC24aUFaGZQ,ng,nigeria
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1Uc5eZswLB6mrwQLhd_OYQm7v7ThH99N0eb7RbTtD5iY,np,nepal
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1h1a0S5aVv9Z3wucgKsYXmg5Z_CWzsKfjJSfJFcXxPSY,ph,philippines
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1UGOxjmJdJ9Dzj8cX3mZkgXAzT_ap_EMD2OqLjzDeGeE,qa,qatar
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1QAgVpj0bf_A0HGFzHgwxBbZqgIFurfH4h7u1MnfKzJc,rw,rwanda
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1a9XRXK5rG4_n0Afw7tIDkIbAmdydqKcU8J8zx5pLnVU,sa,saudi-arabia
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,11dEjFSe56YdmJfVeKhRZpQKSgRb6mfM1DWKoNFxYg9Y,sd,sudan
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YxRrB39ItO_kEPTrMQ9FJlvMEp1Fjby0vchHiwW3C_I,sl,sierra-leone
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,15cnbBqIlp4LzEXrs2z2L4_RTnY5e1GMrGV150JV615Q,td,chad
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1WlN4Hbv3JKE76hnNYkr80HU9oNJwjjOnj9nt7mm9ddw,ug,uganda
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1r62axKA5xgvJAiSiHrKgHZSATwSkKB-K15fdmLbn3zo,ye,yemen
1d2FIMxqeL7Oa1hQrnbuFuzNr2lVLwfD7hknTbX8E-Dw,12O-PyMp4CN7O8ZdnZpCNm8Rs3lzLfMvo,1uVz_9edm0ejSGOHCRV2BWZoPPKOX64XpBORjm47hopU,mm,myanmar
101 changes: 62 additions & 39 deletions location/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,21 @@


class LocationManager(models.Manager):

def from_humane_id(self, humane_id):
if isinstance(humane_id, str):
return self.get(**{'sfm__location:humane_id:admin': humane_id})
return self.get(
**{
"sfm__location:humane_id:admin": humane_id,
"geometry__isnull": False,
}
)

return self.filter(**{'sfm__location:humane_id:admin__in': humane_id})
return self.filter(
**{
"sfm__location:humane_id:admin__in": humane_id,
"geometry__isnull": False,
}
)


class Location(models.Model):
Expand All @@ -21,16 +30,20 @@ class Location(models.Model):
feature_type = models.TextField(blank=True, null=True)
tags = models.JSONField(blank=True, null=True)
sfm = models.JSONField(blank=True, null=True)
adminlevel1 = models.ForeignKey('self',
related_name='area_locations',
on_delete=models.CASCADE,
null=True,
blank=True)
adminlevel2 = models.ForeignKey('self',
related_name='place_locations',
on_delete=models.CASCADE,
null=True,
blank=True)
adminlevel1 = models.ForeignKey(
"self",
related_name="area_locations",
on_delete=models.CASCADE,
null=True,
blank=True,
)
adminlevel2 = models.ForeignKey(
"self",
related_name="place_locations",
on_delete=models.CASCADE,
null=True,
blank=True,
)
adminlevel = models.CharField(max_length=50, null=True, blank=True)
geometry = GeometryField(blank=True, null=True)

Expand Down Expand Up @@ -60,42 +73,52 @@ def related_entities(self):
for associationarea in self.associationarea_set.all():
association = associationarea.object_ref
organization = association.organization.get_value().value
related_entities.append({
'name': organization.name.get_value().value,
'entity_type': _('Organization'),
'start_date': association.startdate.get_value(),
'end_date': association.enddate.get_value(),
'open_ended': association.open_ended.get_value(),
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
})
related_entities.append(
{
"name": organization.name.get_value().value,
"entity_type": _("Organization"),
"start_date": association.startdate.get_value(),
"end_date": association.enddate.get_value(),
"open_ended": association.open_ended.get_value(),
"url": reverse(
"view-organization", kwargs={"slug": organization.uuid}
),
}
)

for emplacementsite in self.emplacementsite_set.all():
emplacement = emplacementsite.object_ref
organization = emplacement.organization.get_value().value
related_entities.append({
'name': organization.name.get_value().value,
'entity_type': _('Organization'),
'start_date': emplacement.startdate.get_value(),
'end_date': emplacement.enddate.get_value(),
'open_ended': emplacement.open_ended.get_value(),
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
})
related_entities.append(
{
"name": organization.name.get_value().value,
"entity_type": _("Organization"),
"start_date": emplacement.startdate.get_value(),
"end_date": emplacement.enddate.get_value(),
"open_ended": emplacement.open_ended.get_value(),
"url": reverse(
"view-organization", kwargs={"slug": organization.uuid}
),
}
)

for violationlocation in self.violationlocation_set.all():
violation = violationlocation.object_ref
related_entities.append({
'name': truncatewords(violation.description.get_value(), 10),
'entity_type': _('Violation'),
'start_date': violation.startdate.get_value(),
'end_date': violation.enddate.get_value(),
'open_ended': '',
'url': reverse('view-violation', kwargs={'slug': violation.uuid}),
})
related_entities.append(
{
"name": truncatewords(violation.description.get_value(), 10),
"entity_type": _("Violation"),
"start_date": violation.startdate.get_value(),
"end_date": violation.enddate.get_value(),
"open_ended": "",
"url": reverse("view-violation", kwargs={"slug": violation.uuid}),
}
)

return related_entities

@property
def osm_feature_type(self):
if self.feature_type == 'boundary':
return 'relation'
if self.feature_type == "boundary":
return "relation"
return self.feature_type
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ django-countries-plus==1.3.2
django-bootstrap-pagination==1.6.4
django-date-extensions==3.1.1
django-leaflet==0.28.2
psycopg2==2.8.6
psycopg2-binary==2.8.6
django-rosetta==0.9.8
django-queryset-csv==1.1.0
boto3==1.24.21
Expand Down
Loading
Loading