Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding NTL Metadata, seriously #107

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,382 changes: 1,382 additions & 0 deletions docs/user-docs/space2stats_nighttime_lights.ipynb

Large diffs are not rendered by default.

741 changes: 741 additions & 0 deletions notebooks/MP_SCRIPTS/NighttimeLights/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

519 changes: 519 additions & 0 deletions notebooks/MP_SCRIPTS/NighttimeLights/TEST_zonal_stats_data_NTL.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,7 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False):
data_prefix = "VIIRS_ANNUAL_EOG"

# Get list of nighttime lights VIIRS data
# ntl_files = ntl.aws_search_ntl()
ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_ANNUAL_EOG_V21"
ntl_files = [
os.path.join(ntl_folder, x)
for x in os.listdir(ntl_folder)
if x.endswith(".tif")
]
ntl_files = ntl.aws_search_ntl()

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)
admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp"
Expand Down
247 changes: 247 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

771 changes: 771 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/TEST_zonal_stats_data.ipynb

Large diffs are not rendered by default.

173 changes: 173 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/zonal_urbanization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import multiprocessing
import os
import sys

import GOSTrocks.ntlMisc as ntl
import GOSTrocks.rasterMisc as rMisc
import pandas as pd
from GOSTrocks.misc import tPrint
from h3 import h3

# import geopandas as gpd
# import numpy as np


sys.path.append("../../src")
import global_zonal
import h3_helper

# Bucket that the per-tile output CSV paths in this script are built against.
AWS_S3_BUCKET = "wbg-geography01"

# AWS credentials are taken from the process environment; each value is None
# when the corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")

def build_s3_csv_path(bucket, data_prefix, h3_key, filename):
    """Return the ``s3://`` URL of one per-tile output CSV.

    Args:
        bucket: Name of the destination S3 bucket.
        data_prefix: Dataset folder name placed under ``GLOBAL/``.
        h3_key: Key of the H3 level-1 tile the CSV belongs to.
        filename: File name of the CSV inside the tile folder.

    Returns:
        The full ``s3://<bucket>/Space2Stats/h3_stats_data/GLOBAL/...`` path.
    """
    s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_key}/{filename}"
    return os.path.join("s3://", bucket, s3_key)


def csv_exists(csv_path):
    """Return True when pandas can open *csv_path* and read its header.

    This is a best-effort probe: any failure (missing object, unreadable
    or empty file, rejected credentials) is reported as "does not exist"
    so that the tile is simply scheduled for processing.
    """
    try:
        # nrows=0 reads only the header instead of downloading every row.
        pd.read_csv(csv_path, nrows=0)
    except Exception:
        return False
    return True


def write_zonal_result(combo, storage_options):
    """Write one zonal-statistics result to its destination CSV.

    Args:
        combo: Mapping whose first key is the output path and whose value
            is the table to write (an object with a ``to_csv`` method).
        storage_options: Passed through unchanged to ``to_csv``.

    Returns:
        The output path the table was written to.
    """
    out_file = list(combo.keys())[0]
    combo[out_file].to_csv(out_file, storage_options=storage_options)
    return out_file


if __name__ == "__main__":
    # --- Run switches -----------------------------------------------------
    # `multiprocess` was referenced below but never defined, which raised a
    # NameError as soon as one of the run flags was enabled. Set it to False
    # to process tiles one at a time (easier to debug).
    multiprocess = True
    verbose = True
    run_urban = False
    run_urban_pop = False
    # When True, population tiles whose output CSV can already be read are
    # not queued again, so an interrupted run can be resumed.
    skip_existing_pop = True

    tPrint("Starting")
    h3_level = 6
    data_prefix = "Urbanization"
    data_prefix_pop = "Urbanization_Pop"

    # Urbanization layers
    unq_urban = [11, 12, 13, 21, 22, 23, 30]
    ghsl_folder = "/home/public/Data/GLOBAL/GHSL/"
    ghs_smod = os.path.join(
        ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
    )
    ghs_pop = os.path.join(
        ghsl_folder, "POP", "GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"
    )

    # Credentials handed to every to_csv call that writes to S3.
    storage_options = {
        "key": AWS_ACCESS_KEY_ID,
        "secret": AWS_SECRET_ACCESS_KEY,
        "token": AWS_SESSION_TOKEN,
    }

    h3_1_list = h3_helper.generate_lvl1_lists(
        h3_level, return_gdf=True, buffer0=True, read_pickle=True, write_pickle=False
    )
    if verbose:
        tPrint("H3_1 list generated")

    # Build one argument list per H3 level-1 tile for each enabled analysis.
    urban_pop_args = []
    urban_args = []
    for h3_1_key, cur_gdf in h3_1_list.items():
        if run_urban_pop:
            pop_full_path = build_s3_csv_path(
                AWS_S3_BUCKET,
                data_prefix_pop,
                h3_1_key,
                "GHS_POP_2020_Urban_Breakdown.csv",
            )
            if not (skip_existing_pop and csv_exists(pop_full_path)):
                urban_pop_args.append(
                    [cur_gdf, "shape_id", ghs_pop, ghs_smod, pop_full_path, unq_urban]
                )
        if run_urban:
            urban_full_path = build_s3_csv_path(
                AWS_S3_BUCKET, data_prefix, h3_1_key, "GHS_SMOD_2020.csv"
            )
            urban_args.append(
                [cur_gdf, "shape_id", ghs_smod, unq_urban, urban_full_path]
            )

    if run_urban:
        tPrint(f"Running calculations on urban: {len(urban_args)} processes")
        # Pool(processes=0) raises ValueError, so an empty task list falls
        # through to the serial loop, which then does nothing.
        if multiprocess and urban_args:
            with multiprocessing.Pool(processes=min([70, len(urban_args)])) as pool:
                results = pool.starmap(global_zonal.zonal_stats_categories, urban_args)
            tPrint(f"Finished urban calculations: {len(results)}")
            for combo in results:
                write_zonal_result(combo, storage_options)
        else:
            for a in urban_args:
                combo = global_zonal.zonal_stats_categories(*a)
                out_file = write_zonal_result(combo, storage_options)
                tPrint(f"Finished {out_file}")

    if run_urban_pop:
        tPrint(
            f"Running calculations on urban population: {len(urban_pop_args)} processes"
        )
        # Same empty-list guard as above.
        if multiprocess and urban_pop_args:
            with multiprocessing.Pool(processes=min([40, len(urban_pop_args)])) as pool:
                results = pool.starmap(
                    global_zonal.zonal_stats_categorical, urban_pop_args
                )
            tPrint(f"Finished multiprocessing urban pop calculations: {len(results)}")
            for combo in results:
                write_zonal_result(combo, storage_options)
        else:
            for a in urban_pop_args:
                combo = global_zonal.zonal_stats_categorical(
                    *a, verbose=verbose, minVal=0
                )
                tPrint(f"Completed {list(combo.keys())[0]}")
                write_zonal_result(combo, storage_options)

    tPrint("Finished")
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,17 @@
},
{
"rel": "item",
"href": "./nighttime_lights_2013/nighttime_lights_2013.json",
"href": "./urbanization_ghssmod/urbanization_ghssmod.json",
"type": "application/json",
"title": "Nighttime Lights"
"title": "Urbanization by population and by area"
},
{
"rel": "item",
"href": "./urbanization_ghssmod/urbanization_ghssmod.json",
"href": "./nighttime_lights/nighttime_lights.json",
"type": "application/json",
"title": "Urbanization by population and by area"
}
"title": "Nighttime Lights"
}

],
"Title": "Space2Stats Database",
"Description": "This database contains geospatial statistics for the entire globe standardized to a hexagonal grid. The spatial unit of the dataset is the H3 level 6 (approximately 36 sq. km. per cell). The variables cover a wide range of geographic themes relevant to international development, including demographic, socio-economic, environmental, climate, and infrastructure. An API enables users to query, access, and aggregate statistics from the Space2Stats database. The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set.",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
{
"type": "Feature",
"stac_version": "1.0.0",
"stac_extensions": [
"https://stac-extensions.github.io/table/v1.2.0/schema.json",
"https://stac-extensions.github.io/scientific/v1.0.0/schema.json"
],
"id": "nighttime_lights",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-179.99999561620714,
-89.98750455101016
],
[
-179.99999561620714,
89.98750455101016
],
[
179.99999096313272,
89.98750455101016
],
[
179.99999096313272,
-89.98750455101016
],
[
-179.99999561620714,
-89.98750455101016
]
]
]
},
"bbox": [
-179.99999561620714,
-89.98750455101016,
179.99999096313272,
89.98750455101016
],
"properties": {
"name": "Nighttime Lights",
"description": "Sum of luminosity values measured by monthly composites from VIIRS satellite.",
"methodological_notes": "Monthly composites generated by NASA through the Light Every Night partnership.",
"source_data": "World Bank - Light Every Night, https://registry.opendata.aws/wb-light-every-night/",
"sci:citation": "tbd",
"method": "sum",
"resolution": "500 mts",
"themes": "Socio-economic",
"table:columns": [
{
"name": "sum_viirs_ntl_2012",
"description": "Sum of VIIRS nighttime lights brightness for 2012",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2013",
"description": "Sum of VIIRS nighttime lights brightness for 2013",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2014",
"description": "Sum of VIIRS nighttime lights brightness for 2014",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2015",
"description": "Sum of VIIRS nighttime lights brightness for 2015",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2016",
"description": "Sum of VIIRS nighttime lights brightness for 2016",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2017",
"description": "Sum of VIIRS nighttime lights brightness for 2017",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2018",
"description": "Sum of VIIRS nighttime lights brightness for 2018",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2019",
"description": "Sum of VIIRS nighttime lights brightness for 2019",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2020",
"description": "Sum of VIIRS nighttime lights brightness for 2020",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2021",
"description": "Sum of VIIRS nighttime lights brightness for 2021",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2022",
"description": "Sum of VIIRS nighttime lights brightness for 2022",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2023",
"description": "Sum of VIIRS nighttime lights brightness for 2023",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2024",
"description": "Sum of VIIRS nighttime lights brightness for 2024",
"type": "float64"
},
{
"name": "hex_id",
"description": "H3 unique identifier",
"type": "object"
}
],
"datetime": "2024-12-17T09:05:44.687946Z"
},
"links": [
{
"rel": "root",
"href": "../../catalog.json",
"type": "application/json",
"title": "Space2Stats Database"
},
{
"rel": "collection",
"href": "../collection.json",
"type": "application/json",
"title": "Space2Stats Collection"
},
{
"rel": "parent",
"href": "../collection.json",
"type": "application/json",
"title": "Space2Stats Collection"
}
],
"assets": {
"api-docs": {
"href": "https://space2stats.ds.io/docs",
"type": "text/html",
"title": "API Documentation",
"roles": [
"metadata"
]
}
},
"collection": "space2stats-collection"
}
Loading
Loading