r''' fre cmor list
because ian got tired of typing things like the following in bash...

varname=sos; \
table_files=$(ls fre/tests/test_files/cmip6-cmor-tables/Tables/CMIP6_*.json); \
for table_file in $table_files; do \
    echo $table_file; \
    cat $table_file | grep -A 10 "\"$varname\""; \
done;

'''

import glob
import json
from pathlib import Path

# keys of a MIP-table variable entry that are too noisy to print to screen
DO_NOT_PRINT_LIST = [ 'comment',
                      'ok_min_mean_abs', 'ok_max_mean_abs',
                      'valid_min', 'valid_max' ]


def print_var_content( table_config_file = None, var_name = None):
    '''
    one variable printing routine- looks for info regarding var_name in table_config_file and
    prints it to screen. accepts two arguments:
        table_config_file: open, readable file object holding the json of one MIP table
        var_name: string, key to look up under the table's "variable_entry" section
    returns None. silently returns when the json has no "variable_entry" section (not a variable
    table) or when var_name is not in that section. keys in DO_NOT_PRINT_LIST are not printed.
    raises ValueError (chained to the underlying error) if the json cannot be read or parsed.
    '''
    try:
        proj_table_vars = json.load(table_config_file)
    except (OSError, ValueError, TypeError, AttributeError) as exc:
        # JSONDecodeError / UnicodeDecodeError are ValueErrors, a None handle is an AttributeError
        raise ValueError(
            f'(print_var_content) problem getting proj_table_vars from {table_config_file}') from exc

    # not every json file in a tables directory is a variable table, those are skipped quietly
    variable_entries = None
    if isinstance(proj_table_vars, dict):
        variable_entries = proj_table_vars.get("variable_entry")
    if not isinstance(variable_entries, dict):
        return

    var_content = variable_entries.get(var_name)
    if var_content is None:
        return

    # the header's table_id reads like "Table Omon", the last token is the table name
    table_name = None
    try:
        table_name = proj_table_vars["Header"]["table_id"].split()[-1]
    except (KeyError, AttributeError, IndexError, TypeError):
        print('(print_var_content) WARNING couldnt get header and table_name field')

    if table_name is not None:
        print(f'(print_var_content) found {var_name} data in table {table_name}!')
    else:
        print(f'(print_var_content) found {var_name} data in table, but not its table_name!')

    print(f'    variable key: {var_name}')
    for key, value in var_content.items():
        if key in DO_NOT_PRINT_LIST:
            continue
        print(f'    {key}: {value}')
    print('\n')


def cmor_list_subtool( json_var_list = None, json_table_config_dir = None, opt_var_name = None):
    '''
    finds tables in the CMIP json config directory containing variable data of interest. prints it
    out to screen, intended largely as a helper tool for cli users. accepts three arguments:
        json_var_list: optional string, path to a json file of key/value pairs. the values are the
                       variable names searched for.
        json_table_config_dir: string, directory searched for files matching CMIP6_*.json
        opt_var_name: optional string, a single variable name to search for. when given, it takes
                      precedence over json_var_list.
    returns None.
    raises OSError if json_table_config_dir is missing or holds no CMIP6_*.json files, and
    ValueError if neither opt_var_name nor json_var_list is given.
    '''
    if json_table_config_dir is None or not Path(json_table_config_dir).exists():
        raise OSError(f'(cmor_list_subtool) ERROR directory {json_table_config_dir} does not exist! exit.')

    print(f'(cmor_list_subtool) attempting to find and open files in dir: \n {json_table_config_dir} ')
    # glob.glob returns an empty list (never None) when nothing matches. sorted for stable output.
    json_table_configs = sorted(glob.glob(f'{json_table_config_dir}/CMIP6_*.json'))
    if not json_table_configs:
        raise OSError(f'(cmor_list_subtool) ERROR directory {json_table_config_dir} contains no JSON files, exit.')
    print('(cmor_list_subtool) found content in json_table_config_dir')

    var_list = None
    if json_var_list is not None:
        with open( json_var_list, "r", encoding = "utf-8") as var_list_file :
            var_list = json.load(var_list_file)

    if opt_var_name is None and var_list is None:
        raise ValueError('(cmor_list_subtool) ERROR: no opt_var_name given but also no content in variable list!!! exit!')

    if opt_var_name is not None:
        print('(cmor_list_subtool) opt_var_name is not None: looking for only ONE variables worth of info!')
        var_names = [ opt_var_name ]
    else:
        print('(cmor_list_subtool) opt_var_name is None, and var_list is not None, looking for many variables worth of info!')
        var_names = [ str(target_var) for target_var in var_list.values() ]

    # print_var_content consumes an open file handle, so each table is re-opened per variable
    for var_name in var_names:
        for json_table_config in json_table_configs:
            with open( json_table_config, "r", encoding = "utf-8") as table_config_file:
                print_var_content(table_config_file, var_name)
see cmor_list_subtool for argument descriptions.''' + return cmor_list_subtool(json_var_list, json_table_config_dir, opt_var_name) + + +if __name__ == '__main__': + cmor_list_subtool() diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 0f4ef243..1047ba75 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -7,55 +7,70 @@ import os import glob import json +import shutil import subprocess from pathlib import Path +import numpy as np + import netCDF4 as nc import click import cmor # ----- \start consts DEBUG_MODE_RUN_ONE = True - # ----- \end consts -### ------ helper functions ------ ### -def copy_nc(in_nc, out_nc): + +def from_dis_gimme_dis(from_dis, gimme_dis): ''' - copy target input netcdf file in_nc to target out_nc. I have to think this is not a trivial copy - operation, as if it were, using shutil's copy would be sufficient. accepts two arguments - in_nc: string, path to an input netcdf file we wish to copy - out_nc: string, an output path to copy the targeted input netcdf file to + gives you gimme_dis from from_dis. accepts two arguments, both mandatory. + from_dis: the target netCDF4.Dataset object to try reading from + gimme_dis: what from_dis is hopefully gonna have and you're gonna get ''' - print(f'(copy_nc) in_nc: {in_nc}\n' - f' out_nc: {out_nc}') - - # input file - dsin = nc.Dataset(in_nc) - - # output file, same exact data_model as input file. - # note- totally infuriating... 
def from_dis_gimme_dis(from_dis, gimme_dis):
    '''
    gives you gimme_dis from from_dis. accepts two arguments, both mandatory.
        from_dis: the target netCDF4.Dataset object to try reading from
        gimme_dis: what from_dis is hopefully gonna have and you're gonna get
    returns a copy of from_dis[gimme_dis][:], or None (after printing a warning) if the read raised.
    '''
    try:
        return from_dis[gimme_dis][:].copy()
    except Exception as exc: # deliberately broad: any failed read just means "not available"
        print(f'(from_dis_gimme_dis) WARNING I am sorry, I could not not give you this: {gimme_dis}'
              f' exc = {exc}'
              f' returning None!' )
        return None


def find_statics_file(bronx_file_path):
    '''
    looks for a statics file belonging to the pp component that bronx_file_path lives under.
    accepts one argument:
        bronx_file_path: string, '/'-separated path to a file somewhere below <...>/pp/<component>/
    the path is trimmed back to <...>/pp/<component> (using the right-most 'pp' element) and that
    directory is globbed for '*static*.nc'. the first match in sorted order is returned as a string.
    returns None if that match does not exist on disk (e.g. a dangling link).
    raises FileNotFoundError if the path has no 'pp' element followed by a component, or if no
    statics file matches.
    '''
    print('(find_statics_file) HELLO WORLD!')
    bronx_file_path_elem = bronx_file_path.split('/')
    print(f'bronx_file_path_elem = {bronx_file_path_elem}')

    # right-most 'pp' that still has a component directory after it
    pp_positions = [ idx for idx, elem in enumerate(bronx_file_path_elem[:-1]) if elem == 'pp' ]
    if not pp_positions:
        raise FileNotFoundError(
            f'(find_statics_file) no pp/<component> directory found in path {bronx_file_path}')
    bronx_file_path_elem = bronx_file_path_elem[ : pp_positions[-1] + 2 ]
    print(bronx_file_path_elem)

    statics_path = '/'.join(bronx_file_path_elem)
    statics_candidates = sorted(glob.glob(statics_path + '/*static*.nc'))
    if not statics_candidates:
        raise FileNotFoundError(
            f'(find_statics_file) no statics file matching {statics_path}/*static*.nc')

    statics_file = statics_candidates[0]
    if Path(statics_file).exists():
        return statics_file
    return None


def create_lev_bnds(bound_these = None, with_these = None):
    '''
    builds cell bounds for a vertical axis out of the cell edges. accepts two arguments:
        bound_these: sequence of N level (cell-center) values, only its length is used
        with_these: sequence of N+1 cell-edge values, indexable with [:]
    returns a float64 numpy array of shape (N, 2) where row i is
    [ with_these[i], with_these[i+1] ]. a float array is used so that non-integer edges are not
    truncated.
    raises ValueError if either argument is None or if len(with_these) != len(bound_these) + 1.
    '''
    if bound_these is None or with_these is None:
        raise ValueError('(create_lev_bnds) both bound_these and with_these are required')
    num_levs = len(bound_these)
    if len(with_these) != num_levs + 1:
        raise ValueError(f'(create_lev_bnds) need {num_levs + 1} edges to bound {num_levs} levels,'
                         f' got {len(with_these)}')

    print('(create_lev_bnds) bound_these is... ')
    print(f'                  bound_these = \n{bound_these}')
    print('(create_lev_bnds) with_these is... ')
    print(f'                  with_these = \n{with_these}')

    edges = np.asarray(with_these[:], dtype = np.float64)
    the_bnds = np.column_stack( (edges[:-1], edges[1:]) )

    print('(create_lev_bnds) the_bnds is... ')
    print(f'                  the_bnds = \n{the_bnds}')
    return the_bnds


def check_dataset_for_ocean_grid(ds):
    '''
    checks netCDF4.Dataset ds for ocean grid origin. accepts one argument:
        ds: netCDF4.Dataset object containing variables with associated dimensional information.
    returns True (after printing a warning) if a variable named 'xh' is present, False otherwise.
    '''
    if "xh" in ds.variables:
        print("(check_dataset_for_ocean_grid) WARNING: 'xh' found in var_list: ocean grid req'd"
              " sometimes i don't cmorize right! check me!")
        return True
    return False
def get_vertical_dimension(ds, target_var):
    '''
    determines the vertical dimensionality of target_var within netCDF4 Dataset ds. accepts two
    arguments and returns an object representing the vertical dimension assoc with the target_var.
        ds: netCDF4.Dataset object containing variables with associated dimensional information.
        target_var: string, representing a variable contained within the netCDF4.Dataset ds
    returns the dimension name (string) if one is found, otherwise the integer 0. a dimension named
    'landuse' (any case) is returned immediately. otherwise the last dimension whose coordinate
    variable has axis == "Z" wins. dimensions with no coordinate variable or no axis attribute are
    skipped.
    '''
    vert_dim = 0
    variable = ds.variables.get(target_var)
    if variable is None:
        return vert_dim

    for dim in variable.dimensions:
        # check for special case
        if dim.lower() == 'landuse': # aux coordinate, so has no axis property
            vert_dim = dim
            break

        # if it is not a vertical axis, move on.
        if dim not in ds.variables:
            continue
        if getattr(ds[dim], 'axis', None) != "Z":
            continue
        vert_dim = dim

    return vert_dim


def create_tmp_dir(outdir, json_exp_config = None):
    '''
    creates a tmp_dir based on targeted output directory root. returns the name of the tmp dir.
    accepts two arguments:
        outdir: string, the targeted output directory root. if it equals "/local2" or contains
                "/work" or "/net", outdir itself is used as the tmp dir, otherwise <outdir>/tmp.
        json_exp_config: optional string, path to the experiment json config. if it has an
                         "outpath" entry, that sub-directory is also created under the tmp dir,
                         on a best-effort basis.
    returns the resolved tmp dir path as a string.
    raises OSError if the tmp dir itself cannot be created. a json_exp_config that cannot be
    opened raises from open().
    '''
    # first see if the exp_config has any additional output path structure to create
    outdir_from_exp_config = None
    if json_exp_config is not None:
        with open(json_exp_config, "r", encoding = "utf-8") as exp_config_file:
            try:
                outdir_from_exp_config = json.load(exp_config_file)["outpath"]
            except (ValueError, KeyError, TypeError):
                print('(create_tmp_dir) WARNING could not read outdir from json_exp_config.'
                      ' the cmor module will throw a toothless warning' )

    # assign an appropriate temporary working directory
    outdir_is_scratch_like = ( outdir == "/local2"
                               or "/work" in outdir
                               or "/net" in outdir )
    if outdir_is_scratch_like:
        tmp_dir = str( Path(f"{outdir}/").resolve() )
        print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {tmp_dir} )')
    else:
        tmp_dir = str( Path(f"{outdir}/tmp/").resolve() )
        print(f'(create_tmp_dir) NOT using /local /work /net ( tmp_dir = {tmp_dir} )')

    # once we know where the tmp_dir should be, create it
    try:
        os.makedirs(tmp_dir, exist_ok=True)
    except OSError as exc:
        raise OSError(f'(create_tmp_dir) problem creating tmp output directory {tmp_dir}. stop.') from exc

    # and if we need to additionally create outdir_from_exp_config... try doing that too
    if outdir_from_exp_config is not None:
        print(f'(create_tmp_dir) attempting to create {outdir_from_exp_config} dir in tmp_dir targ')
        try:
            os.makedirs(tmp_dir+'/'+outdir_from_exp_config, exist_ok=True)
        except (OSError, TypeError):
            # ... but don't error out for lack of success here, not worth it. cmor can do the lift too.
            print(f'(create_tmp_dir) attempting to create {outdir_from_exp_config} dir in tmp_dir targ did not work')
            print( '                 .... oh well! it was ust to try to avoid a warning anyways.... moving on')

    return tmp_dir
accepts six arguments, all required: @@ -190,71 +234,205 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, # open the input file print(f"(rewrite_netcdf_file_var) opening {netcdf_file}" ) - ds = nc.Dataset(netcdf_file,'a') + ds = nc.Dataset(netcdf_file,'r+')#'a') # ocean grids are not implemented yet. print( '(rewrite_netcdf_file_var) checking input netcdf file for oceangrid condition') - check_dataset_for_ocean_grid(ds) - + uses_ocean_grid = check_dataset_for_ocean_grid(ds) + if uses_ocean_grid: + print('(rewrite_netcdf_file_var) OH BOY you have a file on the native tripolar grid...\n' + ' ... this is gonna be fun!' ) - # figure out the dimension names programmatically TODO - # Define lat and lon dimensions - # Assume input file is lat/lon grid - lat = ds["lat"][:] - lon = ds["lon"][:] - lat_bnds = ds["lat_bnds"][:] - lon_bnds = ds["lon_bnds"][:] - - ## Define time - #time = ds["time"][:] + # try to read what coordinate(s) we're going to be expecting for the variable + expected_mip_coord_dims=None + try: + expected_mip_coord_dims = proj_table_vars["variable_entry"] [target_var] ["dimensions"] + print( '(rewrite_netcdf_file_var) i am hoping to find data for the following coordinate dimensions:\n' + f' expected_mip_coord_dims = {expected_mip_coord_dims}' ) + except Exception as exc: + print(f'(rewrite_netcdf_file_var) WARNING could not get expected coordinate dimensions for {target_var}. ' + ' in proj_table_vars file {json_table_config}. 
\n exc = {exc}') + + + ## figure out the coordinate/dimension names programmatically TODO + + # Attempt to read lat coordinates + print(f'(rewrite_netcdf_file_var) attempting to read coordinate, lat') + lat = from_dis_gimme_dis( from_dis = ds, + gimme_dis = "lat") + print(f'(rewrite_netcdf_file_var) attempting to read coordinate BNDS, lat_bnds') + lat_bnds = from_dis_gimme_dis( from_dis = ds, + gimme_dis = "lat_bnds") + print(f'(rewrite_netcdf_file_var) attempting to read coordinate, lon') + lon = from_dis_gimme_dis( from_dis = ds, + gimme_dis = "lon") + print(f'(rewrite_netcdf_file_var) attempting to read coordinate BNDS, lon_bnds') + lon_bnds = from_dis_gimme_dis( from_dis = ds, + gimme_dis = "lon_bnds") # read in time_coords + units - time_coords = ds["time"][:] + print(f'(rewrite_netcdf_file_var) attempting to read coordinate time, and units...') + time_coords = from_dis_gimme_dis( from_dis = ds, + gimme_dis = 'time' ) + time_coord_units = ds["time"].units - print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") + print(f" time_coord_units = {time_coord_units}") # read in time_bnds , if present - time_bnds = [] - try: - time_bnds = ds["time_bnds"][:] - #print(f"(rewrite_netcdf_file_var) time_bnds = {time_bnds}") - except ValueError: - print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... moving on") - + print(f'(rewrite_netcdf_file_var) attempting to read coordinate BNDS, time_bnds') + time_bnds = from_dis_gimme_dis( from_dis = ds, + gimme_dis = 'time_bnds' ) # read the input variable data, i believe - var = ds[target_var][:] + print(f'(rewrite_netcdf_file_var) attempting to read variable data, {target_var}') + var = from_dis_gimme_dis( from_dis = ds, + gimme_dis = target_var ) + #var = ds[target_var][:] + # the tripolar grid is designed to reduce distortions in ocean data brought on + # by singularities (poles) being placed in oceans (e.g. 
the N+S poles of standard sphere grid) + # but, the tripolar grid is complex, so the values stored in the file are a lat/lon *on the tripolar grid* + # in order to get spherical lat/lon, one would need to convert on the fly, but implementing such an inverse is not trivial + # thankfully, the spherical lat/lons tend to already be computed in advance, and stored elsewhere. at GFDL they're in "statics" + do_special_ocean_file_stuff=all( [ uses_ocean_grid, + lat is None, + lon is None ] ) + + statics_file_path = None + x, y = None, None + i_ind, j_ind = None, None + cmor_grid_id = None + if do_special_ocean_file_stuff: + try: + print(f'(rewrite_netcdf_file_var) netcdf_file is {netcdf_file}') + statics_file_path = find_statics_file(prev_path) + print(f'(rewrite_netcdf_file_var) statics_file_path is {statics_file_path}') + except Exception as exc: + print(f'(rewrite_netcdf_file_var) WARNING: pretty sure an ocean statics file is needed, but it could not be found.' + ' moving on and doing my best, but i am probably going to break' ) + raise Exception('(rewrite_netcdf_file_var) EXITING BC STATICS') from exc + print(f"(rewrite_netcdf_file_var) statics file found.") + statics_file_name=Path(statics_file_path).name + put_statics_file_here=str(Path(netcdf_file).parent) + shutil.copy(statics_file_path, put_statics_file_here) + del statics_file_path + statics_file_path = put_statics_file_here + '/' + statics_file_name + print(f'(rewrite_netcdf_file_var) statics file path is now: {statics_file_path}') + + statics_ds=nc.Dataset(statics_file_path, 'r') + + # grab the lat/lon points, have shape (yh, xh) + statics_lat = from_dis_gimme_dis(statics_ds, 'geolat')#statics_ds['geolat'][:]#.copy() + statics_lon = from_dis_gimme_dis(statics_ds, 'geolon')#statics_ds['geolon'][:]#.copy() + print(f'FOO min entry of geolat: {statics_lat[:].data.min()}') + print(f'BAR min entry of geolon: {statics_lon[:].data.min()}') + + lat = ds.createVariable('lat', np.float32, ('yh', 'xh') ) + lat[:] = 
statics_lat[:] + lon = ds.createVariable('lon', np.float32, ('yh', 'xh') ) + lon[:] = statics_lon[:] + print(f'FOO min entry of lat: {lat[:].data.min()}') + print(f'BAR min entry of lon: {lon[:].data.min()}') + + # grab the corners of the cells, should have shape (yh+1, xh+1) + lat_c = from_dis_gimme_dis(statics_ds,'geolat_c') + lon_c = from_dis_gimme_dis(statics_ds,'geolon_c') + print(f'FOO min entry of geolat_c: {lat_c[:].data.min()}') + print(f'BAR min entry of geolon_c: {lon_c[:].data.min()}') + + vertex = 4 + ds.createDimension('vertex', vertex) + + lat_bnds = ds.createVariable('lat_bnds', np.float32, ('yh', 'xh', 'vertex') ) + lat_bnds[:,:,0] = lat_c[1:,1:] # NE corner + lat_bnds[:,:,1] = lat_c[1:,:-1] # NW corner + lat_bnds[:,:,2] = lat_c[:-1,:-1] # SW corner + lat_bnds[:,:,3] = lat_c[:-1,1:] # SE corner + + + lon_bnds = ds.createVariable('lon_bnds', np.float32, ('yh', 'xh', 'vertex') ) + lon_bnds[:,:,0] = lon_c[1:,1:] # NE corner + lon_bnds[:,:,1] = lon_c[1:,:-1] # NW corner + lon_bnds[:,:,2] = lon_c[:-1,:-1] # SW corner + lon_bnds[:,:,3] = lon_c[:-1,1:] # SE corner + + + print(f'(rewrite_netcdf_file_var) HARD PART: creating indices (j_index) from y (yh)') + y = from_dis_gimme_dis(ds, 'yh') + + print(f' ds.createVariable...') + #j_ind = ds.createVariable('j', int, ('yh') ) + j_ind = ds.createVariable('j_index', np.int32, ('yh') ) + print(f' np.arange...') + #j_ind[:] = np.zeros(len(y), dtype=int ) + j_ind[:] = np.arange(0, len(y), dtype=np.int32 ) + + + print(f'(rewrite_netcdf_file_var) HARD PART: creating indices (i_index) from x (xh)') + x = from_dis_gimme_dis(ds, 'xh') + + print(f' ds.createVariable...') + #i_ind = ds.createVariable('i', int, ('xh') ) + i_ind = ds.createVariable('i_index', np.int32, ('xh') ) + print(f' np.arange...') + #i_ind[:] = np.zeros(len(x), dtype=int ) + i_ind[:] = np.arange(0, len(x), dtype=np.int32 ) + + #cmor_grid_id = cmor.grid( ) + + #var.coordinates = 'lat lon' + var.coordinates = 'j_index i_index' + #var.coordinates = '' + 
+ + + + + + + + + #print(f' geolat = {lat}') + #assert False + + + + + + - # determine the vertical dimension by looping over netcdf variables - vert_dim = get_vertical_dimension(ds, target_var) - print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") # grab var_dim var_dim = len(var.shape) print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") - # Check var_dim - if var_dim not in [3, 4]: - raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") + ## Check var_dim + #if var_dim not in [3, 4]: + # raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") + + # determine the vertical dimension by looping over netcdf variables + vert_dim = get_vertical_dimension(ds, target_var) # returns int( 0 ) if not present + print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") # Check var_dim and vert_dim and assign lev if relevant. # error if vert_dim wrong given var_dim - lev = None - if var_dim == 4: - if vert_dim not in [ "plev30", "plev19", "plev8", + lev, lev_units = None, "1" #1 #"none" #None #"" + lev_bnds = None + if vert_dim != 0: + if vert_dim.lower() not in [ "z_l", "landuse", "plev39", "plev30", "plev19", "plev8", "height2m", "level", "lev", "levhalf"] : raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] - + if vert_dim.lower() != "landuse": + lev_units = ds[vert_dim].units # now we set up the cmor module object # initialize CMOR cmor.setup( - netcdf_file_action = cmor.CMOR_PRESERVE, - set_verbosity = cmor.CMOR_QUIET, #default is CMOR_NORMAL - exit_control = cmor.CMOR_NORMAL, - logfile = None, + netcdf_file_action = cmor.CMOR_PRESERVE, #.CMOR_APPEND,# + set_verbosity = cmor.CMOR_QUIET,#.CMOR_NORMAL, # + exit_control = cmor.CMOR_NORMAL,#.CMOR_EXIT_ON_WARNING,# +# logfile = './foo.log', create_subdirectories = 1 ) @@ -264,41 +442,130 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, # load CMOR table 
print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") - cmor.load_table(json_table_config) + if do_special_ocean_file_stuff: + print("FOOOOOOOOOOOOOOOOOOOOOOO"+ str(Path(json_table_config).parent) + '/CMIP6_grids.json') + cmor.load_table( str(Path(json_table_config).parent) + '/CMIP6_grids.json' ) + else: + cmor.load_table(json_table_config) units = proj_table_vars["variable_entry"] [target_var] ["units"] print(f"(rewrite_netcdf_file_var) units={units}") - cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") - cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") - try: + + # setup cmor latitude axis if relevant + cmor_lat = None + if do_special_ocean_file_stuff: + print(f'(rewrite_netcdf_file_var) WARNING: calling cmor.axis for an index!') + #cmor_lat = cmor.axis("j", coord_vals = j_ind[:], units = "1") + cmor_lat = cmor.axis("j_index", coord_vals = j_ind[:], units = "1") + #cmor_lat = cmor.axis("projection_y_coordinate", coord_vals = y[:], units = "degrees") + elif any( [ lat is None ] ): + print(f'(rewrite_netcdf_file_var) WARNING: lat or lat_bnds is None, skipping assigning cmor_lat') + else: + print(f'(rewrite_netcdf_file_var) assigning cmor_lat') + if lat_bnds is None: + cmor_lat = cmor.axis("latitude", coord_vals = lat[:], units = "degrees_N") + else: + cmor_lat = cmor.axis("latitude", coord_vals = lat[:], cell_bounds = lat_bnds, units = "degrees_N") + print(f' DONE assigning cmor_lat') + + # setup cmor longitude axis if relevant + cmor_lon = None + if do_special_ocean_file_stuff: + print(f'(rewrite_netcdf_file_var) WARNING: calling cmor.axis for an index!') + #cmor_lon = cmor.axis("i", coord_vals = i_ind[:], units = "1") + cmor_lon = cmor.axis("i_index", coord_vals = i_ind[:], units = "1") + #cmor_lon = cmor.axis("projection_x_coordinate", coord_vals = x[:], units = "degrees") + elif any( [ lon is None ] ): + 
print(f'(rewrite_netcdf_file_var) WARNING: lon or lon_bnds is None, skipping assigning cmor_lon') + else: + print(f'(rewrite_netcdf_file_var) assigning cmor_lon') + cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") + print(f' DONE assigning cmor_lon') + + + # setup the cmor_grid when needed (ocean things, typically) + cmor_grid = None + if do_special_ocean_file_stuff: + cmor_grid = cmor.grid([cmor_lat, cmor_lon], + latitude = lat[:], longitude = lon[:], + latitude_vertices = lat_bnds[:], + longitude_vertices = lon_bnds[:]) + + # load back up the normal table file? + cmor.load_table(json_table_config) + + # setup cmor time axis if relevant + cmor_time = None + print(f'(rewrite_netcdf_file_var) assigning cmor_time') + try: #if vert_dim != 'landuse': print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" f" coord_vals = \n{time_coords}, \n" f" cell_bounds = time_bnds, units = {time_coord_units}) ") + print(f'(rewrite_netcdf_file_var) assigning cmor_time using time_bnds...') cmor_time = cmor.axis("time", coord_vals = time_coords, cell_bounds = time_bnds, units = time_coord_units) - except ValueError as exc: - print(f"(rewrite_netcdf_file_var) WARNING exception raised... 
exc={exc}\n" - " cmor_time = cmor.axis('time', \n" + except ValueError as exc: #else: + print(f"(rewrite_netcdf_file_var) cmor_time = cmor.axis('time', \n" " coord_vals = time_coords, units = time_coord_units)") + print(f'(rewrite_netcdf_file_var) assigning cmor_time WITHOUT time_bnds...') cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) - - # initializations + print(f' DONE assigning cmor_time') + +# # setup cmor time axis if relevant +# cmor_time = None +# try: +# print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" +# f" coord_vals = \n{time_coords}, \n" +# f" cell_bounds = time_bnds, units = {time_coord_units}) ") +# print(f'(rewrite_netcdf_file_var) assigning cmor_time using time_bnds...') +# cmor_time = cmor.axis("time", coord_vals = time_coords, +# cell_bounds = time_bnds, units = time_coord_units) +# except ValueError as exc: +# print(f"(rewrite_netcdf_file_var) WARNING exception raised... exc={exc}\n" +# " cmor_time = cmor.axis('time', \n" +# " coord_vals = time_coords, units = time_coord_units)") +# print(f'(rewrite_netcdf_file_var) assigning cmor_time WITHOUT time_bnds...') +# cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) + + + + # other vertical-axis-relevant initializations save_ps = False ps = None ierr_ap, ierr_b = None, None ips = None - # set axes for 3-dim case - if var_dim == 3: - axes = [cmor_time, cmor_lat, cmor_lon] - print(f"(rewrite_netcdf_file_var) axes = {axes}") - # set axes for 4-dim case - elif var_dim == 4: - - if vert_dim in ["plev30", "plev19", "plev8", "height2m"]: - cmor_lev = cmor.axis( vert_dim, - coord_vals = lev[:], units = lev.units ) + # set cmor vertical axis if relevant + cmor_lev = None + if lev is not None: + print(f'(rewrite_netcdf_file_var) assigning cmor_lev') + + if vert_dim.lower() in ["landuse", "plev39", "plev30", "plev19", "plev8", "height2m"]: + print(f'(rewrite_netcdf_file_var) non-hybrid sigma coordinate case') + if 
vert_dim.lower() != "landuse": + cmor_vert_dim_name = vert_dim + cmor_lev = cmor.axis( cmor_vert_dim_name, + coord_vals = lev[:], units = lev_units ) + else: + landuse_str_list=['primary_and_secondary_land', 'pastures', 'crops', 'urban'] + cmor_vert_dim_name = "landUse" # this is why can't we have nice things + cmor_lev = cmor.axis( cmor_vert_dim_name, + coord_vals = np.array( + landuse_str_list, + dtype=f'S{len(landuse_str_list[0])}' ), + units = lev_units ) + + + elif vert_dim in ["z_l"]: + lev_bnds = create_lev_bnds( bound_these = lev, + with_these = ds['z_i'] ) + print('(rewrite_netcdf_file_var) created lev_bnds...') + print(f' lev_bnds = \n{lev_bnds}') + cmor_lev = cmor.axis( 'depth_coord', + coord_vals = lev[:], + units = lev_units, + cell_bounds = lev_bnds) elif vert_dim in ["level", "lev", "levhalf"]: # find the ps file nearby @@ -311,7 +578,7 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, if vert_dim == "levhalf": cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", coord_vals = lev[:], - units = lev.units ) + units = lev_units ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap_half", axis_ids = [cmor_lev, ], @@ -325,7 +592,7 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, else: cmor_lev = cmor.axis( "alternate_hybrid_sigma", coord_vals = lev[:], - units = lev.units, + units = lev_units, cell_bounds = ds[vert_dim+"_bnds"] ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap", @@ -342,15 +609,45 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) + axis_ids = [] + if cmor_time is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_time to axis_ids list...') + axis_ids.append(cmor_time) + print(f' axis_ids now = {axis_ids}') + if cmor_lat is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_lat to axis_ids list...') + 
axis_ids.append(cmor_lat) + print(f' axis_ids now = {axis_ids}') + if cmor_lon is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_lon to axis_ids list...') + axis_ids.append(cmor_lon) + print(f' axis_ids now = {axis_ids}') + ips = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ps", - axis_ids = [cmor_time, cmor_lat, cmor_lon], + axis_ids = axis_ids, #[cmor_time, cmor_lat, cmor_lon], units = "Pa" ) save_ps = True - # assign axes at end of 4-dim case - axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] - - + print(f' DONE assigning cmor_lev') + + + axes = [] + if cmor_time is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_time to axes list...') + axes.append(cmor_time) + print(f' axes now = {axes}') + if cmor_lev is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_lev to axes list...') + axes.append(cmor_lev) + print(f' axes now = {axes}') + if cmor_lat is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_lat to axes list...') + axes.append(cmor_lat) + print(f' axes now = {axes}') + if cmor_lon is not None: + print(f'(rewrite_netcdf_file_var) appending cmor_lon to axes list...') + axes.append(cmor_lon) + print(f' axes now = {axes}') # read positive attribute and create cmor_var? can this return none? TODO @@ -359,7 +656,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor_var = cmor.variable(target_var, units, axes, positive = positive) # Write the output to disk - #var = ds[target_var][:] #was this ever needed? why? 
cmor.write(cmor_var, var) if save_ps: if any( [ ips is None, ps is None ] ): @@ -371,7 +667,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor.close(ips, file_name = True, preserve = False) filename = cmor.close(cmor_var, file_name = True, preserve = False) print(f"(rewrite_netcdf_file_var) returned by cmor.close: filename = {filename}") - #cmor.close() ds.close() print('-------------------------- END rewrite_netcdf_file_var call -----\n\n') @@ -409,7 +704,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, #determine a tmp dir for working on files. - tmp_dir = create_tmp_dir( outdir ) + '/' + tmp_dir = create_tmp_dir( outdir, json_exp_config) + '/' print(f'(cmorize_target_var_files) will use tmp_dir={tmp_dir}') @@ -429,14 +724,14 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{local_var}.nc" print(f"(cmorize_target_var_files) nc_file_work = {nc_file_work}") - copy_nc( nc_fls[i], nc_file_work) + shutil.copy(nc_fls[i], nc_file_work) # if the ps file exists, we'll copy it to the work directory too nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', '.ps.nc') nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', '.ps.nc') if Path(nc_ps_file).exists(): print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") - copy_nc(nc_ps_file, nc_ps_file_work) + shutil.copy(nc_ps_file, nc_ps_file_work) # TODO think of better way to write this kind of conditional data movement... 
@@ -454,19 +749,26 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, gotta_go_back_here=os.getcwd() try: - print(f"cd'ing to \n {make_cmor_write_here}" ) + print(f"(cmorize_target_var_files) WARNING changing directory to: \n {make_cmor_write_here}" ) os.chdir( make_cmor_write_here ) except: raise OSError(f'could not chdir to {make_cmor_write_here}') print ("(cmorize_target_var_files) calling rewrite_netcdf_file_var") - local_file_name = rewrite_netcdf_file_var( proj_table_vars , - local_var , - nc_file_work , - target_var , - json_exp_config , - json_table_config ) - os.chdir( gotta_go_back_here ) + try: + local_file_name = rewrite_netcdf_file_var( proj_table_vars , + local_var , + nc_file_work , + target_var , + json_exp_config , + json_table_config , nc_fls[i] ) + except Exception as exc: + raise Exception('(cmorize_target_var_files) problem with rewrite_netcdf_file_var. exc=\n' + f' {exc}\n' + ' exiting and executing finally block.') + finally: # should always execute, errors or not! + print(f'(cmorize_target_var_files) WARNING changing directory to: \n {gotta_go_back_here}') + os.chdir( gotta_go_back_here ) # now that CMOR has rewritten things... we can take our post-rewriting actions diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index e882186a..bc482ff3 100644 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -2,8 +2,17 @@ import click +from .cmor_lister import _cmor_list_subtool from .cmor_mixer import _cmor_run_subtool +OPT_VAR_NAME_HELP="optional, specify a variable name to specifically process only filenames " + \ + "matching that variable name. I.e., this string help target local_vars, not " + \ + "target_vars." +VARLIST_HELP="path pointing to a json file containing directory of key/value pairs. " + \ + "the keys are the \'local\' names used in the filename, and the values " + \ + "pointed to by those keys are strings representing the name of the variable " + \ + "contained in targeted files. 
the key and value are often the same, " + \ + "but it is not required." @click.group(help=click.style(" - access fre cmor subcommands", fg=(232,91,204))) def cmor_cli(): ''' entry point to fre cmor click commands ''' @@ -16,11 +25,7 @@ def cmor_cli(): required=True) @click.option("-l", "--varlist", type=str, - help="path pointing to a json file containing directory of key/value pairs. " + \ - "the keys are the \'local\' names used in the filename, and the values " + \ - "pointed to by those keys are strings representing the name of the variable " + \ - "contained in targeted files. the key and value are often the same, " + \ - "but it is not required.", + help=VARLIST_HELP, required=True) @click.option("-r", "--table_config", type=str, @@ -40,9 +45,7 @@ def cmor_cli(): required=True) @click.option('-v', "--opt_var_name", type = str, - help="optional, specify a variable name to specifically process only filenames " + \ - "matching that variable name. I.e., this string help target local_vars, not " + \ - "target_vars.", + help=OPT_VAR_NAME_HELP, required=False) @click.pass_context def run(context, indir, varlist, table_config, exp_config, outdir, opt_var_name): @@ -62,5 +65,46 @@ def run(context, indir, varlist, table_config, exp_config, outdir, opt_var_name) # context.forward( # _cmor_run_subtool() ) + +@cmor_cli.command() +@click.option("-l", "--varlist", + type=str, + help=VARLIST_HELP, + required=False) +@click.option("-r", "--table_config_dir", + type=str, + help="directory holding MIP tables to search for variables in var list", + required=True) +@click.option('-v', "--opt_var_name", + type = str, + help=OPT_VAR_NAME_HELP, + required=False) +@click.pass_context +def list(context, varlist, table_config_dir, opt_var_name): + ''' + loop over json table files in config_dir and show which tables contain variables in var list/ + the tool will also print what that table entry is expecting of that variable as well. 
if given + an opt_var_name in addition to varlist, only that variable name will be printed out. + accepts 3 arguments, two of the three required. + ''' + #context.forward( _cmor_list_subtool ) + + # if opt_var_name specified, forget the list. + if opt_var_name is not None: + varlist=None + + # custom arg requirement of "one of the two or both" in click should be implemented with + # logic before calling context.invoke( , *args ) + if opt_var_name is None and varlist is None: + raise ValueError('opt_var_name and varlist cannot both be None') + + context.invoke( + _cmor_list_subtool, + json_var_list = varlist, + json_table_config_dir = table_config_dir, + opt_var_name = opt_var_name + ) + + if __name__ == "__main__": cmor_cli() diff --git a/fre/tests/test_files/CMOR_input_example.json b/fre/tests/test_files/CMOR_input_example.json index 3ce8a985..f37b85e1 100644 --- a/fre/tests/test_files/CMOR_input_example.json +++ b/fre/tests/test_files/CMOR_input_example.json @@ -70,5 +70,5 @@ "#output_path_template": "Template for output path directory using tables keys or global attributes, these should follow the relevant data reference syntax", "output_path_template": "<_member_id>", - "output_file_template": "
<_member_id>", + "output_file_template": "
<_member_id>" } diff --git a/fre/tests/test_fre_cmor_cli.py b/fre/tests/test_fre_cmor_cli.py index 2d03a1c7..fa07595f 100644 --- a/fre/tests/test_fre_cmor_cli.py +++ b/fre/tests/test_fre_cmor_cli.py @@ -151,3 +151,19 @@ def test_cli_fre_cmor_run_case2(capfd): Path(full_outputfile).exists(), Path(full_inputfile).exists() ] ) _out, _err = capfd.readouterr() + +# fre cmor list +def test_cli_fre_cmor_list(): + ''' fre cmor list ''' + result = runner.invoke(fre.fre, args=["cmor", "list"]) + assert result.exit_code == 2 + +def test_cli_fre_cmor_list_help(): + ''' fre cmor list --help ''' + result = runner.invoke(fre.fre, args=["cmor", "list", "--help"]) + assert result.exit_code == 0 + +def test_cli_fre_cmor_list_opt_dne(): + ''' fre cmor list optionDNE ''' + result = runner.invoke(fre.fre, args=["cmor", "list", "optionDNE"]) + assert result.exit_code == 2 diff --git a/run_test_file_cases.py b/run_test_file_cases.py index a588f803..9adae56a 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -5,7 +5,6 @@ it is for a very context-dependent set of tests for a very specific point in time. 
''' - import sys import os from pathlib import Path @@ -13,15 +12,21 @@ import fre from fre.cmor.cmor_mixer import cmor_run_subtool as run_cmor + +def print_cwd(): + print(f'os.getcwd() = {os.getcwd()}') + print(f'\n\n\n\n') + def print_the_outcome(some_return,case_str): print('-----------------------------------------------------------------------------------------------------------------') if some_return != 0: - print(f'{case_str} case failed[[[FAIL -_-]]]: some_return={some_return}') + print(f'{case_str} case failed [[[ FAIL -_- ]]]: some_return={some_return}') else: - print(f'{case_str} case probably OK [[[PROB-OK ^-^]]]: some_return={some_return}') + print(f'{case_str} case probably OK [[[ PROB-OK ^-^ ]]]: some_return={some_return}') print('-----------------------------------------------------------------------------------------------------------------') - print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n') - assert some_return == 0 + print(f'\n\n\n\n\n\n\n\n\n\n') + print_cwd() + #assert some_return == 0 # global consts for these tests, with no/trivial impact on the results ROOTDIR='fre/tests/test_files' @@ -65,132 +70,164 @@ def run_cmor_RUN(filename, table, opt_var_name): return FOO_return + + + + + + ## 1) SUCCEEDs ## land, Lmon, gr1 #testfile_land_gr1_Lmon = \ -# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' +# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/' + \ +# 'pp/land/ts/monthly/5yr/' + \ +# 'land.005101-005512.lai.nc' #try: # some_return = run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') #except: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass +## pass #print_the_outcome(some_return,'land_gr1_Lmon / lai') - - +# +# ## 2) SUCCEEDs ## atmos, Amon / cl #testfile_atmos_level_cmip_gr1_Amon_complex_vert = \ -# 
'/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.196001-196412.cl.nc' +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/atmos_level_cmip/ts/monthly/5yr/' + \ +# 'atmos_level_cmip.196001-196412.cl.nc' #try: # some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass +## pass #print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_complex_vert / cl') - - +# +# ## 3) SUCCEEDs ## atmos, Amon / mc #testfile_atmos_level_cmip_gr1_Amon_fullL = \ -# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.195501-195912.mc.nc' +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/atmos_level_cmip/ts/monthly/5yr/' + \ +# 'atmos_level_cmip.195501-195912.mc.nc' #try: # some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass +## pass #print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_fullL / mc') - - -# 4) FAIL (no longitude coordinate case) -# atmos, Amoon / ta -# just like #1, but lack longitude -# Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / -testfile_atmos_gr1_AmonZ_nolons = \ - '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' -try: - some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') -except Exception as exc: - print(f'exception caught: 
exc=\n{exc}') - some_return=-1 - pass -print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') - - -## 5) SUCCEEDS -## ocean, Omon / sos -#testfile_ocean_monthly_1x1deg_gr = \ -# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' +# +# +## 4) SUCCEEDs (no longitude coordinate case) +## atmos, AERmonZ / ta +## just like #1, but lack longitude +#testfile_atmos_gr1_AERmonZ_nolons = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/' + \ +# 'atmos_plev39_cmip.201001-201412.ta.nc' #try: -# some_return = run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') +# some_return = run_cmor_RUN(testfile_atmos_gr1_AERmonZ_nolons, 'AERmonZ', opt_var_name = 'ta') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass -#print_the_outcome(some_return,'ocean_monthly_1x1deg_gr / sos') - - - -## 6) FAIL (copy_nc failure!!! WEIRD) +## pass +#print_the_outcome(some_return,'atmos_gr1_AERmonZ_nolons / ta') +# +# +## 5) SUCCEEDs ## ocean, Omon / sos -## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists. 
-#testfile_ocean_monthly_gn = \ -# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' +#testfile_ocean_monthly_1x1deg_gr = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/ocean_monthly_1x1deg/ts/monthly/5yr/' + \ +# 'ocean_monthly_1x1deg.190001-190412.sos.nc' #try: -# some_return = run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') +# some_return = run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass -#print_the_outcome(some_return,'ocean_monthly_gn / sos') - - - -## 7) FAIL (copy_nc failure!!! WEIRD) +## pass +#print_the_outcome(some_return,'ocean_monthly_1x1deg_gr / sos') +# +# +## 7) SUCCEEDs ## ocean, Omon / so -## Result - identical failure to #6 #testfile_ocean_monthly_z_1x1deg_gr = \ -# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' +# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/' + \ +# 'pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/' + \ +# 'ocean_monthly_z_1x1deg.000101-000512.so.nc' #try: # some_return = run_cmor_RUN(testfile_ocean_monthly_z_1x1deg_gr, 'Omon', opt_var_name = 'so') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 -# pass +## pass #print_the_outcome(some_return,'ocean_monthly_z_1x1deg_gr / so') - - -# 8) FAIL (no latitude nor longitude coordinates cases) -# atmos, Amon / ch4global -# Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / -testfile_atmos_scalar_gn_Amon_nolon_nolat = \ - 
'/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' -try: - some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') -except Exception as exc: - print(f'exception caught: exc=\n{exc}') - some_return=-1 - pass -print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') - - -## 9) FAIL (4 dimensional data with no vertical) -## Result - error, -## File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", -## line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): -## AttributeError: NetCDF: Attribute not found +#if some_return != 0: +# print('didnt pass ocean-file test number 7. exit.') +# sys.exit() +# +# +## 8) SUCCEEDs (no latitude, nor longitude, nor vertical coordinates cases) +## atmos, Amon / ch4global +#testfile_atmos_scalar_gn_Amon_nolon_nolat = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/atmos_scalar/ts/monthly/5yr/' + \ +# 'atmos_scalar.197001-197412.ch4global.nc' +#try: +# some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +## pass +#print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') +# +# +# +## 9) SUCCEEDs (needs coordinate variable axis with character string values) +## land, Emon / gppLut #testfile_LUmip_refined_gr1_Emon_landusedim = \ -# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/' + \ +# 'pp/LUmip_refined/ts/monthly/5yr/' + \ +# 'LUmip_refined.185001-185412.gppLut.nc' #try: # some_return = run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 
'gppLut') #except Exception as exc: # print(f'exception caught: exc=\n{exc}') # some_return=-1 # pass -#print_the_outcome(some_return,'LUmip_refined_gr1_Emon_langusedim / gppLut') +#print_the_outcome(some_return,'LUmip_refined_gr1_Emon_landusedim / gppLut') +#if some_return != 0: +# print('didnt pass the land-file test. exit.') +# #sys.exit() + + +#### THIS CASE MAY WORK if i rewrite the ocean file correctly, effectively appending the lat/lon data from a statics file. +#### for this case, that file is: +#### '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/' + \ +#### 'pp/ocean_monthly/' + \ +#### 'ocean_monthly.static.nc' +#### and that data is stored under "geolon" and "geolat" consuming dims "x" and "y". +# 6) FAIL +# ocean, Omon / sos +# Result - error, it wants lat/lon, but only xh, yh coordinates are available +testfile_ocean_monthly_gn = \ + '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/' + \ + 'pp/ocean_monthly/ts/monthly/5yr/' + \ + 'ocean_monthly.002101-002512.sos.nc' +try: + some_return = run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') +except Exception as exc: + print(f'exception caught: exc=\n{exc}') + some_return=-1 + pass +print_the_outcome(some_return,'ocean_monthly_gn / sos') +if some_return != 0: + print('didnt pass ocean-file test 6... exit.') +# sys.exit() diff --git a/scratchwork_rewrite_sos_ocean_monthly_gn_file.py b/scratchwork_rewrite_sos_ocean_monthly_gn_file.py new file mode 100644 index 00000000..04f7167f --- /dev/null +++ b/scratchwork_rewrite_sos_ocean_monthly_gn_file.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +#import sys +#import os +from pathlib import Path + +import numpy +#from numpy.dtypes import StringDType +from netCDF4 import Dataset #, stringtochar + + + +# open netcdf file in append mode? write mode? read? 
+# +# +sos_gn_fin=Dataset('./tmp/.nc', mode='r') +sos_gn_fin_ncattrs=sos_gn_fin.__dict__ #dictionary + +# the target data of interest +sos_gn_var_data = sos_gn_fin.variables['sos'][:] +sos_gn_var_atts = sos_gn_fin.variables['sos'].__dict__ + +# coordinate variables, their _bnds, and their identically named dimensions +# coordinate variable == a variable with the same name as a dimension. +# pitfall: an "axis" in netcdf is not analagous to a dimension, overloaded term +bnds_coord_data = sos_gn_fin.variables['bnds'][:] +bnds_coord_atts = sos_gn_fin.variables['bnds'].__dict__ +bnds_coord_dims = sos_gn_fin.dimensions['bnds'].size + +time_coord_data = sos_gn_fin.variables['time'][:] +time_coord_atts = sos_gn_fin.variables['time'].__dict__ +time_coord_bnds = sos_gn_fin.variables['time_bnds'][:] +time_coord_bnds_atts = sos_gn_fin.variables['time_bnds'].__dict__ +#time_coord_dims = sos_gn_fin.dimensions['time'].size + +lat_coord_data = sos_gn_fin.variables['lat'][:] +lat_coord_atts = sos_gn_fin.variables['lat'].__dict__ +lat_coord_bnds = sos_gn_fin.variables['lat_bnds'][:] +lat_coord_bnds_atts = sos_gn_fin.variables['lat_bnds'].__dict__ +lat_coord_dims = sos_gn_fin.dimensions['lat'].size + +lon_coord_data = sos_gn_fin.variables['lon'][:] +lon_coord_atts = sos_gn_fin.variables['lon'].__dict__ +lon_coord_bnds = sos_gn_fin.variables['lon_bnds'][:] +lon_coord_bnds_atts = sos_gn_fin.variables['lon_bnds'].__dict__ +lon_coord_dims = sos_gn_fin.dimensions['lon'].size + +''' + we're going to essentially re-create the most important parts of the file and see if i can't make it sort of work + recall, a netCDF4 file is, basically, 4 sets of things + attributes, i.e effectively global metadata + groups, i.e. a heirarchy with nesting a lot like directories (older netcdf files only have a root group) + dimensions, i.e. a set of named-integers to define the number of divisions on an axis + variables, i.e. 
arrays representing data with shapes described by the dimensions in the file +''' + +# open the output file +sos_gn_fout=Dataset('./alt_sos_gn_input/PLAY_.nc',mode='w') +sos_gn_fout.setncatts(sos_gn_fin_ncattrs) + +''' + from netCDF4 python API doc, for easy referencing + createDimension(self, + dimname, size=None)... None will imply unlimited +''' +sos_gn_fout.createDimension( 'time', + None ) #time_coord_dims +sos_gn_fout.createDimension( 'bnds', + bnds_coord_dims ) + +sos_gn_fout.createDimension( 'lat', + lat_coord_dims ) +sos_gn_fout.createDimension( 'lon', + lon_coord_dims ) + + + + +''' + from netCDF4 python API doc, for easy referencing + def createVariable(self, + varname, datatype, dimensions=(), + lots others ) +''' +# easy variables first. +# bnds +sos_gn_fout.createVariable( 'bnds', sos_gn_fin.variables['bnds'].dtype, + dimensions = ( sos_gn_fout.dimensions['bnds'] ) ) +sos_gn_fout.variables['bnds'][:] = bnds_coord_data +sos_gn_fout.variables['bnds'].setncatts( bnds_coord_atts ) + +# time +sos_gn_fout.createVariable( 'time', sos_gn_fin.variables['time'].dtype, + dimensions = ( sos_gn_fout.dimensions['time'] ) ) +sos_gn_fout.variables['time'][:] = time_coord_data +sos_gn_fout.variables['time'].setncatts( time_coord_atts ) + +# time_bnds +sos_gn_fout.createVariable( 'time_bnds', sos_gn_fin.variables['time_bnds'].dtype, + fill_value = sos_gn_fin.variables['time_bnds']._FillValue, # necessary bc of unlimited + extra limited dim shape? 
+ dimensions = ( sos_gn_fout.dimensions['time'], + sos_gn_fout.dimensions['bnds'] ) ) +sos_gn_fout.variables['time_bnds'][:] = time_coord_bnds +for att in time_coord_bnds_atts: #sos_gn_fout.variables['time_bnds'].setncatts( time_coord_bnds_atts ) + if att != '_FillValue': + sos_gn_fout.variables['time_bnds'].setncattr( att, time_coord_bnds_atts[att] ) + +# lat +sos_gn_fout.createVariable( 'lat', sos_gn_fin.variables['lat'].dtype, + dimensions = ( sos_gn_fout.dimensions['lat'] ) ) +sos_gn_fout.variables['lat'][:] = lat_coord_data +sos_gn_fout.variables['lat'].setncatts( lat_coord_atts ) + +# lat_bnds +sos_gn_fout.createVariable( 'lat_bnds', sos_gn_fin.variables['lat_bnds'].dtype, + dimensions = ( sos_gn_fout.dimensions['lat'], + sos_gn_fout.dimensions['bnds'] ) ) +sos_gn_fout.variables['lat_bnds'][:] = lat_coord_bnds +sos_gn_fout.variables['lat_bnds'].setncatts( lat_coord_bnds_atts ) + +# lon +sos_gn_fout.createVariable( 'lon', sos_gn_fin.variables['lon'].dtype, + dimensions = ( sos_gn_fout.dimensions['lon'] ) ) +sos_gn_fout.variables['lon'][:] = lon_coord_data +sos_gn_fout.variables['lon'].setncatts( lon_coord_atts ) + +# lon_bnds +sos_gn_fout.createVariable( 'lon_bnds', sos_gn_fin.variables['lon_bnds'].dtype, + dimensions = ( sos_gn_fout.dimensions['lon'], + sos_gn_fout.dimensions['bnds'] ) ) +sos_gn_fout.variables['lon_bnds'][:] = lon_coord_bnds +sos_gn_fout.variables['lon_bnds'].setncatts( lon_coord_bnds_atts ) + +# data time!!! +sos_gn_fout.createVariable( 'sos', sos_gn_fin.variables['sos'].dtype, + fill_value = sos_gn_fin.variables['sos']._FillValue, + dimensions = ( sos_gn_fout.dimensions['time'], + None, #TODO SHOULD NOT BE NONE!!!! 
+ sos_gn_fout.dimensions['lat'], + sos_gn_fout.dimensions['lon'] ) ) +sos_gn_fout.variables['sos'][:] = sos_gn_var_data +for att in sos_gn_var_atts: + if att not in ["time_avg_info", "_FillValue"]: + sos_gn_fout.variables['sos'].setncattr(att, sos_gn_var_atts[att] ) + +sos_gn_fout.close() + +## test that the two are equivalent "quickly"... +#unmsk_sos_gn_var_data=sos_gn_var_data[~sos_gn_var_data.mask] +#unmsk_sos_gn_var_out=sos_gn_var_out[~sos_gn_var_out.mask] +#for i in range(0, len( unmsk_sos_gn_var_data ) ): +# if i%100 == 0: +# print(f'i = {i}') +# diff = unmsk_sos_gn_var_data[i] - unmsk_sos_gn_var_out[i] +# if diff > 0.: +# print(f'diff = \n {diff}')