Skip to content
This repository has been archived by the owner on Oct 9, 2019. It is now read-only.

Commit

Permalink
Merge pull request #24 from benjwadams/add_climatology_check
Browse files Browse the repository at this point in the history
Adds climatology check
  • Loading branch information
lukecampbell committed Jan 13, 2015
2 parents 4e1d270 + 6ba57fb commit 2856139
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
37 changes: 37 additions & 0 deletions ioos_qartod/qc_tests/qc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pyproj
import quantities as q
import pandas as pd
import multiprocessing


class QCFlags:
Expand Down Expand Up @@ -95,6 +97,41 @@ def range_check(arr, sensor_span, user_span=None, prev_qc=None):
return flag_arr


def _process_time_chunk(value_pairs):
    """Evaluate one group of values against its climatology threshold.

    `value_pairs` is an indexable pair: item 0 holds the values for the
    group, item 1 holds either a (low, high) threshold or None.

    When a threshold is given, the result is the inclusive range test
    ``low <= value <= high`` cast to 'i4' (1 where it passes, 0 where it
    fails). When the threshold is None, the result is a pandas Series of
    QCFlags.UNKNOWN sharing the index of the values.
    """
    vals, threshold = value_pairs[0], value_pairs[1]

    # No climatology entry for this group: nothing can be evaluated.
    if threshold is None:
        unknown_flags = np.repeat(QCFlags.UNKNOWN, len(vals))
        return pd.Series(unknown_flags, vals.index, dtype='i4')

    above_low = vals >= threshold[0]
    below_high = vals <= threshold[1]
    within_range = above_low & below_high
    return within_range.astype('i4')


@add_qartod_ident(5, 'Climatology Test')
def climatology_check(time_series, clim_table, group_function):
    """
    Flag a pandas time series against a climatology lookup table.

    Takes a pandas time series, a dict of 2-tuples with (low, high) thresholds
    as values, and a grouping function to group the time series into bins which
    correspond to the climatology lookup table. Values inside the inclusive
    (low, high) range of their bin keep the passing value 1; values lying
    outside of it are flagged as QCFlags.SUSPECT. Data for which climatology
    values do not exist (i.e. no entry to look up in the dict) are flagged as
    QCFlags.UNKNOWN (not evaluated).

    Returns a pandas Series of dtype 'i4' carrying the index of `time_series`,
    with flags in the same order as the input values. An empty input yields an
    empty Series. Values that the grouping does not assign to any bin are left
    as QCFlags.UNKNOWN.
    """
    # Start from "not evaluated" so anything no group covers stays UNKNOWN.
    flags = np.full(len(time_series), QCFlags.UNKNOWN, dtype='i4')

    # GroupBy.indices maps each group key to the integer positions of its
    # members, which lets the per-group results be written back in the
    # original order instead of in group order.
    group_positions = list(time_series.groupby(group_function).indices.items())

    if group_positions:
        work = [(time_series.iloc[positions], clim_table.get(grp_val))
                for (grp_val, positions) in group_positions]
        # should speed up processing of climatologies
        pool = multiprocessing.Pool()
        try:
            chunks = pool.map(_process_time_chunk, work)
        finally:
            # Always release the worker processes, even if a chunk failed.
            pool.close()
            pool.join()
        for (_, positions), chunk in zip(group_positions, chunks):
            flags[positions] = np.asarray(chunk)

    # replace 0s from boolean with suspect values
    flags[flags == 0] = QCFlags.SUSPECT
    return pd.Series(flags, index=time_series.index, dtype='i4',
                     name=time_series.name)


@add_qartod_ident(6, 'Spike Test')
def spike_check(arr, low_thresh, high_thresh, prev_qc=None):
"""
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
numpy
pandas
quantities
pyproj
nose
19 changes: 19 additions & 0 deletions test/test_qartod_qc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import numpy.testing as npt
import pandas as pd
from ioos_qartod.qc_tests import qc
from ioos_qartod.qc_tests.qc import QCFlags
import unittest
Expand Down Expand Up @@ -41,6 +42,24 @@ def test_range_check_mixed(self):
npt.assert_array_equal(qc.range_check(vals, sensor_span, user_span),
np.array([4, 4, 3, 3, 1, 1, 1, 3, 3, 4]))

def test_climatology_check(self):
    """Climatology check flags weekly values against monthly thresholds."""
    # weekly timestamps: 14 vals - 2010-01-03 to 2010-04-04
    dates = pd.date_range('2010-01-01', '2010-04-10', freq='W')

    # (low, high) thresholds keyed by month number; April has no entry
    monthly_clim = {1: (6.0, 10), 2: (1.4, 6.4), 3: (4.2, 13.0)}

    observations = np.array([
        12.1, 9.0, 1.3, 6.2, 9.9,  # Jan
        1.6, 2.0, 9.0, 4.0,        # Feb
        5.0, 5.5, 10.6, 16.0,      # Mar
        17.2,                      # Apr
    ])
    ts = pd.Series(observations, dates)

    expected = np.array(
        [3, 1, 3, 1, 1,  # Jan
         1, 1, 3, 1,     # Feb
         1, 1, 1, 3,     # Mar
         2],             # Apr should be unknown as w/o clim value
        dtype='i4')

    def month_of(t):
        return t.month

    results = qc.climatology_check(ts, monthly_clim, month_of)
    npt.assert_array_equal(results, expected)

def test_overlapping_threshold_ranges(self):
"""
Test to see if overlapping sensor and user ranges will throw an
Expand Down

0 comments on commit 2856139

Please sign in to comment.