From 1f3fba93389be93d180abc4957f9489cc77d36ab Mon Sep 17 00:00:00 2001 From: Emilio Mayorga Date: Tue, 7 Mar 2017 20:48:33 -0800 Subject: [PATCH 1/2] Added kwargs to hads collect method, to support optional, user-driven verify=False option to be passed to requests (to handle failing SSL cert) --- pyoos/collectors/hads/hads.py | 46 +++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/pyoos/collectors/hads/hads.py b/pyoos/collectors/hads/hads.py index da3f53a..eb7781e 100644 --- a/pyoos/collectors/hads/hads.py +++ b/pyoos/collectors/hads/hads.py @@ -1,5 +1,3 @@ -from __future__ import (absolute_import, division, print_function) - import os.path import re import requests @@ -75,7 +73,7 @@ def _list_variables(self, station_codes): 'sinceday' : -1}) resp.raise_for_status() - list(map(variables.add, rvar.findall(resp.text))) + map(variables.add, rvar.findall(resp.text)) return variables def list_features(self): @@ -84,23 +82,23 @@ def list_features(self): return station_codes - def collect(self): + def collect(self, **kwargs): var_filter = None if hasattr(self, '_variables'): var_filter = self._variables time_extents = (self.start_time if hasattr(self, 'start_time') else None, self.end_time if hasattr(self, 'end_time') else None) - metadata, raw_data = self.raw() + metadata, raw_data = self.raw(**kwargs) return self.parser.parse(metadata, raw_data, var_filter, time_extents) - def raw(self, format=None): + def raw(self, format=None, **kwargs): """ Returns a tuple of (metadata, raw data) """ station_codes = self._apply_features_filter(self._get_station_codes()) - metadata = self._get_metadata(station_codes) - raw_data = self._get_raw_data(station_codes) + metadata = self._get_metadata(station_codes, **kwargs) + raw_data = self._get_raw_data(station_codes, **kwargs) return (metadata, raw_data) @@ -116,12 +114,18 @@ def _apply_features_filter(self, station_codes): return station_codes - def _get_metadata(self, station_codes): + def _get_metadata(self, station_codes, **kwargs): + if 'verify' in kwargs: + verify_cert = kwargs['verify'] + else: + verify_cert = True # the default for requests + resp = requests.post(self.metadata_url, data={'state' : 'nil', 'hsa' : 'nil', 'of' : '1', 'extraids' : " ".join(station_codes), - 'data' : "Get Meta Data"}) + 'data' : "Get Meta Data"}, + verify=verify_cert) resp.raise_for_status() return resp.text @@ -141,8 +145,8 @@ def _get_station_codes(self, force=False): if self.bbox: with collection(os.path.join("resources", "ne_50m_admin_1_states_provinces_lakes_shp.shp"), "r") as c: - geom_matches = [x['properties'] for x in c.filter(bbox=self.bbox)] - state_matches = [x['postal'] if x['admin'] != 'Canada' else 'CN' for x in geom_matches] + geom_matches = map(lambda x: x['properties'], c.filter(bbox=self.bbox)) + state_matches = map(lambda x: x['postal'] if x['admin'] != 'Canada' else u'CN', geom_matches) self.station_codes = [] @@ -155,7 +159,7 @@ def _get_station_codes(self, force=False): self.station_codes.extend(self._get_stations_for_state(state_url)) if self.bbox: - # retreive metadata for all stations to properly filter them + # retrieve metadata for all stations to properly filter them metadata = self._get_metadata(self.station_codes) parsed_metadata = self.parser._parse_metadata(metadata) @@ -165,20 +169,25 @@ def in_bbox(code): return lon >= self.bbox[0] and lon <= self.bbox[2] and lat >= self.bbox[1] and lat <= self.bbox[3] - self.station_codes = list(filter(in_bbox, self.station_codes)) + self.station_codes = filter(in_bbox, self.station_codes) return self.station_codes def _get_state_urls(self): root = BeautifulSoup(requests.get(self.states_url).text) areas = root.find_all("area") - return list(set([x.attrs.get('href', None) for x in areas])) + return list(set(map(lambda x: x.attrs.get('href', None), areas))) def _get_stations_for_state(self, state_url): state_root = BeautifulSoup(requests.get(state_url).text) - return [x for x in [x.attrs['href'].split("nesdis_id=")[-1] for x in state_root.find_all('a')] if len(x) > 0] + return filter(lambda x: len(x) > 0, map(lambda x: x.attrs['href'].split("nesdis_id=")[-1], state_root.find_all('a'))) + + def _get_raw_data(self, station_codes, **kwargs): + if 'verify' in kwargs: + verify_cert = kwargs['verify'] + else: + verify_cert = True # the default for requests - def _get_raw_data(self, station_codes): since = 7 if hasattr(self, 'start_time') and self.start_time is not None: # calc delta between now and start_time @@ -196,7 +205,8 @@ def _get_raw_data(self, station_codes): 'hsa' : 'nil', 'of' : '1', 'extraids' : " ".join(station_codes), - 'sinceday' : since}) + 'sinceday' : since}, + verify=verify_cert) resp.raise_for_status() return resp.text From bf2a16242b2f876aaf72a67ef51b27d995120a6e Mon Sep 17 00:00:00 2001 From: Emilio Mayorga Date: Thu, 9 Mar 2017 14:04:30 -0800 Subject: [PATCH 2/2] Removed hads collector old code that had crept into my kwargs/verify commits; restored those changes to current master --- pyoos/collectors/hads/hads.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pyoos/collectors/hads/hads.py b/pyoos/collectors/hads/hads.py index eb7781e..81c0655 100644 --- a/pyoos/collectors/hads/hads.py +++ b/pyoos/collectors/hads/hads.py @@ -1,3 +1,5 @@ +from __future__ import (absolute_import, division, print_function) + import os.path import re import requests @@ -73,7 +75,7 @@ def _list_variables(self, station_codes): 'sinceday' : -1}) resp.raise_for_status() - map(variables.add, rvar.findall(resp.text)) + list(map(variables.add, rvar.findall(resp.text))) return variables def list_features(self): @@ -145,8 +147,8 @@ def _get_station_codes(self, force=False): if self.bbox: with collection(os.path.join("resources", "ne_50m_admin_1_states_provinces_lakes_shp.shp"), "r") as c: - geom_matches = map(lambda x: x['properties'], c.filter(bbox=self.bbox)) - state_matches = map(lambda x: x['postal'] if x['admin'] != 'Canada' else u'CN', geom_matches) + geom_matches = [x['properties'] for x in c.filter(bbox=self.bbox)] + state_matches = [x['postal'] if x['admin'] != 'Canada' else 'CN' for x in geom_matches] self.station_codes = [] @@ -169,18 +171,18 @@ def in_bbox(code): return lon >= self.bbox[0] and lon <= self.bbox[2] and lat >= self.bbox[1] and lat <= self.bbox[3] - self.station_codes = filter(in_bbox, self.station_codes) + self.station_codes = list(filter(in_bbox, self.station_codes)) return self.station_codes def _get_state_urls(self): root = BeautifulSoup(requests.get(self.states_url).text) areas = root.find_all("area") - return list(set(map(lambda x: x.attrs.get('href', None), areas))) + return list(set([x.attrs.get('href', None) for x in areas])) def _get_stations_for_state(self, state_url): state_root = BeautifulSoup(requests.get(state_url).text) - return filter(lambda x: len(x) > 0, map(lambda x: x.attrs['href'].split("nesdis_id=")[-1], state_root.find_all('a'))) + return [x for x in [x.attrs['href'].split("nesdis_id=")[-1] for x in state_root.find_all('a')] if len(x) > 0] def _get_raw_data(self, station_codes, **kwargs): if 'verify' in kwargs: