import pathlib


class CachedData:
    """Fetch regro graph JSON artifacts through a caching HTTP session.

    Session settings are resolved as ``DEFAULT_SESSION_SETTINGS`` overlaid
    with either the ``session_kwargs`` argument or, when that argument is
    ``None``, the contents of the optional user file
    ``~/.config/libcflib/cache_session_kwargs.yaml``.
    """

    # URL template for one import-map shard; formatted with the first two
    # letters of the import name.
    IMPORT_MAP_URL_TEMPLATE = (
        "https://raw.githubusercontent.com/regro/libcfgraph/master/"
        "import_maps/{import_first_two_letters}.json"
    )
    FILE_LISTING_URL = (
        "https://raw.githubusercontent.com/regro/libcfgraph/master/"
        ".file_listing.json"
    )
    HUBS_AUTHS_URL = (
        "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/"
        "ranked_hubs_authorities.json"
    )
    # Keyword arguments handed to ``CachedSession`` unless overridden.
    DEFAULT_SESSION_SETTINGS = {
        # ``expanduser`` (not ``absolute``) is what resolves the leading "~";
        # ``absolute`` would yield "<cwd>/~/.config/...".
        "cache_name": str(
            pathlib.Path("~/.config/libcflib/cache_session.sqlite").expanduser()
        ),
        "backend": "sqlite",
        # Was misspelled "exire_after", so the expiry was never applied.
        "expire_after": 60 * 60 * 1,
    }

    def __init__(self, session_kwargs=None):
        """Create the cached session.

        Parameters
        ----------
        session_kwargs : dict, optional
            Overrides for ``DEFAULT_SESSION_SETTINGS``. When ``None``, the
            overrides are read from the user's YAML settings file if it
            exists; otherwise only the defaults are used.
        """
        # Imported lazily: only needed once a session is actually built.
        from requests_cache import CachedSession

        if session_kwargs is None:
            session_kwargs = self._load_user_session_kwargs()
        merged_kwargs = self.DEFAULT_SESSION_SETTINGS.copy()
        merged_kwargs.update(session_kwargs)
        self._ensure_cache_dir(merged_kwargs["cache_name"])
        self.session = CachedSession(**merged_kwargs)

    @staticmethod
    def _load_user_session_kwargs():
        """Return the user's session overrides from YAML, or ``{}`` if none."""
        config_path = pathlib.Path(
            "~/.config/libcflib/cache_session_kwargs.yaml"
        ).expanduser()
        if not config_path.exists():
            return {}

        from ruamel import yaml

        # ``with`` closes the handle that the bare ``p.open()`` used to leak.
        with config_path.open() as stream:
            # NOTE(review): yaml.load is called without an explicit Loader;
            # if this file can ever come from an untrusted source, switch to
            # a safe loader.
            loaded = yaml.load(stream)
        # An empty YAML document parses to None, which dict.update rejects.
        return loaded or {}

    @staticmethod
    def _ensure_cache_dir(cache_name):
        """Create the directory that will contain the cache file.

        Only the parent is created; creating ``cache_name`` itself as a
        directory would occupy the path the cache backend may want to use
        as a file.
        """
        pathlib.Path(cache_name).parent.mkdir(parents=True, exist_ok=True)

    def _get(self, url):
        """GET ``url`` through the cached session and return the decoded JSON.

        Raises whatever ``raise_for_status`` raises on an HTTP error status,
        rather than attempting to parse an error body as JSON.
        """
        response = self.session.get(url)
        response.raise_for_status()
        return response.json()

    def get_import_map(self, first_two_letters):
        """Return the import-map JSON for names starting with ``first_two_letters``."""
        url = self.IMPORT_MAP_URL_TEMPLATE.format(
            import_first_two_letters=first_two_letters
        )
        return self._get(url)
def test_get_import_map(tmpdir):
    """Fetch the ``ma`` import map and check that the response was cached.

    The session cache is placed under ``tmpdir`` so the test does not touch
    the user's real cache. Calling ``get_import_map`` issues a request for
    the import-map URL, so this test needs network access on a cold cache.
    """
    # Function-scoped so this block is self-contained.
    import pathlib

    from libcflib.expose_data import CachedData

    cache_path = (
        pathlib.Path(tmpdir) / ".config" / "libcflib" / "cache_session.sqlite"
    )
    cd = CachedData(
        session_kwargs={
            "cache_name": str(cache_path),
            "backend": "sqlite",
            # Was misspelled "exire_after", which is not the expiry option.
            "expire_after": 60 * 60 * 1,
        }
    )
    first_two_letters = "ma"
    data = cd.get_import_map(first_two_letters)
    assert "matplotlib" in data
    url = cd.IMPORT_MAP_URL_TEMPLATE.format(
        import_first_two_letters=first_two_letters
    )
    assert cd.session.cache.has_url(url)
"cache_name": str( + pathlib.Path("~/.config/libcflib/cache_session.sqlite").absolute() + ), + "backend": "sqlite", + "exire_after": 60 * 60 * 1, } def __init__(self, session_kwargs=None): if session_kwargs is None: - p = pathlib.Path('~/.config/libcflib/cache_session_kwargs.yaml').absolute() + p = pathlib.Path("~/.config/libcflib/cache_session_kwargs.yaml").absolute() if p.exists(): session_kwargs = yaml.load(p.open()) else: session_kwargs = {} _session_kwargs = self.DEFAULT_SESSION_SETTINGS.copy() _session_kwargs.update(session_kwargs) - p = pathlib.Path(_session_kwargs['cache_name']) + p = pathlib.Path(_session_kwargs["cache_name"]) p.mkdir(parents=True, exist_ok=True) p.touch() self.session = CachedSession(**_session_kwargs) @@ -31,4 +35,8 @@ def _get(self, url): return self.session.get(url).json() def get_import_map(self, first_two_letters): - return self._get(self.IMPORT_MAP_URL_TEMPLATE.format(import_first_two_letters=first_two_letters)) + return self._get( + self.IMPORT_MAP_URL_TEMPLATE.format( + import_first_two_letters=first_two_letters + ) + )