From 3ee3dda2673e99c215c1470b061a9a63c4e6fa50 Mon Sep 17 00:00:00 2001 From: Rifky Bujana Bisri Date: Sun, 30 Jul 2023 16:51:46 -0700 Subject: [PATCH 1/4] Update .gitignore & refactor material.py --- .gitignore | 163 +++++++++++++++++++++++++++++++++++++++++++- stadata/material.py | 26 ++++--- 2 files changed, 179 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index a577810..7f4fd0a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,165 @@ dist/ stadata.egg-infostadata.egg-info stadata.egg-info/ .DS_Store -stadata/__pycache__ \ No newline at end of file +stadata/__pycache__ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/stadata/material.py b/stadata/material.py index 0b8fa9c..5fe9a61 100644 --- a/stadata/material.py +++ b/stadata/material.py @@ -1,20 +1,28 @@ import requests class Material(object): - DATA=None - CONTENT=None - def __init__(self, data): - self.DATA=data + self.DATA = data + + # Download pdf response = requests.get(data['pdf']) + # get pdf content self.CONTENT = response.content - def desc(self): + """ + Show material description + """ return self.DATA - def download(self,url): - pdf = open(url+"/"+self.DATA['title']+".pdf", 'wb') - pdf.write(self.CONTENT) - pdf.close() + def download(self, url): + """ + Download pdf file + :param url: url to save pdf file + """ + # open file in the url for writing + with open(f"{url}/{self.DATA['title']}.pdf", 'wb') as pdf: + # write pdf content to file + pdf.write(self.CONTENT) + print("Download content success") \ No newline at end of file From 6fed82e246d4c34c287832789f3cf0693ade516f Mon Sep 17 00:00:00 2001 From: Rifky Bujana Bisri Date: Sun, 30 Jul 2023 16:54:01 -0700 Subject: [PATCH 2/4] update material.py --- stadata/material.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stadata/material.py b/stadata/material.py index 5fe9a61..6d95487 100644 --- a/stadata/material.py +++ b/stadata/material.py @@ -15,7 +15,7 @@ def desc(self): """ return self.DATA - def download(self, url): + def download(self, url: str): """ Download pdf file :param url: url to save pdf file From 06d86295af6ca2ec5065e8c7143a937c05bb06b2 Mon Sep 17 00:00:00 2001 From: Rifky Bujana Bisri Date: Sun, 30 Jul 2023 17:45:11 -0700 Subject: [PATCH 3/4] refactor api request for __get_list and __get_view --- stadata/main.py | 117 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/stadata/main.py b/stadata/main.py index a0ea59c..668660a 100644 --- a/stadata/main.py +++ b/stadata/main.py @@ -3,23 +3,96 @@ import pandas as pd from tqdm import tqdm import html +import json from .material import Material BASE_URL = "https://webapi.bps.go.id/v1/" +class Model(object): + """ + Object contains different request for each model + """ + + def __init__(self, **kwargs) -> None: + self.__dict__ = kwargs + + def data(self, token: str): + """ + Get data from webapi + """ + return requests.get(f"{BASE_URL}api/list/model/{self.model}" + + f"/perpage/100000/lang/{self.lang}" + + f"/domain/{self.domain}" + + f"/key/{token}" + + f"/keyword/{self.keyword}" + + f"/page/{str(self.page)}" + + f"/var/{str(self.var)}" + + (f"/th/{str(self.th)}" if self.th != '' else '')) + + def pressrelease(self, token: str): + """ + Get press release from webapi + """ + return requests.get(f"{BASE_URL}api/list/model/{self.model}" + + f"/perpage/100000/lang/{self.lang}" + + f"/domain/{self.domain}" + + f"/key/{token}" + + f"/keyword/{self.keyword}" + + f"/page/{str(self.page)}" + + (f"/month/{str(self.month)}" if self.month != '' else '') + + (f"/year/{str(self.year)}" if self.year != '' else '')) + + def publication(self, token: str): + """ + Get publication from webapi + """ + return requests.get(f"{BASE_URL}api/list/model/{self.model}" + + f"/perpage/100000/lang/{self.lang}" + + f"/domain/{self.domain}" + + f"/key/{token}" + + f"/keyword/{self.keyword}" + + f"/page/{str(self.page)}" + + (f"/month/{str(self.month)}" if self.month != '' else '') + + (f"/year/{str(self.year)}" if self.year != '' else '')) + + def other(self, token: str): + """ + Get static table from webapi + """ + return requests.get(f"{BASE_URL}api/list/model/{self.model}" + + f"/perpage/100000/lang/{self.lang}" + + f"/domain/{self.domain}" + + f"/key/{token}" + + f"/keyword/{self.keyword}" + + f"/page/{str(self.page)}") + class Client(object): """ Object to connect with webapi """ - TOKEN = "" - def __init__(self, token): + + def __init__(self, token: str): """ Initialize client object :param token: token from webapi website """ self.TOKEN = token - def __get_list(self,lang = 'ind',domain='0000',model='statictable',keyword='',page=1,var='',turvar='',vervar='',th='',turth='',month='',year=''): + def __get_list( + self, + lang:str = 'ind', + domain:str = '0000', + model:str = 'statictable', + keyword:str = '', + page:int = 1, + var:str = '', + turvar:str = '', + vervar:str = '', + th:str = '', + turth:str = '', + month:str = '', + year:str = '' + ): """ Method to get list data based on model :param lang: Language to display data. Default value: ind. Allowed values: "ind", "eng" @@ -35,28 +108,24 @@ def __get_list(self,lang = 'ind',domain='0000',model='statictable',keyword='',pa :param month: Month of publication or press release in int :param year: Year of publication or press release """ - if(model=='data'): - if(th != ''): - url_th = '/th/'f'{th}' - else: - url_th = '' - res = requests.get(f'{BASE_URL}api/list/model/'f'{model}/perpage/100000/lang/'f'{lang}/domain/'f'{domain}/key/'f'{self.TOKEN}/keyword/'f'{keyword}/page/'f'{str(page)}/var/'f'{str(var)}'f'{url_th}') - elif((model=='pressrelease')|(model=='publication')): - res = requests.get('https://webapi.bps.go.id/v1/api/list/model/'+model+'/perpage/100000/lang/'+lang+'/domain/'+domain+'/key/'+key+'/keyword/'+keyword+'/page/'+str(page)+ - (('/month/'+str(month)) if month != '' else '')+ - (('/year/'+str(year)) if year != '' else '')) + + model_request = Model(lang=lang, domain=domain, model=model, keyword=keyword, page=page, var=var, + turvar=turvar, vervar=vervar, th=th, turth=turth, month=month, year=year) + + if model in ['data', 'pressrelease', 'publication']: + res = model_request.__getattribute__(model)(self.TOKEN) else: - res = requests.get(f'{BASE_URL}api/list/model/'f'{model}/perpage/100000/lang/'f'{lang}/domain/'f'{domain}/key/'f'{self.TOKEN}/keyword/'f'{keyword}/page/'f'{str(page)}') - if(res.status_code!=200): + res = model_request.other(self.TOKEN) + + if(res.status_code != 200): warnings.warn("Connection failed") else: res = res.json() - if(res['status']!='OK'): + # if res is not OK, raise exception + if(res['status'] != 'OK'): raise Exception(res['message']) return res - - def __get_view(self,domain,model,lang,idx): """ Based Method view statictable @@ -65,12 +134,18 @@ def __get_view(self,domain,model,lang,idx): :param model: Type data to display :idx : ID static table to show """ - res = requests.get(f'{BASE_URL}api/view/model/'f'{model}/lang/'f'{lang}/domain/'f'{domain}/id/'f'{idx}/key/'+self.TOKEN+'/') - if(res.status_code!=200): + res = requests.get(f'{BASE_URL}api/view/model/{model}' + + f'/lang/{lang}' + + f'/domain/{domain}' + + f'/id/{idx}' + + f'/key/{self.TOKEN}/') + + if(res.status_code != 200): warnings.warn("Connection failed") else: res = res.json() - if(res['status']!='OK'): + # if res is not OK, raise exception + if(res['status'] != 'OK'): raise Exception(res['message']) return res From cfdd53db04df558afca6cd4e400fe417fda27da0 Mon Sep 17 00:00:00 2001 From: Rifky Bujana Bisri Date: Mon, 31 Jul 2023 21:05:23 -0700 Subject: [PATCH 4/4] refactor requests code --- stadata/main.py | 84 +++++++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/stadata/main.py b/stadata/main.py index 668660a..66be9f3 100644 --- a/stadata/main.py +++ b/stadata/main.py @@ -2,20 +2,48 @@ import warnings import pandas as pd from tqdm import tqdm +from functools import wraps import html import json from .material import Material BASE_URL = "https://webapi.bps.go.id/v1/" -class Model(object): +def validate_request(func): """ - Object contains different request for each model + Decorator to validate request + + :param func: function to be decorated + :return: decorated function + """ + + @wraps(func) + def decorated(*args, **kwargs): + if args[0].TOKEN == '': + raise Exception('Token is not set') + + res = func(*args, **kwargs) + + if(res.status_code != 200): + warnings.warn("Connection failed") + else: + res = res.json() + # if res is not OK, raise exception + if(res['status'] != 'OK'): + raise Exception(res['message']) + return res + + return decorated + +class RequestModel(object): + """ + Object contains different request for each data model """ def __init__(self, **kwargs) -> None: self.__dict__ = kwargs + @validate_request def data(self, token: str): """ Get data from webapi @@ -29,6 +57,7 @@ def data(self, token: str): f"/var/{str(self.var)}" + (f"/th/{str(self.th)}" if self.th != '' else '')) + @validate_request def pressrelease(self, token: str): """ Get press release from webapi @@ -42,6 +71,7 @@ def pressrelease(self, token: str): (f"/month/{str(self.month)}" if self.month != '' else '') + (f"/year/{str(self.year)}" if self.year != '' else '')) + @validate_request def publication(self, token: str): """ Get publication from webapi @@ -55,6 +85,7 @@ def publication(self, token: str): (f"/month/{str(self.month)}" if self.month != '' else '') + (f"/year/{str(self.year)}" if self.year != '' else '')) + @validate_request def other(self, token: str): """ Get static table from webapi @@ -65,6 +96,18 @@ def other(self, token: str): f"/key/{token}" + f"/keyword/{self.keyword}" + f"/page/{str(self.page)}") + + @validate_request + def view(self, token: str): + """ + Get view from webapi + """ + return requests.get(f"{BASE_URL}api/view/model/{self.model}" + + f"/lang/{self.lang}" + + f"/domain/{self.domain}" + + f"/id/{self.idx}" + + f"/key/{token}") + class Client(object): """ @@ -109,22 +152,15 @@ def __get_list( :param year: Year of publication or press release """ - model_request = Model(lang=lang, domain=domain, model=model, keyword=keyword, page=page, var=var, - turvar=turvar, vervar=vervar, th=th, turth=turth, month=month, year=year) + # create model request object based on the parameter + request_model = RequestModel(lang=lang, domain=domain, model=model, keyword=keyword, page=page, var=var, + turvar=turvar, vervar=vervar, th=th, turth=turth, month=month, year=year) if model in ['data', 'pressrelease', 'publication']: - res = model_request.__getattribute__(model)(self.TOKEN) - else: - res = model_request.other(self.TOKEN) - - if(res.status_code != 200): - warnings.warn("Connection failed") - else: - res = res.json() - # if res is not OK, raise exception - if(res['status'] != 'OK'): - raise Exception(res['message']) - return res + # run request based on model name + return request_model.__getattribute__(model)(self.TOKEN) + + return request_model.other(self.TOKEN) def __get_view(self,domain,model,lang,idx): """ @@ -134,20 +170,8 @@ def __get_view(self,domain,model,lang,idx): :param model: Type data to display :idx : ID static table to show """ - res = requests.get(f'{BASE_URL}api/view/model/{model}' + - f'/lang/{lang}' + - f'/domain/{domain}' + - f'/id/{idx}' + - f'/key/{self.TOKEN}/') - - if(res.status_code != 200): - warnings.warn("Connection failed") - else: - res = res.json() - # if res is not OK, raise exception - if(res['status'] != 'OK'): - raise Exception(res['message']) - return res + requests_model = RequestModel(lang=lang, domain=domain, model=model, idx=idx) + return requests_model.view(self.TOKEN) def __format_list(self,list): list['domain'] = list['domain'].map('{0:0>4}'.format)