From 01451b318ccc4f5aebf77c8e936c72ae192501f9 Mon Sep 17 00:00:00 2001 From: Justin Calamari Date: Tue, 30 Jan 2018 01:48:19 -0500 Subject: [PATCH 1/2] Add docstrings for databasesearcher and datapublisher --- sampledb/databasesearcher.py | 58 +++++++++++++++++++++++++----- sampledb/datapublisher.py | 70 ++++++++++++++++++++++++++++++------ sampledb/sampledatabase.py | 44 +++++++++++++++++++++++ sampledb/searchresult.py | 41 ++++++++++++++++----- 4 files changed, 186 insertions(+), 27 deletions(-) diff --git a/sampledb/databasesearcher.py b/sampledb/databasesearcher.py index 959bf02..5c98818 100644 --- a/sampledb/databasesearcher.py +++ b/sampledb/databasesearcher.py @@ -5,19 +5,37 @@ class DatabaseSearcher(object): """ - Seach a database. + Search a MongoDB database. """ def __init__(self, collection): - """ - Create a DatabaseSearcher. + """Create a DatabaseSearcher. + + Parameters + ---------- + collection : pymongo.collection.Collection + The MongoDB collection to search. + + Returns + ------- + DatabaseSearcher + A DatabaseSearcher object that searches the input MongoDB collection. """ self.collection = collection @classmethod def parse_date(cls, date): - """ - Convert a string in 'YYYY-MM-DD' format to a datetime object. + """Convert a string in 'YYYY-MM-DD' format to a datetime object. + + Parameters + ---------- + date : str + A date in 'YYYY-MM-DD' format. + + Returns + ------- + datetime + The input date as a datetime object. """ date = date.split('-') date = [int(i) for i in date] @@ -25,6 +43,20 @@ def parse_date(cls, date): @classmethod def date_range(cls, startdate=None, enddate=None): + """Return a MongoDB query for entries between two dates. + + Parameters + ---------- + startdate : str, optional + Search for entries on or after this date, given in 'YYYY-MM-DD' format. Default is None. + enddate : str, optional + Search for entries on or before this date, given in 'YYYY-MM-DD' format. Default is None. 
+ + Returns + ------- + dict + A MongoDB style query for entries between the two given dates. + """ range_ = {} if startdate: start = cls.parse_date(startdate) @@ -39,9 +71,19 @@ def date_range(cls, startdate=None, enddate=None): return {} def search(self, **kwargs): - """ - Search the database for entries with the specified key, value pairs. - Returns a cursor with the results. + """Search the database for entries with the specified key, value pairs. Returns a cursor with the results. + + Parameters + ---------- + startdate : str + A date in 'YYYY-MM-DD' format. Search for entries on or after this date. + enddate : str + A date in 'YYYY-MM-DD' format. Search for entries on or before this date. + + Returns + ------- + SearchResult + The entries matching the input query. """ query = kwargs if 'uid' in kwargs and isinstance(kwargs['uid'], list): diff --git a/sampledb/datapublisher.py b/sampledb/datapublisher.py index 4e8b79f..50adbfe 100644 --- a/sampledb/datapublisher.py +++ b/sampledb/datapublisher.py @@ -7,18 +7,42 @@ class DataPublisher(object): """ - Publish data to a database. + Publish data from a spreadsheet to a MongoDB database. """ def __init__(self, collection, schema={}): - """ - Create a DataPublisher. + """Create a DataPublisher. + + Parameters + ---------- + collection : pymongo.collection.Collection + The MongoDB collection to which data is published. + schema : dict, optional + A json schema against which data is validated. Defaults to an empty dict, which accepts all data. + + Returns + ------- + DataPublisher + A DataPublisher object that publishes to the input MongoDB collection. """ self.collection = collection self.schema = schema @classmethod def get_SAF(cls, filename): + """Get the SAF number of the samples in the spreadsheet if it is in the filename. + + Parameters + ---------- + filename : str + The name of the spreadsheet containing the data to be published. 
+ + Returns + ------- + str + The SAF number of the data in the spreadsheet, or None if it cannot be found. + """ + splt = filename.split('_') if len(splt) != 2: return None @@ -28,9 +52,16 @@ def get_SAF(cls, filename): @classmethod def parse_sheet(cls, sheet): - """ - Converts each row in a sheet to a dictionary. - Returns a list of the dictionaries. + """Converts each row in a single sheet of a workbook to a dictionary. + + Parameters + ---------- + sheet : pandas.core.frame.DataFrame + + Returns + ------- + list of dict + A list of dictionaries of data for each sample in the sheet. """ keys = {} for key in sheet.columns: @@ -53,9 +84,17 @@ def parse_sheet(cls, sheet): @classmethod def parse_wb(cls, wb): - """ - Converts each row in all sheets of a workbook to a dictionary. + """Converts each row in all sheets of a workbook to a dictionary. Returns a list of the dictionaries. + + Parameters + ---------- + wb : pandas.io.excel.ExcelFile + + Returns + ------- + list of dict + A list of dictionaries of data for each sample in a workbook. """ samples = [] @@ -65,11 +104,22 @@ def parse_wb(cls, wb): return samples def get_schema(self): + """Return the schema against which this DataPublisher validates. + + Returns + ------- + dict + The json schema against which this DataPublisher validates. + """ return self.schema def publish(self, filename): - """ - Publish a spreadsheet to the database. + """Publish a spreadsheet to the database. + + Parameters + ---------- + filename : str + The name of the spreadsheet containing the data to be published. 
""" saf = self.get_SAF(filename) wb = pd.ExcelFile(filename) diff --git a/sampledb/sampledatabase.py b/sampledb/sampledatabase.py index 6fab4f6..62b2fdb 100644 --- a/sampledb/sampledatabase.py +++ b/sampledb/sampledatabase.py @@ -6,23 +6,67 @@ class SampleDatabase(object): + """ + """ def __init__(self, hostname=None, db='sampleDB', collection='samples'): + """ + Parameters + ---------- + + Returns + ------- + """ c = MongoClient(hostname) collection = c[db][collection] self.searcher = DatabaseSearcher(collection) self.publisher = DataPublisher(collection) def load_schema(self, schema_file): + """ + Parameters + ---------- + + Returns + ------- + """ with open(schema_file) as sch: schema = json.load(sch) self.publisher.schema = schema def get_schema(self): + """Return the schema against which this DataPublisher validates. + + Returns + ------- + dict + The json schema against which this DataPublisher validates. + """ return self.publisher.get_schema() def search(self, **kwargs): + """Search the database for entries with the specified key, value pairs. Returns a cursor with the results. + + Parameters + ---------- + startdate : str + A date in 'YYYY-MM-DD' format. Search for entries on or after this date. + enddate : str + A date in 'YYYY-MM-DD' format. Search for entries on or before this date. + + Returns + ------- + SearchResult + The entries matching the input query. + """ return self.searcher.search(**kwargs) def publish(self, filename): + """Publish a spreadsheet to the database. + + Parameters + ---------- + filename : str + The name of the spreadsheet containing the data to be published. + """ self.publisher.publish(filename) diff --git a/sampledb/searchresult.py b/sampledb/searchresult.py index c828d64..3fcb1e4 100644 --- a/sampledb/searchresult.py +++ b/sampledb/searchresult.py @@ -10,8 +10,13 @@ class SearchResult(object): """ def __init__(self, results): - """ - Create a SearchResult. + """Create a SearchResult. 
+ + Parameters + ---------- + + Returns + ------- """ self.results = pd.DataFrame(results) if self.results.size == 0: @@ -34,22 +39,40 @@ def __ne__(self, other): return not self.__eq__(other) def count(self): - """ - Returns the number of samples that match the search. + """Get the number of samples that match the search. + + Parameters + ---------- + + Returns + ------- + int + The number of samples that match the search. """ return len(self.results) def filter(self, indices): - """ - Filter the search results. - Returns new SearchResult with only the filtered results. + """Filter the search results. + + Parameters + ---------- + + Returns + ------- + SearchResult + A new SearchResult object with only the filtered results. """ df = self.results.filter(items=indices, axis=0) return SearchResult(df.reset_index(drop=True)) def download(self, filename): - """ - Download the search results as a spreadsheet. + """Download the search results as a spreadsheet. + + Parameters + ---------- + + Returns + ------- """ frames = [] for name in self.results: From 57bb93c76505505525cb7fabc010483ea2d12cd8 Mon Sep 17 00:00:00 2001 From: Justin Calamari Date: Fri, 2 Feb 2018 13:42:40 -0500 Subject: [PATCH 2/2] DOC: add documentation for sampledatabase.py and searchresult.py --- sampledb/sampledatabase.py | 19 +++++++++++++++++-- sampledb/searchresult.py | 15 ++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/sampledb/sampledatabase.py b/sampledb/sampledatabase.py index 62b2fdb..98836c3 100644 --- a/sampledb/sampledatabase.py +++ b/sampledb/sampledatabase.py @@ -7,15 +7,25 @@ class SampleDatabase(object): """ + Search and publish data to a MongoDB database. """ def __init__(self, hostname=None, db='sampleDB', collection='samples'): - """ + """Create a SampleDatabase. + Parameters ---------- + hostname : str, optional + The hostname or IP address of the server hosting the database. Defaults to None, which is equivalent to localhost. 
+ db : str, optional + The name of the MongoDB database. Defaults to 'sampleDB'. + collection : str, optional + The name of the MongoDB collection. Defaults to 'samples'. Returns ------- + SampleDatabase + A SampleDatabase object for the specified MongoDB collection. """ c = MongoClient(hostname) collection = c[db][collection] @@ -23,12 +33,17 @@ def __init__(self, hostname=None, db='sampleDB', collection='samples'): self.publisher = DataPublisher(collection) def load_schema(self, schema_file): - """ + """Load a json schema from a specified file and store it on the publisher. + Parameters ---------- + schema_file : str + The name of the json schema file. Returns ------- + None + The loaded schema is assigned to the publisher rather than returned. """ with open(schema_file) as sch: schema = json.load(sch) diff --git a/sampledb/searchresult.py b/sampledb/searchresult.py index 3fcb1e4..0d9575f 100644 --- a/sampledb/searchresult.py +++ b/sampledb/searchresult.py @@ -6,7 +6,7 @@ class SearchResult(object): """ - An object containing the matching results of a search on the database. + An object containing the results of a search on the database. """ def __init__(self, results): @@ -14,9 +14,12 @@ def __init__(self, results): Parameters ---------- + results : list of dict or pandas.DataFrame Returns ------- + SearchResult + A SearchResult object for the input results. """ self.results = pd.DataFrame(results) if self.results.size == 0: @@ -41,9 +44,6 @@ def __ne__(self, other): def count(self): """Get the number of samples that match the search. - Parameters - ---------- - Returns ------- int @@ -56,6 +56,8 @@ def filter(self, indices): Parameters ---------- + indices : list of int + A list of indices of the entries to keep. Returns ------- @@ -70,9 +72,8 @@ def download(self, filename): Parameters ---------- - - Returns - ------- + filename : str + The name of the spreadsheet to write the results to. """ frames = [] for name in self.results: