From 01451b318ccc4f5aebf77c8e936c72ae192501f9 Mon Sep 17 00:00:00 2001
From: Justin Calamari <justin.calamari@gmail.com>
Date: Tue, 30 Jan 2018 01:48:19 -0500
Subject: [PATCH 1/2] Add docstrings for databasesearcher and datapublisher

---
 sampledb/databasesearcher.py | 58 +++++++++++++++++++++++++-----
 sampledb/datapublisher.py    | 70 ++++++++++++++++++++++++++++++------
 sampledb/sampledatabase.py   | 44 +++++++++++++++++++++++
 sampledb/searchresult.py     | 41 ++++++++++++++++-----
 4 files changed, 186 insertions(+), 27 deletions(-)

diff --git a/sampledb/databasesearcher.py b/sampledb/databasesearcher.py
index 959bf02..5c98818 100644
--- a/sampledb/databasesearcher.py
+++ b/sampledb/databasesearcher.py
@@ -5,19 +5,37 @@
 
 class DatabaseSearcher(object):
     """
-    Seach a database.
+    Search a MongoDB database.
     """
 
     def __init__(self, collection):
-        """
-        Create a DatabaseSearcher.
+        """Create a DatabaseSearcher.
+
+        Parameters
+        ----------
+        collection : pymongo.collection.Collection
+            The MongoDB collection to search.
+
+        Returns
+        -------
+        DatabaseSearcher
+            A DatabaseSearcher object that searches the input MongoDB collection.
         """
         self.collection = collection
 
     @classmethod
     def parse_date(cls, date):
-        """
-        Convert a string in 'YYYY-MM-DD' format to a datetime object.
+        """Convert a string in 'YYYY-MM-DD' format to a datetime object.
+
+        Parameters
+        ----------
+        date : str
+            A date in 'YYYY-MM-DD' format.
+
+        Returns
+        -------
+        datetime
+            The input date as a datetime object.
         """
         date = date.split('-')
         date = [int(i) for i in date]
@@ -25,6 +43,20 @@ def parse_date(cls, date):
 
     @classmethod
     def date_range(cls, startdate=None, enddate=None):
+        """Return a MongoDB query for entries between two dates.
+
+        Parameters
+        ----------
+        startdate : str, optional
+            Search for entries on or after this date, given in 'YYYY-MM-DD' format. Default is None.
+        enddate : str, optional
+            Search for entries on or before this date, given in 'YYYY-MM-DD' format. Default is None.
+
+        Returns
+        -------
+        dict
+            A MongoDB style query for entries between the two given dates.
+        """
         range_ = {}
         if startdate:
             start = cls.parse_date(startdate)
@@ -39,9 +71,19 @@ def date_range(cls, startdate=None, enddate=None):
             return {}
 
     def search(self, **kwargs):
-        """
-        Search the database for entries with the specified key, value pairs.
-        Returns a cursor with the results.
+        """Search the database for entries with the specified key, value pairs. Returns a SearchResult with the results.
+        
+        Parameters
+        ----------
+        startdate : str
+            A date in 'YYYY-MM-DD' format. Search for entries on or after this date.
+        enddate : str
+            A date in 'YYYY-MM-DD' format. Search for entries on or before this date.
+
+        Returns
+        -------
+        SearchResult
+            The entries matching the input query.
         """
         query = kwargs
         if 'uid' in kwargs and isinstance(kwargs['uid'], list):
diff --git a/sampledb/datapublisher.py b/sampledb/datapublisher.py
index 4e8b79f..50adbfe 100644
--- a/sampledb/datapublisher.py
+++ b/sampledb/datapublisher.py
@@ -7,18 +7,42 @@
 
 class DataPublisher(object):
     """
-    Publish data to a database.
+    Publish data from a spreadsheet to a MongoDB database.
     """
 
     def __init__(self, collection, schema={}):
-        """
-        Create a DataPublisher.
+        """Create a DataPublisher.
+
+        Parameters
+        ----------
+        collection : pymongo.collection.Collection
+            The MongoDB collection to which data is published.
+        schema : dict, optional
+            A json schema against which data is validated. Defaults to an empty dict, which accepts all data.
+
+        Returns
+        -------
+        DataPublisher
+            A DataPublisher object that publishes to the input MongoDB collection.
         """
         self.collection = collection
         self.schema = schema
 
     @classmethod
     def get_SAF(cls, filename):
+        """Get the SAF number of the samples in the spreadsheet if it is in the filename.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the spreadsheet containing the data to be published.
+
+        Returns
+        -------
+        str
+            The SAF number of the data in the spreadsheet, or None if it cannot be found.
+        """
+
         splt = filename.split('_')
         if len(splt) != 2:
             return None
@@ -28,9 +52,16 @@ def get_SAF(cls, filename):
 
     @classmethod
     def parse_sheet(cls, sheet):
-        """
-        Converts each row in a sheet to a dictionary.
-        Returns a list of the dictionaries.
+        """Converts each row in a single sheet of a workbook to a dictionary.
+
+        Parameters
+        ----------
+        sheet : pandas.core.frame.DataFrame
+
+        Returns
+        -------
+        list of dict
+            A list of dictionaries of data for each sample in the sheet.
         """
         keys = {}
         for key in sheet.columns:
@@ -53,9 +84,17 @@ def parse_sheet(cls, sheet):
 
     @classmethod
     def parse_wb(cls, wb):
-        """
-        Converts each row in all sheets of a workbook to a dictionary.
+        """Converts each row in all sheets of a workbook to a dictionary.
         Returns a list of the dictionaries.
+
+        Parameters
+        ----------
+        wb : pandas.io.excel.ExcelFile
+
+        Returns
+        -------
+        list of dict
+            A list of dictionaries of data for each sample in a workbook.
         """
         samples = []
 
@@ -65,11 +104,22 @@ def parse_wb(cls, wb):
         return samples
 
     def get_schema(self):
+        """Return the schema against which this DataPublisher validates.
+
+        Returns
+        -------
+        dict
+            The json schema against which this DataPublisher validates.
+        """
         return self.schema
 
     def publish(self, filename):
-        """
-        Publish a spreadsheet to the database.
+        """Publish a spreadsheet to the database.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the spreadsheet containing the data to be published.
         """
         saf = self.get_SAF(filename)
         wb = pd.ExcelFile(filename)
diff --git a/sampledb/sampledatabase.py b/sampledb/sampledatabase.py
index 6fab4f6..62b2fdb 100644
--- a/sampledb/sampledatabase.py
+++ b/sampledb/sampledatabase.py
@@ -6,23 +6,67 @@
 
 
 class SampleDatabase(object):
+    """
+    """
 
     def __init__(self, hostname=None, db='sampleDB', collection='samples'):
+        """
+        Parameters
+        ----------
+
+        Returns
+        -------
+        """
         c = MongoClient(hostname)
         collection = c[db][collection]
         self.searcher = DatabaseSearcher(collection)
         self.publisher = DataPublisher(collection)
 
     def load_schema(self, schema_file):
+        """
+        Parameters
+        ----------
+
+        Returns
+        -------
+        """
         with open(schema_file) as sch:
             schema = json.load(sch)
         self.publisher.schema = schema
 
     def get_schema(self):
+        """Return the schema against which this SampleDatabase's publisher validates.
+
+        Returns
+        -------
+        dict
+            The json schema against which the underlying DataPublisher validates.
+        """
         return self.publisher.get_schema()
 
     def search(self, **kwargs):
+        """Search the database for entries with the specified key, value pairs. Returns a SearchResult with the results.
+        
+        Parameters
+        ----------
+        startdate : str
+            A date in 'YYYY-MM-DD' format. Search for entries on or after this date.
+        enddate : str
+            A date in 'YYYY-MM-DD' format. Search for entries on or before this date.
+
+        Returns
+        -------
+        SearchResult
+            The entries matching the input query.
+        """
         return self.searcher.search(**kwargs)
 
     def publish(self, filename):
+        """Publish a spreadsheet to the database.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the spreadsheet containing the data to be published.
+        """
         self.publisher.publish(filename)
diff --git a/sampledb/searchresult.py b/sampledb/searchresult.py
index c828d64..3fcb1e4 100644
--- a/sampledb/searchresult.py
+++ b/sampledb/searchresult.py
@@ -10,8 +10,13 @@ class SearchResult(object):
     """
 
     def __init__(self, results):
-        """
-        Create a SearchResult.
+        """Create a SearchResult.
+
+        Parameters
+        ----------
+        
+        Returns
+        -------
         """
         self.results = pd.DataFrame(results)
         if self.results.size == 0:
@@ -34,22 +39,40 @@ def __ne__(self, other):
         return not self.__eq__(other)
 
     def count(self):
-        """
-        Returns the number of samples that match the search.
+        """Get the number of samples that match the search.
+        
+        Parameters
+        ----------
+
+        Returns
+        -------
+        int
+            The number of samples that match the search.
         """
         return len(self.results)
 
     def filter(self, indices):
-        """
-        Filter the search results.
-        Returns new SearchResult with only the filtered results.
+        """Filter the search results.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+        SearchResult
+            A new SearchResult object with only the filtered results.
         """
         df = self.results.filter(items=indices, axis=0)
         return SearchResult(df.reset_index(drop=True))
 
     def download(self, filename):
-        """
-        Download the search results as a spreadsheet.
+        """Download the search results as a spreadsheet.
+        
+        Parameters
+        ----------
+
+        Returns
+        -------
         """
         frames = []
         for name in self.results:

From 57bb93c76505505525cb7fabc010483ea2d12cd8 Mon Sep 17 00:00:00 2001
From: Justin Calamari <justin.calamari@gmail.com>
Date: Fri, 2 Feb 2018 13:42:40 -0500
Subject: [PATCH 2/2] DOC: add documentation for sampledatabase.py and
 searchresult.py

---
 sampledb/sampledatabase.py | 19 +++++++++++++++++--
 sampledb/searchresult.py   | 15 ++++++++-------
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/sampledb/sampledatabase.py b/sampledb/sampledatabase.py
index 62b2fdb..98836c3 100644
--- a/sampledb/sampledatabase.py
+++ b/sampledb/sampledatabase.py
@@ -7,15 +7,25 @@
 
 class SampleDatabase(object):
     """
+    Search and publish data to a MongoDB database.
     """
 
     def __init__(self, hostname=None, db='sampleDB', collection='samples'):
-        """
+        """Create a SampleDatabase.
+
         Parameters
         ----------
+        hostname : str, optional
+            The hostname or IP address of the server hosting the database. Defaults to None, which is equivalent to localhost.
+        db : str, optional
+            The name of the MongoDB database. Defaults to 'sampleDB'.
+        collection : str, optional
+            The name of the MongoDB collection. Defaults to 'samples'.
 
         Returns
         -------
+        SampleDatabase
+            A SampleDatabase object for the specified MongoDB collection.
         """
         c = MongoClient(hostname)
         collection = c[db][collection]
@@ -23,12 +33,17 @@ def __init__(self, hostname=None, db='sampleDB', collection='samples'):
         self.publisher = DataPublisher(collection)
 
     def load_schema(self, schema_file):
-        """
+        """Load a json schema from a specified file and set it as the publisher's schema.
+
         Parameters
         ----------
+        schema_file : str
+            The name of the json schema file.
 
         Returns
         -------
+        None
+            The loaded schema is stored on the publisher; nothing is returned.
         """
         with open(schema_file) as sch:
             schema = json.load(sch)
diff --git a/sampledb/searchresult.py b/sampledb/searchresult.py
index 3fcb1e4..0d9575f 100644
--- a/sampledb/searchresult.py
+++ b/sampledb/searchresult.py
@@ -6,7 +6,7 @@
 
 class SearchResult(object):
     """
-    An object containing the matching results of a search on the database.
+    An object containing the results of a search on the database.
     """
 
     def __init__(self, results):
@@ -14,9 +14,12 @@ def __init__(self, results):
 
         Parameters
         ----------
+        results : list of dict or pandas.DataFrame
         
         Returns
         -------
+        SearchResult
+            A SearchResult object for the input results.
         """
         self.results = pd.DataFrame(results)
         if self.results.size == 0:
@@ -41,9 +44,6 @@ def __ne__(self, other):
     def count(self):
         """Get the number of samples that match the search.
         
-        Parameters
-        ----------
-
         Returns
         -------
         int
@@ -56,6 +56,8 @@ def filter(self, indices):
 
         Parameters
         ----------
+        indices : list of int
+            A list of indices of the entries to keep.
 
         Returns
         -------
@@ -70,9 +72,8 @@ def download(self, filename):
         
         Parameters
         ----------
-
-        Returns
-        -------
+        filename : str
+            The name of the spreadsheet to write the results to.
         """
         frames = []
         for name in self.results: