Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 50 additions & 8 deletions sampledb/databasesearcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,58 @@

class DatabaseSearcher(object):
"""
Search a database.
Search a MongoDB database.
"""

def __init__(self, collection):
"""
Create a DatabaseSearcher.
"""Create a DatabaseSearcher.

Parameters
----------
collection : pymongo.collection.Collection
The MongoDB collection to search.

Returns
-------
DatabaseSearcher
A DatabaseSearcher object that searches the input MongoDB collection.
"""
self.collection = collection

@classmethod
def parse_date(cls, date):
"""
Convert a string in 'YYYY-MM-DD' format to a datetime object.
"""Convert a string in 'YYYY-MM-DD' format to a datetime object.

Parameters
----------
date : str
A date in 'YYYY-MM-DD' format.

Returns
-------
datetime
The input date as a datetime object.
"""
date = date.split('-')
date = [int(i) for i in date]
return datetime(date[0], date[1], date[2])

@classmethod
def date_range(cls, startdate=None, enddate=None):
"""Return a MongoDB query for entries between two dates.

Parameters
----------
startdate : str, optional
Search for entries on or after this date, given in 'YYYY-MM-DD' format. Default is None.
enddate : str, optional
Search for entries on or before this date, given in 'YYYY-MM-DD' format. Default is None.

Returns
-------
dict
A MongoDB style query for entries between the two given dates.
"""
range_ = {}
if startdate:
start = cls.parse_date(startdate)
Expand All @@ -39,9 +71,19 @@ def date_range(cls, startdate=None, enddate=None):
return {}

def search(self, **kwargs):
"""
Search the database for entries with the specified key, value pairs.
Returns a cursor with the results.
"""Search the database for entries with the specified key, value pairs. Returns a cursor with the results.

Parameters
----------
startdate : str
A date in 'YYYY-MM-DD' format. Search for entries on or after this date.
enddate : str
A date in 'YYYY-MM-DD' format. Search for entries on or before this date.

Returns
-------
SearchResult
The entries matching the input query.
"""
query = kwargs
if 'uid' in kwargs and isinstance(kwargs['uid'], list):
Expand Down
70 changes: 60 additions & 10 deletions sampledb/datapublisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,42 @@

class DataPublisher(object):
"""
Publish data to a database.
Publish data from a spreadsheet to a MongoDB database.
"""

def __init__(self, collection, schema={}):
"""
Create a DataPublisher.
"""Create a DataPublisher.

Parameters
----------
collection : pymongo.collection.Collection
The MongoDB collection to which data is published.
schema : dict, optional
A json schema against which data is validated. Defaults to an empty dict, which accepts all data.

Returns
-------
DataPublisher
A DataPublisher object that publishes to the input MongoDB collection.
"""
self.collection = collection
self.schema = schema

@classmethod
def get_SAF(cls, filename):
"""Get the SAF number of the samples in the spreadsheet if it is in the filename.

Parameters
----------
filename : str
The name of the spreadsheet containing the data to be published.

Returns
-------
str
The SAF number of the data in the spreadsheet, or None if it cannot be found.
"""

splt = filename.split('_')
if len(splt) != 2:
return None
Expand All @@ -28,9 +52,16 @@ def get_SAF(cls, filename):

@classmethod
def parse_sheet(cls, sheet):
"""
Converts each row in a sheet to a dictionary.
Returns a list of the dictionaries.
"""Converts each row in a single sheet of a workbook to a dictionary.

Parameters
----------
sheet : pandas.core.frame.DataFrame

Returns
-------
list of dict
A list of dictionaries of data for each sample in the sheet.
"""
keys = {}
for key in sheet.columns:
Expand All @@ -53,9 +84,17 @@ def parse_sheet(cls, sheet):

@classmethod
def parse_wb(cls, wb):
"""
Converts each row in all sheets of a workbook to a dictionary.
"""Converts each row in all sheets of a workbook to a dictionary.
Returns a list of the dictionaries.

Parameters
----------
wb : pandas.io.excel.ExcelFile

Returns
-------
list of dict
A list of dictionaries of data for each sample in a workbook.
"""
samples = []

Expand All @@ -65,11 +104,22 @@ def parse_wb(cls, wb):
return samples

def get_schema(self):
    """Expose the schema this DataPublisher validates against.

    Returns
    -------
    dict
        The json schema currently stored on ``self.schema``.
    """
    current_schema = self.schema
    return current_schema

def publish(self, filename):
"""
Publish a spreadsheet to the database.
"""Publish a spreadsheet to the database.

Parameters
----------
filename : str
The name of the spreadsheet containing the data to be published.
"""
saf = self.get_SAF(filename)
wb = pd.ExcelFile(filename)
Expand Down
59 changes: 59 additions & 0 deletions sampledb/sampledatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,82 @@


class SampleDatabase(object):
    """Search and publish data to a MongoDB database.

    Thin facade that connects to one MongoDB collection and hands it to a
    DatabaseSearcher (``self.searcher``) and a DataPublisher
    (``self.publisher``); every public method delegates to one of the two.
    """

    def __init__(self, hostname=None, db='sampleDB', collection='samples'):
        """Create a SampleDatabase.

        Parameters
        ----------
        hostname : str, optional
            The hostname or IP address of the server hosting the database.
            Defaults to None, which is equivalent to localhost.
        db : str, optional
            The name of the MongoDB database. Defaults to 'sampleDB'.
        collection : str, optional
            The name of the MongoDB collection. Defaults to 'samples'.
        """
        client = MongoClient(hostname)
        # Keep the resolved Collection under its own name instead of
        # rebinding the ``collection`` argument (which is the name string).
        mongo_collection = client[db][collection]
        self.searcher = DatabaseSearcher(mongo_collection)
        self.publisher = DataPublisher(mongo_collection)

    def load_schema(self, schema_file):
        """Load a json schema from a file and install it on the publisher.

        Parameters
        ----------
        schema_file : str
            The name of the json schema file.

        Returns
        -------
        dict
            The json schema as a dict. The same object is stored on
            ``self.publisher.schema``.
        """
        with open(schema_file) as sch:
            schema = json.load(sch)
        self.publisher.schema = schema
        # Return the parsed schema so the documented contract holds; callers
        # that ignored the previous None return value are unaffected.
        return schema

    def get_schema(self):
        """Return the schema against which this SampleDatabase's publisher validates.

        Returns
        -------
        dict
            The json schema reported by ``self.publisher.get_schema()``.
        """
        return self.publisher.get_schema()

    def search(self, **kwargs):
        """Search the database for entries with the specified key, value pairs.

        All keyword arguments are forwarded unchanged to
        ``self.searcher.search``.

        Parameters
        ----------
        startdate : str
            A date in 'YYYY-MM-DD' format. Search for entries on or after this date.
        enddate : str
            A date in 'YYYY-MM-DD' format. Search for entries on or before this date.

        Returns
        -------
        SearchResult
            The entries matching the input query, exactly as returned by
            ``self.searcher.search``.
        """
        return self.searcher.search(**kwargs)

    def publish(self, filename):
        """Publish a spreadsheet to the database.

        Delegates to ``self.publisher.publish``; nothing is returned.

        Parameters
        ----------
        filename : str
            The name of the spreadsheet containing the data to be published.
        """
        self.publisher.publish(filename)
44 changes: 34 additions & 10 deletions sampledb/searchresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@

class SearchResult(object):
"""
An object containing the matching results of a search on the database.
An object containing the results of a search on the database.
"""

def __init__(self, results):
"""
Create a SearchResult.
"""Create a SearchResult.

Parameters
----------
results : list of dict or pandas.DataFrame

Returns
-------
SearchResult
A SearchResult object for the input results.
"""
self.results = pd.DataFrame(results)
if self.results.size == 0:
Expand All @@ -34,22 +42,38 @@ def __ne__(self, other):
return not self.__eq__(other)

def count(self):
"""
Returns the number of samples that match the search.
"""Get the number of samples that match the search.

Returns
-------
int
The number of samples that match the search.
"""
return len(self.results)

def filter(self, indices):
"""
Filter the search results.
Returns new SearchResult with only the filtered results.
"""Filter the search results.

Parameters
----------
indices : list of int
A list of indices of the entries to keep.

Returns
-------
SearchResult
A new SearchResult object with only the filtered results.
"""
df = self.results.filter(items=indices, axis=0)
return SearchResult(df.reset_index(drop=True))

def download(self, filename):
"""
Download the search results as a spreadsheet.
"""Download the search results as a spreadsheet.

Parameters
----------
filename : str
The name of the spreadsheet to write the results to.
"""
frames = []
for name in self.results:
Expand Down