|
| 1 | +#!/usr/bin/env python |
| 2 | +############################################################################## |
| 3 | +# |
| 4 | +# diffpy.utils by DANSE Diffraction group |
| 5 | +# Simon J. L. Billinge |
| 6 | +# (c) 2010 The Trustees of Columbia University |
| 7 | +# in the City of New York. All rights reserved. |
| 8 | +# |
| 9 | +# File coded by: |
| 10 | +# |
| 11 | +# See AUTHORS.txt for a list of people who contributed. |
| 12 | +# See LICENSE_DANSE.txt for license information. |
| 13 | +# |
| 14 | +############################################################################## |
| 15 | + |
| 16 | +import pathlib |
| 17 | +import json |
| 18 | + |
| 19 | +from .custom_exceptions import UnsupportedTypeError, ImproperSizeError |
| 20 | + |
| 21 | +# FIXME: add support for yaml, xml |
| 22 | +supported_formats = ['.json'] |
| 23 | + |
| 24 | + |
def serialize_data(filename, hdata: dict, data_table: list, show_path=True, dt_colnames=None, serial_file=None):
    """Serialize file data into a dictionary. Can also save dictionary into a serial language file.
    Dictionary is formatted as {filename: data}.

    Requires hdata and data_table generated from loadData.

    filename -- name of the file whose data is being serialized.
    hdata -- Dictionary of PDF metadata generated by loadData.
    data_table -- List (or numpy array) of data rows parsed by loadData.
    dt_colnames -- List containing names of each column in data_table. Every name in
                   dt_colnames will be put into the dictionary as a key with a value
                   of that column in data_table (stored as a List). Put None for
                   columns without names. If dt_colnames has fewer non-None entries
                   than columns in data_table, the pair {'data table': data_table} will be put
                   in the dictionary. (Default None: only entry {'data table': data_table}
                   will be added to dictionary.)
    show_path -- include a path element in the database entry (default True).
                 If 'path' is not included in hdata, extract path from filename.
    serial_file -- serial language file to dump dictionary into (default None: no file
                   is written).

    Raises ImproperSizeError when dt_colnames has more entries than data_table has
    columns, and UnsupportedTypeError when serial_file has an unsupported extension.

    Returns the dictionary loaded from/into the updated database file.
    """

    # compile data_table and hdata together
    data = {}

    # resolve to an absolute path so the entry title and 'path' value are
    # stable no matter how the caller spelled filename
    abs_path = pathlib.Path(filename).resolve()
    # add path to start of data if requested
    if show_path and 'path' not in hdata:
        data.update({'path': abs_path.as_posix()})
    # title the entry with name of file (taken from end of path)
    title = abs_path.name

    # first add named columns in dt_colnames
    named_columns = 0  # initial value
    max_columns = 1  # higher than named_columns to trigger 'data table' entry
    if dt_colnames is not None:
        max_columns = max(len(row) for row in data_table)
        num_col_names = len(dt_colnames)
        if max_columns < num_col_names:  # assume numpy.loadtxt gives non-irregular array
            raise ImproperSizeError("More entries in dt_colnames than columns in data_table.")
        for idx in range(num_col_names):
            colname = dt_colnames[idx]
            if colname is not None:
                # row-wise extraction works for both a numpy 2-D array and a
                # plain list of rows (the previous data_table[:, idx] slice
                # required a numpy array despite the declared list type)
                data.update({colname: [row[idx] for row in data_table]})
                named_columns += 1

    # second add data in hdata dict (these entries take precedence over
    # any named column with the same key)
    data.update(hdata)

    # finally add data_table as an entry named 'data table' if not all columns were parsed
    if named_columns < max_columns:
        # if 'data table' is already a key, keep adding primes to the end
        dt_name = 'data table'
        while dt_name in data:
            dt_name += " prime"
        data.update({dt_name: data_table})

    # generate the database entry keyed by the bare file name
    entry = {title: data}

    # no save requested
    if serial_file is None:
        return entry

    # saving/updating file: check the target is a supported serial type first
    sf = pathlib.Path(serial_file)
    extension = sf.suffix
    if extension not in supported_formats:
        raise UnsupportedTypeError(sf.name, supported_formats)

    # probe for an existing database file with pathlib rather than a bare
    # open() call, which leaked an unclosed file handle
    existing = sf.is_file()

    # json
    if extension == '.json':
        if not existing:
            # new database file: dump just this entry
            file_data = entry  # for return
            with open(serial_file, 'w') as jsonfile:
                json.dump(file_data, jsonfile, indent=2)
        else:
            # existing database: load, merge in the new entry, rewrite
            with open(serial_file, 'r') as json_read:
                file_data = json.load(json_read)
            file_data.update(entry)
            with open(serial_file, 'w') as json_write:
                json.dump(file_data, json_write, indent=2)

    return file_data
| 129 | + |
| 130 | + |
def deserialize_data(filename):
    """Load a dictionary from a serial file.

    filename -- database file to load from.

    Returns a dictionary of database information.
    """

    # reject anything that is not a supported serial format
    serial_path = pathlib.Path(filename)
    extension = serial_path.suffix
    if extension not in supported_formats:
        raise UnsupportedTypeError(serial_path.name, supported_formats)

    # json
    if extension == '.json':
        with open(filename, 'r') as serial_file:
            loaded_dict = json.load(serial_file)

    return loaded_dict
0 commit comments