Commit 9aa595e

Committed by Andrew Yang
Merge commit, 2 parents: c963063 + 7190375

File tree: 16 files changed (+471, -34 lines)

CHANGELOG.md

Lines changed: 14 additions & 6 deletions

@@ -1,21 +1,29 @@
 # Release notes
 
-## Version 3.1.0 – 2022-12-09
+## Version 3.2.0 – 2023-08-**
 
 ### Added
 
-- Compatibility with Python 3.10, 3.9, 3.8
+- CI coverage.
+- New tests for loadData function.
+- loadData function now toggleable. Can return either (a) data read from data blocks or (b) header
+  information stored above the data block.
 
-### Changed
+### Removed
 
-### Deprecated
+- Remove use of pkg_resources (deprecated).
+- No longer use Travis.
+
+## Version 3.1.0 – 2022-12-09
+
+### Added
+
+- Compatibility with Python 3.10, 3.9, 3.8
 
 ### Removed
 
 - Remove the support for Python 3.5, 3.6.
 
-### Fixed
-
 ## Version 3.0.0 -- 2019-03-12
 
 Differences from version 1.2.2.

conda-recipe/run_test.py

Lines changed: 4 additions & 0 deletions

@@ -1,4 +1,8 @@
 #!/usr/bin/env python
 
+import sys
+import pathlib
+sys.path.append((pathlib.Path.cwd().parent.absolute() / "src").as_posix())
+
 import diffpy.utils.tests
 assert diffpy.utils.tests.test().wasSuccessful()

src/diffpy/utils/parsers/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -17,9 +17,11 @@
 """
 
 from .loaddata import loadData
+from .serialization import serialize_data, deserialize_data
 from .resample import resample
 
 # silence the pyflakes syntax checker
 assert loadData or resample or True
+assert serialize_data or deserialize_data or True
 
 # End of file

src/diffpy/utils/parsers/custom_exceptions.py

Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# diffpy.utils      by DANSE Diffraction group
+#                   Simon J. L. Billinge
+#                   (c) 2010 The Trustees of Columbia University
+#                   in the City of New York.  All rights reserved.
+#
+# File coded by:
+#
+# See AUTHORS.txt for a list of people who contributed.
+# See LICENSE_DANSE.txt for license information.
+#
+##############################################################################
+
+class UnsupportedTypeError(Exception):
+    """For file types not supported by our parsers.
+
+    supported_types -- list of supported types.
+    file            -- file triggering the error.
+    message         -- for writing a custom message.
+    """
+
+    def __init__(self, file, supported_types=None, message=None):
+        if message is None:
+            self.message = f"The file {file} is not supported."
+            if supported_types is not None:
+                self.message += " Supported file types include: "
+                for t in supported_types:
+                    self.message += t + ", "
+                self.message = self.message[:-2] + "."
+        else:
+            self.message = message  # use the caller-supplied message
+        super().__init__(self.message)
+
+
+class ImproperSizeError(Exception):
+    """When the size of an object does not match expectations.
+
+    bad_object -- object with improper size.
+    message    -- for writing a custom message.
+    """
+
+    def __init__(self, bad_object, message=None):
+        if message is None:
+            self.message = f"The size of {bad_object} is different than expected."
+        else:
+            self.message = message  # use the caller-supplied message
+        super().__init__(self.message)
src/diffpy/utils/parsers/loaddata.py

Lines changed: 40 additions & 27 deletions

@@ -19,27 +19,40 @@
 def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwargs):
     """Find and load data from a text file.
 
-    The data reading starts at the first matrix block of at least minrows rows
-    and constant number of columns. This seems to work for most of the
-    datafiles including those generated by PDFGetX2.
+    The data block is identified as the first matrix block of at least minrows rows
+    and constant number of columns. This seems to work for most of the datafiles
+    including those generated by diffpy programs.
 
-    filename -- name of the file we want to load data from.
-    minrows  -- minimum number of rows in the first data block.
-                All rows must have the same number of floating point values.
-    headers  -- return also a dictionary of parameters specified in header
-    hdel     -- delimiter for parsing header information
-    hignore  -- ignore header rows beginning with any elements in the hignore list
-    usecols  -- zero-based index of columns to be loaded, by default use
-                all detected columns. The reading skips data blocks that
-                do not have the usecols-specified columns.
-    unpack   -- return data as a sequence of columns that allows tuple
-                unpacking such as x, y = loadData(FILENAME, unpack=True).
-                Note transposing the loaded array as loadData(FILENAME).T
-                has the same effect.
-    kwargs   -- keyword arguments that are passed to numpy.loadtxt
+    filename -- name of the file we want to load data from.
+    minrows  -- minimum number of rows in the first data block.
+                All rows must have the same number of floating point values.
+    headers  -- when False (default), the function returns a numpy array of the
+                data in the data block. When True, the function instead returns a
+                dictionary of parameters and their corresponding values parsed from
+                the header (information prior to the data block). See hdel and
+                hignore for options to help with parsing header information.
+    hdel     -- (only used when headers enabled) delimiter for parsing header
+                information (default '='). E.g. using the default hdel, the line
+                'parameter = p_value' is put into the dictionary as
+                {parameter: p_value}.
+    hignore  -- (only used when headers enabled) ignore header rows beginning
+                with any elements in the hignore list. E.g. hignore=['# ', '[']
+                means the following lines are skipped: '# qmax=10', '[defaults]'.
+    kwargs   -- keyword arguments that are passed to numpy.loadtxt, including
+                the arguments below. (See also numpy.loadtxt for more details.)
+    delimiter -- delimiter for the data in the block (default: use whitespace).
+                 For comma-separated data blocks, set delimiter to ','.
+    usecols  -- zero-based index of columns to be loaded, by default use
+                all detected columns. The reading skips data blocks that
+                do not have the usecols-specified columns.
+    unpack   -- return data as a sequence of columns that allows tuple
+                unpacking such as x, y = loadData(FILENAME, unpack=True).
+                Note transposing the loaded array as loadData(FILENAME).T
+                has the same effect.
 
-    Return a numpy array of the data.
-    See also numpy.loadtxt for more details.
+    Return a numpy array of the data (data_block). If headers enabled, instead
+    return a dictionary of parameters read from the header (hdata).
     """
     from numpy import array, loadtxt
     # for storing header data

@@ -124,22 +137,22 @@ def countcolumnsvalues(line):
         # block was found here!
         if nrows >= minrows:
             break
+
+    # Return header data if requested
+    if headers:
+        return hdata  # return here, so we do not proceed to reading the data block
+
     # Return an empty array when no data found.
     # loadtxt would otherwise raise an exception on loading from EOF.
     if start is None:
-        rv = array([], dtype=float)
+        data_block = array([], dtype=float)
     else:
         fid.seek(start)
         # always use usecols argument so that loadtxt does not crash
         # in case of trailing delimiters.
         kwargs.setdefault('usecols', list(range(ncvblock[0])))
-        rv = loadtxt(fid, **kwargs)
-
-    # return headers if requested
-    if headers:
-        return hdata, rv
-    # otherwise do not
-    return rv
+        data_block = loadtxt(fid, **kwargs)
+    return data_block
 
 
 class TextDataLoader(object):
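
A minimal sketch of the new toggle (the file name "measurement.gr" and its
header contents are assumptions for illustration, not part of the commit):

    from diffpy.utils.parsers import loadData

    data_block = loadData("measurement.gr")  # numpy array of the data block
    hdata = loadData("measurement.gr", headers=True, hdel="=", hignore=["# ", "["])
    # hdata is a dict of header parameters, e.g. {'qmax': 10.0, ...}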
src/diffpy/utils/parsers/serialization.py

Lines changed: 151 additions & 0 deletions

@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+##############################################################################
+#
+# diffpy.utils      by DANSE Diffraction group
+#                   Simon J. L. Billinge
+#                   (c) 2010 The Trustees of Columbia University
+#                   in the City of New York.  All rights reserved.
+#
+# File coded by:
+#
+# See AUTHORS.txt for a list of people who contributed.
+# See LICENSE_DANSE.txt for license information.
+#
+##############################################################################
+
+import pathlib
+import json
+
+from .custom_exceptions import UnsupportedTypeError, ImproperSizeError
+
+# FIXME: add support for yaml, xml
+supported_formats = ['.json']
+
+
+def serialize_data(filename, hdata: dict, data_table: list, show_path=True, dt_colnames=None, serial_file=None):
+    """Serialize file data into a dictionary. Can also save the dictionary into a serial language file.
+    The dictionary is formatted as {filename: data}.
+
+    Requires hdata and data_table generated from loadData.
+
+    filename    -- name of the file whose data is being serialized.
+    hdata       -- dictionary of PDF metadata generated by loadData.
+    data_table  -- list storing data parsed by loadData.
+    dt_colnames -- list containing names of each column in data_table. Every name in
+                   dt_colnames will be put into the dictionary as a key with a value
+                   of that column in data_table (stored as a list). Put None for
+                   columns without names. If dt_colnames has fewer non-None entries
+                   than columns in data_table, the pair {'data table': data_table}
+                   will be put in the dictionary. (Default None: only the entry
+                   {'data table': data_table} will be added to the dictionary.)
+    show_path   -- include a path element in the database entry (default True).
+                   If 'path' is not included in hdata, extract the path from filename.
+    serial_file -- serial language file to dump the dictionary into.
+
+    Returns the dictionary loaded from/into the updated database file.
+    """
+
+    # compile data_table and hdata together
+    data = {}
+
+    # handle getting the name of the file for a variety of filename types
+    abs_path = pathlib.Path(filename).resolve()
+    # add path to start of data if requested
+    if show_path and 'path' not in hdata.keys():
+        data.update({'path': abs_path.as_posix()})
+    # title the entry with the name of the file (taken from the end of the path)
+    title = abs_path.name
+
+    # first add named columns in dt_colnames
+    named_columns = 0  # initial value
+    max_columns = 1  # higher than named_columns to trigger 'data table' entry
+    if dt_colnames is not None:
+        num_columns = [len(row) for row in data_table]
+        max_columns = max(num_columns)
+        num_col_names = len(dt_colnames)
+        if max_columns < num_col_names:  # assume numpy.loadtxt gives a non-irregular array
+            raise ImproperSizeError("More entries in dt_colnames than columns in data_table.")
+        named_columns = 0
+        for idx in range(num_col_names):
+            colname = dt_colnames[idx]
+            if colname is not None:
+                data.update({colname: list(data_table[:, idx])})
+                named_columns += 1
+
+    # second add data in the hdata dict
+    data.update(hdata)
+
+    # finally add data_table as an entry named 'data table' if not all columns were parsed
+    if named_columns < max_columns:
+        if 'data table' not in data.keys():
+            data.update({'data table': data_table})
+        else:  # if 'data table' is already a key, keep adding primes to the end
+            dt_name = 'data table'
+            while dt_name in data.keys():
+                dt_name += " prime"
+            data.update({dt_name: data_table})
+
+    # parse name using pathlib and generate dictionary entry
+    entry = {title: data}
+
+    # no save
+    if serial_file is None:
+        return entry
+
+    # saving/updating file
+    # check if supported type
+    sf = pathlib.Path(serial_file)
+    sf_name = sf.name
+    extension = sf.suffix
+    if extension not in supported_formats:
+        raise UnsupportedTypeError(sf_name, supported_formats)
+
+    # new file or update
+    existing = sf.exists()  # check without leaving a file handle open
+
+    # json
+    if extension == '.json':
+        # dump if non-existing
+        if not existing:
+            with open(serial_file, 'w') as jsonfile:
+                file_data = entry  # for return
+                json.dump(file_data, jsonfile, indent=2)
+
+        # update if existing
+        else:
+            with open(serial_file, 'r') as json_read:
+                file_data = json.load(json_read)
+                file_data.update(entry)
+            with open(serial_file, 'w') as json_write:
+                json.dump(file_data, json_write, indent=2)
+
+    return file_data
+
+
+def deserialize_data(filename):
+    """Load a dictionary from a serial file.
+
+    filename -- database file to load from.
+
+    Returns a dictionary of database information.
+    """
+
+    # check if supported type
+    f = pathlib.Path(filename)
+    f_name = f.name
+    extension = f.suffix
+    if extension not in supported_formats:
+        raise UnsupportedTypeError(f_name, supported_formats)
+
+    # json
+    if extension == '.json':
+        with open(filename, 'r') as json_file:
+            j_dict = json.load(json_file)
+
+    return j_dict
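
A possible round trip under the new API (the file and column names here are
invented for the example, and assume loadData returns a two-column array):

    from diffpy.utils.parsers import loadData, serialize_data, deserialize_data

    data_table = loadData("sample.gr")
    hdata = loadData("sample.gr", headers=True)
    # name both columns so they are stored as json-friendly lists
    serialize_data("sample.gr", hdata, data_table,
                   dt_colnames=["r", "gr"], serial_file="db.json")
    db = deserialize_data("db.json")
    # -> {'sample.gr': {'path': ..., 'r': [...], 'gr': [...], ...}}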

src/diffpy/utils/tests/test_loaddata.py

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ def test_loadData_headers(self):
         """
         hignore = ["# ", "// ", "["]  # ignore lines beginning with these strings
         delimiter = ": "  # what our data should be separated by
-        hdata, rv = loadData(loaddatawithheaders, headers=True, hdel=delimiter, hignore=hignore)
+        hdata = loadData(loaddatawithheaders, headers=True, hdel=delimiter, hignore=hignore)
         # only fourteen lines of data are formatted properly
         assert len(hdata) == 14
         # check the following are floats
