
Commit 6ab58e4

Author: Andrew Yang
Merge branch 'loaddata_headers' into loadmetadata
Parents: a2171a0 + 699e604


3 files changed, +55 -30 lines changed

CHANGELOG.md

Lines changed: 17 additions & 5 deletions
@@ -1,20 +1,32 @@
 # Release notes
 
-## Version 3.1.0 – 2022-12-09
+## Version 3.2.0 – 2023-8-**
 
 ### Added
 
-- Compatibility with Python 3.10, 3.9, 3.8
+- CI Coverage.
+- New tests for loadData function.
 
 ### Changed
 
-### Deprecated
+- loadData function now toggleable. Can return either (a) data read from data blocks or (b) header
+  information stored above the data block.
+- Exclude wx from tests.
 
 ### Removed
 
-- Remove the support for Python 3.5, 3.6.
+- Remove use of pkg_resources (deprecated).
+- No longer use Travis.
 
-### Fixed
+## Version 3.1.0 – 2022-12-09
+
+### Added
+
+- Compatibility with Python 3.10, 3.9, 3.8
+
+### Removed
+
+- Remove the support for Python 3.5, 3.6.
 
 ## Version 3.0.0 -- 2019-03-12
 
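As a quick illustration of the loadData toggle described in the changelog entry above, here is a minimal sketch; the file name sample.gr is a hypothetical data file with header lines above a whitespace-delimited data block, not part of this commit.

from diffpy.utils.parsers.loaddata import loadData

# Default behavior: return the numeric data block as a numpy array.
data = loadData("sample.gr")

# With headers=True: return only the header parameters, parsed into a dictionary.
hdata = loadData("sample.gr", headers=True)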

src/diffpy/utils/parsers/loaddata.py

Lines changed: 37 additions & 24 deletions
@@ -19,27 +19,40 @@
 def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwargs):
     """Find and load data from a text file.
 
-    The data reading starts at the first matrix block of at least minrows rows
-    and constant number of columns. This seems to work for most of the
-    datafiles including those generated by PDFGetX2.
+    The data block is identified as the first matrix block of at least minrows rows
+    and constant number of columns. This seems to work for most of the datafiles including
+    those generated by PDFGetX2.
 
-    filename -- name of the file we want to load data from.
-    minrows  -- minimum number of rows in the first data block.
-                All rows must have the same number of floating point values.
-    headers  -- return also a dictionary of parameters specified in header
-    hdel     -- delimiter for parsing header information
-    hignore  -- ignore header rows beginning with any elements in the hignore list
-    usecols  -- zero-based index of columns to be loaded, by default use
-                all detected columns. The reading skips data blocks that
-                do not have the usecols-specified columns.
-    unpack   -- return data as a sequence of columns that allows tuple
-                unpacking such as x, y = loadData(FILENAME, unpack=True).
-                Note transposing the loaded array as loadData(FILENAME).T
-                has the same effect.
-    kwargs   -- keyword arguments that are passed to numpy.loadtxt
+    filename -- name of the file we want to load data from.
+    minrows  -- minimum number of rows in the first data block.
+                All rows must have the same number of floating point values.
+    headers  -- when False (default), the function returns a numpy array of the
+                data in the data block. When True, the function instead returns a
+                dictionary of parameters and their corresponding values parsed from
+                the header (information prior to the data block). See hdel and hignore
+                for options to help with parsing header information.
+    hdel     -- (only used when headers enabled) delimiter for parsing header
+                information (default '='). E.g. using the default hdel, the line
+                'parameter = p_value' is put into the dictionary as
+                {parameter: p_value}.
+    hignore  -- (only used when headers enabled) ignore header rows beginning
+                with any elements in the hignore list. E.g. hignore=['# ', '[']
+                means the following lines are skipped: '# qmax=10', '[defaults]'.
+    kwargs   -- keyword arguments that are passed to numpy.loadtxt, including
+                the arguments listed below. (See also numpy.loadtxt for more
+                details.)
+    delimiter -- delimiter for the data in the block (whitespace by default).
+                For comma-separated data blocks, set delimiter to ','.
+    usecols  -- zero-based index of columns to be loaded, by default use
+                all detected columns. The reading skips data blocks that
+                do not have the usecols-specified columns.
+    unpack   -- return data as a sequence of columns that allows tuple
+                unpacking such as x, y = loadData(FILENAME, unpack=True).
+                Note transposing the loaded array as loadData(FILENAME).T
+                has the same effect.
 
-    Return a numpy array of the data.
-    See also numpy.loadtxt for more details.
+    Return a numpy array of the data. If headers is enabled, instead return a
+    dictionary of parameters read from the header.
     """
     from numpy import array, loadtxt
     # for storing header data
@@ -124,6 +137,11 @@ def countcolumnsvalues(line):
         # block was found here!
         if nrows >= minrows:
             break
+
+    # Return header data if requested
+    if headers:
+        return hdata  # Return, so do not proceed to reading datablock
+
     # Return an empty array when no data found.
     # loadtxt would otherwise raise an exception on loading from EOF.
     if start is None:
@@ -134,11 +152,6 @@ def countcolumnsvalues(line):
     # in case of trailing delimiters.
     kwargs.setdefault('usecols', list(range(ncvblock[0])))
     rv = loadtxt(fid, **kwargs)
-
-    # return headers if requested
-    if headers:
-        return hdata, rv
-    # otherwise do not
     return rv
 
 
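To show the header-parsing and delimiter options documented in the new docstring, here is a minimal sketch; the file names, header contents, and two-column layout are illustrative assumptions, not part of the commit.

from diffpy.utils.parsers.loaddata import loadData

# Parse header lines such as 'wavelength = 0.1819' into a dictionary,
# skipping lines that begin with '# ' or '[' (e.g. '# qmax=10', '[defaults]').
hdata = loadData("measurement.dat", headers=True, hdel="=", hignore=["# ", "["])

# For a comma-separated data block, the delimiter kwarg is forwarded to
# numpy.loadtxt; unpack=True allows tuple unpacking when the block has two columns.
x, y = loadData("measurement.csv", delimiter=",", unpack=True)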

src/diffpy/utils/tests/test_loaddata.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def test_loadData_headers(self):
         """
         hignore = ["# ", "// ", "["]  # ignore lines beginning with these strings
         delimiter = ": "  # what our data should be separated by
-        hdata, rv = loadData(loaddatawithheaders, headers=True, hdel=delimiter, hignore=hignore)
+        hdata = loadData(loaddatawithheaders, headers=True, hdel=delimiter, hignore=hignore)
         # only fourteen lines of data are formatted properly
         assert len(hdata) == 14
         # check the following are floats
