Skip to content

Commit 5681d9d

Browse files
author
Andrew Yang
committed
Add header metadata parsing to loaddata
1 parent 185c129 commit 5681d9d

File tree

4 files changed

+10099
-5
lines changed

4 files changed

+10099
-5
lines changed

src/diffpy/utils/parsers/loaddata.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy
1717

1818

19-
def loadData(filename, minrows=10, **kwargs):
19+
def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwargs):
2020
"""Find and load data from a text file.
2121
2222
The data reading starts at the first matrix block of at least minrows rows
@@ -26,6 +26,9 @@ def loadData(filename, minrows=10, **kwargs):
2626
filename -- name of the file we want to load data from.
2727
minrows -- minimum number of rows in the first data block.
2828
All rows must have the same number of floating point values.
29+
headers  -- when True, also return a dictionary of the parameters found in the header; the result is then the pair (hdata, data)
30+
hdel -- delimiter for parsing header information
31+
hignore  -- list of prefixes; header rows whose name begins with any of them are ignored
2932
usecols -- zero-based index of columns to be loaded, by default use
3033
all detected columns. The reading skips data blocks that
3134
do not have the usecols-specified columns.
@@ -39,6 +42,8 @@ def loadData(filename, minrows=10, **kwargs):
3942
See also numpy.loadtxt for more details.
4043
"""
4144
from numpy import array, loadtxt
45+
# for storing header data
46+
hdata = {}
4247
# determine the arguments
4348
delimiter = kwargs.get('delimiter')
4449
usecols = kwargs.get('usecols')
@@ -72,8 +77,40 @@ def countcolumnsvalues(line):
7277
fpos = (0, 0)
7378
nrows = 0
7479
for line in fid:
80+
# decode line
81+
dline = line.decode()
82+
# find header information if requested
83+
if headers:
84+
hpair = dline.split(hdel)
85+
flag = True
86+
# a header row must split into exactly two parts (name and value)
87+
if len(hpair) != 2:
88+
flag = False
89+
else:
90+
# ignore if an argument is blank
91+
hpair[0] = hpair[0].strip() # name of data entry
92+
hpair[1] = hpair[1].strip() # value of entry
93+
if not hpair[0] or not hpair[1]:
94+
flag = False
95+
else:
96+
# check if row has an ignore tag
97+
if hignore is not None:
98+
for tag in hignore:
99+
taglen = len(tag)
100+
if len(hpair[0]) >= taglen and hpair[0][:taglen] == tag:
101+
flag = False
102+
# add header data
103+
if flag:
104+
name = hpair[0]
105+
value = hpair[1]
106+
# store the value as a float if it is all digits with at most one decimal point
107+
if hpair[1].replace(".", "", 1).isnumeric():
108+
value = float(hpair[1])
109+
# record the header entry (value is a float when numeric, otherwise a string)
110+
hdata.update({name: value})
111+
# continue search for the start of datablock
75112
fpos = (fpos[1], fpos[1] + len(line))
76-
line = line.decode()
113+
line = dline
77114
ncv = countcolumnsvalues(line)
78115
if ncv < mincv:
79116
start = None
@@ -98,6 +135,11 @@ def countcolumnsvalues(line):
98135
# in case of trailing delimiters.
99136
kwargs.setdefault('usecols', list(range(ncvblock[0])))
100137
rv = loadtxt(fid, **kwargs)
138+
139+
# return headers if requested
140+
if headers:
141+
return hdata, rv
142+
# otherwise do not
101143
return rv
102144

103145

@@ -247,4 +289,4 @@ def isfloat(s):
247289
pass
248290
return False
249291

250-
# End of file
292+
# End of file

src/diffpy/utils/tests/test_loaddata.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
import unittest
77
import numpy
88
from diffpy.utils.parsers import loadData
9-
from diffpy.utils.tests.t_helpers import datafile
9+
from diffpy.utils.tests.testhelpers import datafile
1010

1111
loaddata01 = datafile('loaddata01.txt')
12+
loaddatawithheaders = datafile('loaddatawithheaders.txt')
1213

1314
##############################################################################
1415
class TestLoadData(unittest.TestCase):
@@ -44,9 +45,28 @@ def test_loadData_1column(self):
4445
self.assertFalse(numpy.array_equal(d1c, d))
4546
return
4647

48+
49+
def test_loadData_headers(self):
50+
"""check loadData() with headers options enabled
51+
"""
52+
hignore = ["# ", "// ", "["] # ignore lines beginning with these strings
53+
delimiter = ": " # what our data should be separated by
54+
hdata, rv = loadData(loaddatawithheaders, headers=True, hdel=delimiter, hignore=hignore)
55+
# only fourteen lines of data are formatted properly
56+
assert len(hdata) == 14
57+
# check the following are floats
58+
vfloats = ["wavelength", "qmaxinst", "qmin", "qmax", "bgscale"]
59+
for name in vfloats:
60+
assert isinstance(hdata.get(name), float)
61+
# check the following are NOT floats
62+
vnfloats = ["composition", "rmax", "rmin", "rstep", "rpoly"]
63+
for name in vnfloats:
64+
assert not isinstance(hdata.get(name), float)
65+
66+
4767
# End of class TestLoadData
4868

4969
if __name__ == '__main__':
5070
unittest.main()
5171

52-
# End of file
72+
# End of file

0 commit comments

Comments
 (0)