Skip to content

Commit c963063

Browse files
author
Andrew Yang
committed
Merge branch 'loaddata_headers' of https://github.com/Sparks29032/diffpy.utils
2 parents 8bafe53 + 6c092b9 commit c963063

File tree

3 files changed

+10097
-4
lines changed

3 files changed

+10097
-4
lines changed

src/diffpy/utils/parsers/loaddata.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy
1717

1818

19-
def loadData(filename, minrows=10, **kwargs):
19+
def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwargs):
2020
"""Find and load data from a text file.
2121
2222
The data reading starts at the first matrix block of at least minrows rows
@@ -26,6 +26,9 @@ def loadData(filename, minrows=10, **kwargs):
2626
filename -- name of the file we want to load data from.
2727
minrows -- minimum number of rows in the first data block.
2828
All rows must have the same number of floating point values.
29+
headers -- return also a dictionary of parameters specified in header
30+
hdel -- delimiter for parsing header information
31+
hignore -- ignore header rows beginning with any elements in the hignore list
2932
usecols -- zero-based index of columns to be loaded, by default use
3033
all detected columns. The reading skips data blocks that
3134
do not have the usecols-specified columns.
@@ -39,6 +42,8 @@ def loadData(filename, minrows=10, **kwargs):
3942
See also numpy.loadtxt for more details.
4043
"""
4144
from numpy import array, loadtxt
45+
# for storing header data
46+
hdata = {}
4247
# determine the arguments
4348
delimiter = kwargs.get('delimiter')
4449
usecols = kwargs.get('usecols')
@@ -72,8 +77,39 @@ def countcolumnsvalues(line):
7277
fpos = (0, 0)
7378
nrows = 0
7479
for line in fid:
80+
# decode line
81+
dline = line.decode()
82+
# find header information if requested
83+
if headers:
84+
hpair = dline.split(hdel)
85+
flag = True
86+
# ensure number of non-blank arguments is two
87+
if len(hpair) != 2:
88+
flag = False
89+
else:
90+
# ignore if an argument is blank
91+
hpair[0] = hpair[0].strip() # name of data entry
92+
hpair[1] = hpair[1].strip() # value of entry
93+
if not hpair[0] or not hpair[1]:
94+
flag = False
95+
else:
96+
# check if row has an ignore tag
97+
if hignore is not None:
98+
for tag in hignore:
99+
taglen = len(tag)
100+
if len(hpair[0]) >= taglen and hpair[0][:taglen] == tag:
101+
flag = False
102+
# add header data
103+
if flag:
104+
name = hpair[0]
105+
value = hpair[1]
106+
# check if data value should be stored as float
107+
if isfloat(hpair[1]):
108+
value = float(hpair[1])
109+
hdata.update({name: value})
110+
# continue search for the start of datablock
75111
fpos = (fpos[1], fpos[1] + len(line))
76-
line = line.decode()
112+
line = dline
77113
ncv = countcolumnsvalues(line)
78114
if ncv < mincv:
79115
start = None
@@ -98,6 +134,11 @@ def countcolumnsvalues(line):
98134
# in case of trailing delimiters.
99135
kwargs.setdefault('usecols', list(range(ncvblock[0])))
100136
rv = loadtxt(fid, **kwargs)
137+
138+
# return headers if requested
139+
if headers:
140+
return hdata, rv
141+
# otherwise do not
101142
return rv
102143

103144

@@ -247,4 +288,4 @@ def isfloat(s):
247288
pass
248289
return False
249290

250-
# End of file
291+
# End of file

src/diffpy/utils/tests/test_loaddata.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from diffpy.utils.tests.testhelpers import datafile
1010

1111
loaddata01 = datafile('loaddata01.txt')
12+
loaddatawithheaders = datafile('loaddatawithheaders.txt')
1213

1314
##############################################################################
1415
class TestLoadData(unittest.TestCase):
@@ -44,9 +45,28 @@ def test_loadData_1column(self):
4445
self.assertFalse(numpy.array_equal(d1c, d))
4546
return
4647

48+
49+
def test_loadData_headers(self):
    """Check loadData() with the header-parsing options enabled.

    Calls loadData with ``headers=True`` so that it returns a
    ``(hdata, rv)`` pair, then verifies how many header entries were
    collected and which of the values were converted to float.
    """
    # header rows whose name starts with any of these strings are skipped
    hignore = ["# ", "// ", "["]
    # separator between an entry name and its value in a header row
    delimiter = ": "
    hdata, rv = loadData(
        loaddatawithheaders,
        headers=True,
        hdel=delimiter,
        hignore=hignore,
    )
    # only fourteen lines of data are formatted properly
    self.assertEqual(14, len(hdata))
    # these entries must have been stored as floats; .get() yields None
    # for a missing entry, which also fails the isinstance check
    vfloats = ["wavelength", "qmaxinst", "qmin", "qmax", "bgscale"]
    for name in vfloats:
        self.assertIsInstance(hdata.get(name), float)
    # these entries must NOT be floats (a missing entry also satisfies
    # this, matching the behavior of the original bare asserts)
    vnfloats = ["composition", "rmax", "rmin", "rstep", "rpoly"]
    for name in vnfloats:
        self.assertNotIsInstance(hdata.get(name), float)
66+
4767
# End of class TestRoutines
4868

4969
if __name__ == '__main__':
5070
unittest.main()
5171

52-
# End of file
72+
# End of file

0 commit comments

Comments
 (0)