Edit functionality, use tmp_path fixture for tests

Andrew Yang · Andrew Yang · commit ac4d2ff74fc1 · 2023-08-07T09:27:16.000-04:00
diff --git a/conda-recipe/run_test.py b/conda-recipe/run_test.py
@@ -1,4 +1,8 @@
 #!/usr/bin/env python
 
+import sys
+import pathlib
+sys.path.append((pathlib.Path.cwd().parent.absolute() / "src").as_posix())
+
 import diffpy.utils.tests
 assert diffpy.utils.tests.test().wasSuccessful()
diff --git a/src/diffpy/utils/parsers/__init__.py b/src/diffpy/utils/parsers/__init__.py
@@ -17,7 +17,7 @@
 """
 
 from .loaddata import loadData
-from .loadmetafile import load_PDF_into_db, markup_PDF, apply_schema
+from .loadmetafile import load_PDF_into_db, markup_PDF, apply_schema_to_file, markup_oneline
 from .resample import resample
 
 # silence the pyflakes syntax checker
diff --git a/src/diffpy/utils/parsers/loadmetafile.py b/src/diffpy/utils/parsers/loadmetafile.py
@@ -16,23 +16,22 @@
 import pathlib
 import json
 
-from diffpy.utils.parsers import loadData
-
 # FIXME: add support for yaml, xml
 supported_formats = ['.json']
 
 
-def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show_path=True):
-    """Load PDF header and base data into a database file.
+def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, show_path=True):
+    """Load an entry consisting of PDF header and base data into a database file.
 
     Requires hdata and rv generated from loadData.
 
-    dbname      -- name of the database file to load into.
-    pdfname     -- name of the PDF file.
-    hddata      -- Dictionary of PDF metadata generated by loadData.
-    rv          -- List of PDF (r, gr) pairs generated by loadData.
-    oneline     -- store r and gr arrays in a single line for compactness (default True).
-    show_path   -- include a PDF_path element in the database entry (default True).
+    dbname          -- name of the database file to load an entry into.
+    pdfname         -- name of the PDF file.
+    hddata          -- Dictionary of PDF metadata generated by loadData.
+    rv              -- List of PDF (r, gr) pairs generated by loadData.
+    show_path       -- include a PDF_path element in the database entry (default True).
+
+    Returns the dictionary loaded from/into the updated database file.
     """
     # new file or update
     existing = False
@@ -49,10 +48,7 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
             data.update({'PDF_path': grpath})
 
         # add r, gr, and header metadata
-        if oneline:
-            data.update({'r': str(list(rv[:, 0])), 'gr': str(list(rv[:, 1]))})
-        else:
-            data.update({'r': list(rv[:, 0]), 'gr': list(rv[:, 1])})
+        data.update({'r': list(rv[:, 0]), 'gr': list(rv[:, 1])})
         data.update(hddata)
 
         # parse name using pathlib and generate json entry
@@ -69,7 +65,8 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
         # dump if non-existing
         if not existing:
             with open(dbname, 'w') as jsonfile:
-                jsonfile.write(json.dumps(entry, indent=2))
+                pdfs = entry  # for return
+                json.dump(pdfs, jsonfile, indent=2)
 
         # update if existing
         else:
@@ -80,20 +77,30 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
                 # dump to string first for formatting
                 json.dump(pdfs, json_write, indent=2)
 
+    return pdfs
 
-def markup_PDF(muname, hddata: dict, rv: list):
-    # FIXME: for REST API, remove if better ways to implement
-    """Put PDF file information in a markup language file.
 
-    mumane  -- name of markup file to put data into.
+def markup_PDF(hddata: dict, rv: list, muname=None):
+    # FIXME: may be better suited for REST API package, not diffpy.utils
+    """Put PDF file information into a dictionary.
+
     hddata  -- Dictionary of metadata.
     rv      -- List of (r, gr) pairs.
+    muname  -- file to save into (default None, no saving occurs).
+
+    Returns the dictionary loaded from/into markup file.
     """
 
     # gather data
     data = {}
-    data.update({'r': str(list(rv[:, 0])), 'gr': str(list(rv[:, 1]))})
+    data.update({'r': list(rv[:, 0]), 'gr': list(rv[:, 1])})
     data.update(hddata)
+
+    # return directly
+    if muname is None:
+        return data
+
+    # save to disk when enabled
     extension = pathlib.Path(muname).suffix
     if extension not in supported_formats:
         raise Exception(f"Format of {muname} is not supported.")
@@ -103,15 +110,54 @@ def markup_PDF(muname, hddata: dict, rv: list):
         with open(muname, 'w') as json_write:
             json.dump(data, json_write, indent=2)
 
+    return data
+
+
+def markup_oneline(filename):
+    """Reformat lists in markup languages to take up only one line.
 
-def apply_schema(filename, schemaname, multiple_entries=False):
+    Works well when only lists are surrounded by square brackets and no other data is comma and newline separated.
+
+    filename    -- name of markup file to reformat.
+    """
+
+    # check file type
+    extension = pathlib.Path(filename).suffix
+    if extension not in supported_formats:
+        raise Exception(f"Format of {filename} is not supported.")
+
+    if extension == '.json':
+        # cannot easily do regex substitution since lists are of floats
+        with open(filename, 'r+') as json_file:
+            lines = json_file.readlines()
+            json_file.seek(0)
+            json_file.truncate()
+
+            s_flag = False
+            for line in lines:
+                if "\"r\": [" in line or "\"gr\": [" in line:
+                    s_flag = True
+                    updated_line = line[:-1]
+                elif "]," in line:
+                    s_flag = False
+                    updated_line = f"{updated_line[:-1]}{line.strip()}\n"
+                    json_file.write(updated_line)
+                elif s_flag:
+                    updated_line += f"{line[:-1].strip()} "
+                else:
+                    json_file.write(line)
+
+
+def apply_schema_to_file(filename, schemaname, multiple_entries=False):
     """ Reformat a file so relevant entries match the same order as a schema file.
     Other entries are put at the end in the same order.
 
     filename            -- name of file to apply the schema to.
     schemaname          -- name of schema to apply.
     multiple_entries    -- True if database file (i.e. those generated by load_PDF_into_db).
                            False if data from a single file (i.e. those generated by markup_PDF).
+
+    Returns the dictionary loaded from/into the reformatted file.
     """
 
     # ensure proper extension
@@ -162,3 +208,5 @@ def apply_schema(filename, schemaname, multiple_entries=False):
                 reformatted_dict.update(data_dict)
             with open(filename, 'w') as json_write:
                 json.dump(reformatted_dict, json_write, indent=2)
+
+    return reformatted_dict
diff --git a/src/diffpy/utils/tests/test_loadmetafile.py b/src/diffpy/utils/tests/test_loadmetafile.py
@@ -1,4 +1,4 @@
-from diffpy.utils.parsers import load_PDF_into_db, markup_PDF, apply_schema
+from diffpy.utils.parsers import load_PDF_into_db, markup_PDF, apply_schema_to_file, markup_oneline
 from diffpy.utils.parsers import loadData
 from diffpy.utils.tests.testhelpers import datafile
 
@@ -7,40 +7,52 @@
 
 tests_dir = os.path.dirname(os.path.abspath(locals().get('__file__', 'file.py')))
 
-generatedjson = datafile('tljson.json')
 targetjson = datafile('targetdb.json')
 
 schemaname = datafile('strumining.json')
 muload = datafile('loadmu.txt')
-generatedmu = datafile('tmujson.json')
 targetmu = datafile('targetmu.json')
 
 
-def test_load_gr():
+def test_load_gr(tmp_path):
     # generate json and apply schema
+    generatedjson = tmp_path / "generated_db.json"
     tddbload_list = os.listdir(os.path.join(tests_dir, "testdata", "dbload"))
     tddbload_list.sort()
-    print(tddbload_list)
     for headerfile in tddbload_list:
         headerfile = os.path.join(tests_dir, "testdata", "dbload", headerfile)
-        hdata, rv = loadData(headerfile, headers=True)
-        load_PDF_into_db(generatedjson, headerfile, hdata, rv, show_path=False)
-    apply_schema(generatedjson, schemaname, multiple_entries=True)
+        hdata = loadData(headerfile, headers=True)
+        rv = loadData(headerfile)
+        db_data = load_PDF_into_db(generatedjson, headerfile, hdata, rv, show_path=False)
+    apply_schema_to_file(generatedjson, schemaname, multiple_entries=True)
+    markup_oneline(generatedjson)
 
     # compare to target
+    # first compare if base data is same
+    import json
+    with open(targetjson, 'r') as target:
+        target_db_data = json.load(target)
+        assert target_db_data == db_data
+    # then compare file structure/organization
     assert filecmp.cmp(generatedjson, targetjson)
 
-    # cleanup
-    os.remove(generatedjson)
-
 
-def test_markup_gr():
+def test_markup_gr(tmp_path):
     # put into json and apply schema
-    hdata, rv = loadData(muload, headers=True)
-    markup_PDF(generatedmu, hdata, rv)
-    apply_schema(generatedmu, schemaname)
+    generatedmu = tmp_path / "generated_markup.json"
+    hdata = loadData(muload, headers=True)
+    rv = loadData(muload)
+    data = markup_PDF(hdata, rv, generatedmu)
+    apply_schema_to_file(generatedmu, schemaname)
+    markup_oneline(generatedmu)
 
     # check against target
+    # first compare data is same
+    import json
+    with open(targetmu, 'r') as target:
+        target_data = json.load(target)
+        assert target_data == data
+    # then compare structure
     assert filecmp.cmp(generatedmu, targetmu)
 
     # cleanup
diff --git a/src/diffpy/utils/tests/testdata/targetdb.json b/src/diffpy/utils/tests/testdata/targetdb.json
@@ -1,23 +1,23 @@
 {
   "e1.gr": {
-    "r": "[0.0, 1.0, 2.0, 3.0]",
-    "gr": "[0.0, 0.0, 0.0, 0.0]",
+    "r": [0.0, 1.0, 2.0, 3.0],
+    "gr": [0.0, 0.0, 0.0, 0.0],
     "qmax": 10.0,
     "qmin": 0.0,
     "rmax": 10.0,
     "rmin": 0.0
   },
   "e2.gr": {
-    "r": "[0.0, 1.0, 2.0, 3.0]",
-    "gr": "[1.0, 2.0, 3.0, 4.0]",
+    "r": [0.0, 1.0, 2.0, 3.0],
+    "gr": [1.0, 2.0, 3.0, 4.0],
     "qmax": 11.0,
     "qmin": 1.0,
     "rmax": 11.0,
     "rmin": 1.0
   },
   "e3.gr": {
-    "r": "[0.0, 1.0, 2.0, 3.0]",
-    "gr": "[0.0, 5.0, 4.0, 3.0]",
+    "r": [0.0, 1.0, 2.0, 3.0],
+    "gr": [0.0, 5.0, 4.0, 3.0],
     "qmax": 12.0,
     "qmin": 2.0,
     "rmax": 12.0,
diff --git a/src/diffpy/utils/tests/testdata/targetmu.json b/src/diffpy/utils/tests/testdata/targetmu.json
@@ -1,6 +1,6 @@
 {
-  "r": "[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]",
-  "gr": "[0.0, 1e-05, 3e-05, 5e-05, 1e-05, -3e-05, -6e-05]",
+  "r": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
+  "gr": [0.0, 1e-05, 3e-05, 5e-05, 1e-05, -3e-05, -6e-05],
   "stru_str": "baddata",
   "stype": "Neutron",
   "qdamp": 0.0001,

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`		`- "r": "[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]",`
`3`		`- "gr": "[0.0, 1e-05, 3e-05, 5e-05, 1e-05, -3e-05, -6e-05]",`
	`2`	`+ "r": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],`
	`3`	`+ "gr": [0.0, 1e-05, 3e-05, 5e-05, 1e-05, -3e-05, -6e-05],`
`4`	`4`	`"stru_str": "baddata",`
`5`	`5`	`"stype": "Neutron",`
`6`	`6`	`"qdamp": 0.0001,`