1616import pathlib
1717import json
1818
19- from diffpy .utils .parsers import loadData
20-
2119# FIXME: add support for yaml, xml
2220supported_formats = ['.json' ]
2321
2422
25- def load_PDF_into_db (dbname , pdfname , hddata : dict , rv : list , oneline = True , show_path = True ):
26- """Load PDF header and base data into a database file.
23+ def load_PDF_into_db (dbname , pdfname , hddata : dict , rv : list , show_path = True ):
24+ """Load an entry consisting of PDF header and base data into a database file.
2725
2826 Requires hddata and rv generated from loadData.
2927
30- dbname -- name of the database file to load into.
31- pdfname -- name of the PDF file.
32- hddata -- Dictionary of PDF metadata generated by loadData.
33- rv -- List of PDF (r, gr) pairs generated by loadData.
34- oneline -- store r and gr arrays in a single line for compactness (default True).
35- show_path -- include a PDF_path element in the database entry (default True).
28+ dbname -- name of the database file to load an entry into.
29+ pdfname -- name of the PDF file.
30+ hddata -- Dictionary of PDF metadata generated by loadData.
31+ rv -- List of PDF (r, gr) pairs generated by loadData.
32+ show_path -- include a PDF_path element in the database entry (default True).
33+
34+ Returns the dictionary loaded from/into the updated database file.
3635 """
3736 # new file or update
3837 existing = False
@@ -49,10 +48,7 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
4948 data .update ({'PDF_path' : grpath })
5049
5150 # add r, gr, and header metadata
52- if oneline :
53- data .update ({'r' : str (list (rv [:, 0 ])), 'gr' : str (list (rv [:, 1 ]))})
54- else :
55- data .update ({'r' : list (rv [:, 0 ]), 'gr' : list (rv [:, 1 ])})
51+ data .update ({'r' : list (rv [:, 0 ]), 'gr' : list (rv [:, 1 ])})
5652 data .update (hddata )
5753
5854 # parse name using pathlib and generate json entry
@@ -69,7 +65,8 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
6965 # dump if non-existing
7066 if not existing :
7167 with open (dbname , 'w' ) as jsonfile :
72- jsonfile .write (json .dumps (entry , indent = 2 ))
68+ pdfs = entry # for return
69+ json .dump (pdfs , jsonfile , indent = 2 )
7370
7471 # update if existing
7572 else :
@@ -80,20 +77,30 @@ def load_PDF_into_db(dbname, pdfname, hddata: dict, rv: list, oneline=True, show
8077 # dump to string first for formatting
8178 json .dump (pdfs , json_write , indent = 2 )
8279
80+ return pdfs
8381
84- def markup_PDF (muname , hddata : dict , rv : list ):
85- # FIXME: for REST API, remove if better ways to implement
86- """Put PDF file information in a markup language file.
8782
88- mumane -- name of markup file to put data into.
83+ def markup_PDF (hddata : dict , rv : list , muname = None ):
84+ # FIXME: may be better suited for REST API package, not diffpy.utils
85+ """Put PDF file information into a dictionary.
86+
8987 hddata -- Dictionary of metadata.
9088 rv -- List of (r, gr) pairs.
89+ muname -- file to save into (default None, no saving occurs).
90+
91+ Returns the dictionary loaded from/into markup file.
9192 """
9293
9394 # gather data
9495 data = {}
95- data .update ({'r' : str ( list (rv [:, 0 ])) , 'gr' : str ( list (rv [:, 1 ]) )})
96+ data .update ({'r' : list (rv [:, 0 ]), 'gr' : list (rv [:, 1 ])})
9697 data .update (hddata )
98+
99+ # return directly
100+ if muname is None :
101+ return data
102+
103+ # save to disk when enabled
97104 extension = pathlib .Path (muname ).suffix
98105 if extension not in supported_formats :
99106 raise Exception (f"Format of { muname } is not supported." )
@@ -103,15 +110,54 @@ def markup_PDF(muname, hddata: dict, rv: list):
103110 with open (muname , 'w' ) as json_write :
104111 json .dump (data , json_write , indent = 2 )
105112
113+ return data
114+
115+
def markup_oneline(filename):
    """Reformat the "r" and "gr" lists of a markup file to take up one line each.

    Only lists opened by a line consisting solely of '"r": [' or '"gr": ['
    (the layout json.dump(..., indent=N) produces) are collapsed.  Every other
    line, including other multi-line lists and lists that are already on a
    single line, is written back unchanged, so calling this function twice on
    the same file is harmless.

    filename -- name of markup file to reformat.

    Raises Exception if the file extension is not in supported_formats.
    """

    # check file type
    extension = pathlib.Path(filename).suffix
    if extension not in supported_formats:
        raise Exception(f"Format of {filename} is not supported.")

    if extension == '.json':
        # cannot easily do regex substitution since lists are of floats
        with open(filename, 'r') as json_file:
            lines = json_file.readlines()

        list_openers = ('"r": [', '"gr": [')
        list_closers = (']', '],')

        reformatted = []
        pending = None  # raw lines of the list being collapsed, None when idle
        for line in lines:
            stripped = line.strip()
            if pending is None:
                if stripped in list_openers:
                    pending = [line]
                else:
                    reformatted.append(line)
            elif stripped in list_closers:
                # keep the opener's indentation, join the elements with spaces
                # and keep the closer exactly as found (with or without comma)
                opener = pending[0].rstrip()
                elements = ' '.join(item.strip() for item in pending[1:])
                reformatted.append(f"{opener}{elements}{stripped}\n")
                pending = None
            else:
                pending.append(line)

        # a list that never closed is put back untouched rather than dropped
        if pending is not None:
            reformatted.extend(pending)

        # build everything first, then write once, so a failure while
        # reformatting cannot leave a truncated file behind
        with open(filename, 'w') as json_file:
            json_file.writelines(reformatted)
150+
151+ def apply_schema_to_file (filename , schemaname , multiple_entries = False ):
108152 """ Reformat a file so relevant entries match the same order as a schema file.
109153 Other entries are put at the end in the same order.
110154
111155 filename -- name of file to apply the schema to.
112156 schemaname -- name of schema to apply.
113157 multiple_entries -- True if database file (i.e. those generated by load_PDF_into_db).
114158 False if data from a single file (i.e. those generated by markup_PDF).
159+
160+ Returns the dictionary loaded from/into the reformatted file.
115161 """
116162
117163 # ensure proper extension
@@ -162,3 +208,5 @@ def apply_schema(filename, schemaname, multiple_entries=False):
162208 reformatted_dict .update (data_dict )
163209 with open (filename , 'w' ) as json_write :
164210 json .dump (reformatted_dict , json_write , indent = 2 )
211+
212+ return reformatted_dict
0 commit comments