# Source code for idmtools_calibra.utilities.parsers.JSON

import re
import json
from json import JSONDecoder
from functools import partial


# consider configuring a data-path variable listing directories of interest
# that may store json and other data files (?)

# would a per-scenario directory structure schema be feasible (?)
# e.g.

# scenario_name\
#         \data
#         \scripts
#         \workflow
#         \simulations
#         \figures
#         .dtk_proj        # project json/xml schema (similar to vs sln file)


# remove all white space that is outside quotes
# Matches either a JSON string literal or a run of whitespace.  The string
# branch understands backslash escapes, and its closing quote is optional so
# that a string cut off at the end of the text is still treated as quoted.
_STRING_OR_WHITESPACE = re.compile(r'"(?:[^"\\]|\\[\s\S])*"?|\s+')


def strip(pieces):
    """Remove all whitespace that lies outside double-quoted strings.

    Backslash-escaped quotes (``\\"``) inside a string do not end it, so
    whitespace within string values is always preserved.  A string that is
    still open at the end of ``pieces`` (e.g. a partially read chunk) is kept
    untouched up to the end of the text.

    :param pieces: text to clean up, typically (a fragment of) JSON.
    :return: ``pieces`` without the whitespace found outside quoted strings.
    """
    def keep_quoted(match):
        token = match.group(0)
        # Quoted strings are returned verbatim; whitespace runs are dropped.
        return token if token.startswith('"') else ''

    return _STRING_OR_WHITESPACE.sub(keep_quoted, pieces)
# parse the json file object by object, pieces_mem (default 1024) characters at a time
# after every read, check whether the accumulated pieces contain a valid json object;
# if not, accumulate more pieces, pieces_mem characters at a time
#
# this saves memory in the process and allows
# various types of objects to be parsed;
# that way we can get a dictionary of mixed types
# (e.g. lists, other dictionaries, etc.)
#
# new lines (and any other whitespace) between objects are removed
# Characters that can start / continue a JSON number.  A bare top-level number
# that touches the end of the buffer may simply be cut off by the chunk size.
_NUMBER_START = '-0123456789'
_NUMBER_CONTINUE = '0123456789.eE+-'


def parse(input_data, decoder=JSONDecoder(), pieces_mem=1024):
    """Lazily yield the JSON values stored back to back in a text stream.

    The stream is read ``pieces_mem`` characters at a time.  After every read
    as many complete values as possible are decoded from the accumulated
    buffer and yielded; whatever is left is kept until more text arrives.
    Values may be of mixed types (objects, lists, scalars) and may be
    separated by any amount of whitespace.

    Only leading whitespace is skipped before decoding (``raw_decode`` does
    not accept it); the text of each value is handed to the decoder
    untouched, so string contents are never altered.

    :param input_data: object with a ``read(size)`` method returning ``str``
        (an empty string signals the end of the stream).
    :param decoder: object providing ``raw_decode``; defaults to a plain
        ``json.JSONDecoder``.
    :param pieces_mem: number of characters requested per read.
    :return: generator over the decoded values, in stream order.  Trailing
        text that never forms a complete value is silently dropped.
    """
    buffer = ''
    at_eof = False
    while not at_eof:
        chunk = input_data.read(pieces_mem)
        if chunk:
            buffer += chunk
        else:
            at_eof = True

        while True:
            buffer = buffer.lstrip()
            if not buffer:
                break
            try:
                value, end = decoder.raw_decode(buffer)
            except ValueError:
                # Incomplete (or invalid) value: wait for more text.
                break
            # A bare number is only known to be complete once something that
            # cannot belong to it follows, or the stream has ended; otherwise
            # "12345" read as "12" + "345" would come out as two numbers.
            if (not at_eof
                    and buffer[0] in _NUMBER_START
                    and (end == len(buffer) or buffer[end] in _NUMBER_CONTINUE)):
                break
            yield value
            buffer = buffer[end:]
# reads json file to a dictionary
# if as_is is True read whatever object is stored in the json file and return it;
# otherwise, if as_is is False, read the json file object by object and return
# a dictionary of objects keyed by their position in the file
#
# dict = {
#           0: obj_0,
#           1: obj_1,
#           ...
#        }
#
# if as_is is False and func is not None, apply func to each parsed object;
# func must return a tuple containing the object to be stored in the dictionary and a boolean value;
# if the boolean is True stop parsing the json file, else keep parsing it
# (may be useful for filtering or searching until a certain item is found, after which we do
# not process the rest of the json file)
# if the object returned by func is None no dictionary entry is stored for that position;
# else a new dictionary entry is created with the returned object
#
# Example: with input file "cluster_tags_alt.json" containing
#
#   {"B":{"a":1}}
#   {"a":{"tags a":{"alt":2}}}
#   { "idx0":4, "idx1":0.324 }
#   {"B":{"b":1}}
#   [1, 2, 3, 4]
#   {"4 ": 435435,"asd":"asdasd"}
#
#   test = json2dict("cluster_tags_alt.json", False, (lambda data: (data, False) if "B" in data else (None, False)))
#
# Output:
#   {0: {u'B': {u'a': 1}}, 3: {u'B': {u'b': 1}}}
def json2dict(json_file, as_is=True, func=None):
    """Load a JSON file, either whole or value by value.

    :param json_file: path of the file to read.
    :param as_is: when True the file must hold a single JSON document, which
        is returned unchanged.  When False the file is read value by value
        (see ``parse``) and a dict mapping each value's 0-based position in
        the file to the value is returned.
    :param func: optional callable used only when ``as_is`` is False.  It is
        called with every parsed value and must return a
        ``(value_to_store, stop)`` tuple: the value is stored under the
        current position unless it is None, and a true ``stop`` ends the
        scan right after the current value.
    :return: the decoded document (``as_is=True``) or the position -> value
        dict (``as_is=False``).
    """
    # The legacy 'U' mode flag was removed in Python 3.11 (ValueError); plain
    # text mode already performs universal-newline translation.
    with open(json_file, 'r') as input_data:
        if as_is:
            return json.load(input_data)

        data_dict = {}
        for i, data in enumerate(parse(input_data)):
            if func is None:
                data_dict[i] = data
                continue

            data, do_break = func(data)
            if data is not None:
                data_dict[i] = data
            if do_break:
                break
        return data_dict
def dict2json(filename, dict_content):
    """Write ``dict_content`` to ``filename`` as JSON.

    :param filename: path of the file to create or overwrite.
    :param dict_content: either text that is already serialized (written
        verbatim) or any JSON-serializable object such as a dict or list
        (serialized with ``json.dumps``).
    :raises TypeError: if ``dict_content`` is not JSON-serializable.
    """
    if isinstance(dict_content, str):
        text = dict_content
    else:
        # Serialize before opening so that a failure here does not leave a
        # truncated file behind.
        text = json.dumps(dict_content)

    with open(filename, 'w') as f:
        f.write(text)