# Source code for idmtools_calibra.utilities.parsers.JSON

import re
import json
from json import JSONDecoder
from functools import partial


# consider configuring a data-path variable listing directories of interest
# that may store json and other data files (?)

# would a per-scenario directory structure schema be feasible (?)
# e.g.

# scenario_name\
#         \data
#         \scripts
#         \workflow
#         \simulations
#         \figures
#         .dtk_proj        # project json/xml schema (similar to vs sln file)


# remove all white space that is outside quotes
# Matches either a double-quoted string (honouring backslash escapes, and
# tolerating a string that is still unterminated at the end of the text) or a
# run of white space. Only the quoted string is captured in group 1.
_QUOTED_OR_SPACE = re.compile(r'("(?:\\.|[^"\\])*(?:"|\\?\Z))|\s+', re.DOTALL)


def strip(pieces):
    """Remove all white space that is outside double-quoted strings.

    Backslash escapes inside a quoted string are honoured, so an escaped
    quote (``\\"``) does not end the string and white space that follows it
    inside the string is preserved. A string left unterminated at the end of
    ``pieces`` (e.g. a partially read chunk) is kept untouched.

    :param pieces: text to compact (typically JSON or a fragment of it)
    :return: ``pieces`` with every white space run outside quotes removed
    """
    # group(1) is None for a white space match, which is replaced by nothing;
    # quoted strings are copied through verbatim
    return _QUOTED_OR_SPACE.sub(lambda match: match.group(1) or '', pieces)


# parse the json file object by object, 1024 bytes at a time
# every 1024 bytes check if pieces contain a valid json object
# if not, accumulate more pieces 1024 bytes at a time 
#
# save memory in the process and allow 
# various types of objects to be parsed
# that way we can get a dictionary of mixed types
# (e.g. lists, other dictionaries, etc.)
#
# white space (including new lines) outside quoted strings is ignored
def parse(input_data, decoder=JSONDecoder(), pieces_mem=1024):
    """Lazily yield every top-level JSON value found in a text stream.

    The stream is read ``pieces_mem`` characters at a time. After each read
    the accumulated text is decoded value by value; when the remaining text
    does not (yet) form a complete JSON value, more input is read. This
    allows a file holding several concatenated values of mixed types
    (dictionaries, lists, ...) to be processed without loading it whole.

    White space between values is skipped here and white space inside a
    value is left to the decoder, so string contents (including escaped
    quotes) are never altered.

    Text left over at the end of the stream that does not decode is
    discarded silently. A bare top-level number that straddles a read
    boundary may be yielded in parts, because each part decodes on its own.

    :param input_data: object with a ``read(size)`` method returning ``str``
        and an empty string at end of input
    :param decoder: object providing ``raw_decode(text)`` returning
        ``(value, end_index)`` and raising ``ValueError`` on failure
    :param pieces_mem: number of characters requested per read
    :return: generator of decoded values, in stream order
    """
    pending = ''
    while True:
        chunk = input_data.read(pieces_mem)
        if not chunk:
            break
        pending += chunk
        while True:
            # raw_decode does not accept leading white space
            pending = pending.lstrip()
            if not pending:
                break
            try:
                value, end = decoder.raw_decode(pending)
            except ValueError:
                # incomplete value: wait for the next chunk
                break
            yield value
            pending = pending[end:]


# reads json file to a dictionary

# if as_is is True read whatever object is stored in the json file and return it;
# otherwise, if as_is is False, read the json file object by object and return 
# a dictionary of objects
#
#   dict = {
#            1 : obj_1,
#            2: obj_2,
#            ...
#            }
#
# if as_is is False and func is not None, apply func to each parsed object;
# func must return a tuple (obj, do_break):
#   - if obj is not None, it is stored in the dictionary under the index of the parsed object;
#     if obj is None, no dictionary entry is created for that object
#   - if do_break is True, stop parsing the json file and return the dictionary;
#     otherwise keep parsing the json file
# (may be useful for filtering or searching until a certain item is found, after which
# the rest of the json file is not processed)
#
#
# Example: with input file "cluster_tags_alt.json" containing 
'''
{"B":{"a":1}}


{"a":{"tags  a":{"alt":2}}}

{
    "idx0":4,
    "idx1":0.324
}

{"B":{"b":1}}

[1, 2, 3, 4]  

  {"4 ":   435435,"asd":"asdasd"}
'''
'''

test = json2dict("cluster_tags_alt.json", False, (lambda data: (data, False) if "B" in data else (None, False)))

Output:

{0: {u'B': {u'a': 1}}, 3: {u'B': {u'b': 1}}}

'''


def json2dict(json_file, as_is=True, func=None):
    """Read a json file and return its content.

    :param json_file: path of the file to read
    :param as_is: if True, load the single JSON value stored in the file and
        return it unchanged; if False, read the file value by value (see
        ``parse``) and return a dictionary mapping each value's position in
        the file to the value
    :param func: optional callable used only when ``as_is`` is False. It is
        called with each parsed value and must return ``(obj, do_break)``:
        ``obj`` is stored under the value's position unless it is None, and
        parsing stops as soon as ``do_break`` is true
    :return: the loaded value (``as_is`` True) or the dictionary of values
    """
    # text mode already translates all newline conventions; the legacy 'U'
    # flag is rejected by Python 3.11+ and must not be used
    with open(json_file, 'r') as input_data:
        if as_is:
            return json.load(input_data)

        data_dict = {}
        for i, data in enumerate(parse(input_data)):
            if func is None:
                data_dict[i] = data
                continue
            data, do_break = func(data)
            if data is not None:
                data_dict[i] = data
            if do_break:
                break
        return data_dict


def dict2json(filename, dict_content):
    """Write content to a json file, replacing any existing file.

    :param filename: path of the file to write
    :param dict_content: content to store. A ``str`` is assumed to be
        already serialized and is written unchanged; any other object
        (e.g. a dictionary) is serialized with ``json.dump``
    """
    with open(filename, 'w') as f:
        if isinstance(dict_content, str):
            f.write(dict_content)
        else:
            # a raw f.write() would raise TypeError for a dict
            json.dump(dict_content, f, indent=4)