Source code for emodpy_malaria.migration.convert_json_to_bin

# convert_json_to_bin.py
# -----------------------------------------------------------------------------
# This script converts a JSON formatted txt file to an EMOD binary-formatted migration file.
# It also creates the required metadata file.
#
# The JSON file allows the user to specify different rates for different ages
# and genders.
# 
# The output binary file has one or two Gender Data sections depending on whether
# the JSON file has different data for each gender.  Each Gender Data section has
# one Age Data section for each age specified in the JSON file.  Each Age Data
# section has one Node Data section for each node that individuals can migrate
# from.  Each Node Data section has one chunk of data
#    [1-unint32_t (4-bytes) plus 1-double (8-bytes)]
# for each destination where each Node Data section has DestinationsPerNode chunks.
# In other words, each Node Data section is 12-bytes times DestinationsPerNode
# -----------------------------------------------------------------------------


import collections
import datetime
import json
import os
import struct
import sys
from enum import Enum

# -----------------------------------------------------------------------------
# Age Limits
# -----------------------------------------------------------------------------
AGE_Min = 0.0
AGE_Max = 125.0


# -----------------------------------------------------------------------------
# CheckAge
# -----------------------------------------------------------------------------
[docs] def check_age(age): if age < AGE_Min: print(f"Invalid age={age} < {AGE_Min}") exit(-1) if age > AGE_Max: print(f"Invalid age={age} > {AGE_Max}") exit(-1)
# ----------------------------------------------------------------------------- # CheckAgeArray # -----------------------------------------------------------------------------
[docs] def check_ages_array(ages_years): errmsg = JSON_AgesYears + " must be an array of ages in years and in increasing order." if len(ages_years) == 0: print(errmsg) exit(-1) prev = 0.0 for age in ages_years: check_age(age) if age < prev: print(errmsg) exit(-1) prev = age
# ----------------------------------------------------------------------------- # Enum Types # -----------------------------------------------------------------------------
[docs] class GenderDataType(Enum): SAME_FOR_BOTH_GENDERS = "SAME_FOR_BOTH_GENDERS" ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER" VECTOR_MIGRATION_BY_GENETICS = "VECTOR_MIGRATION_BY_GENETICS"
[docs] class InterpolationTypes(Enum): LINEAR_INTERPOLATION = "LINEAR_INTERPOLATION" PIECEWISE_CONSTANT = "PIECEWISE_CONSTANT"
[docs] class MigrationTypes(Enum): LOCAL_MIGRATION = "LOCAL_MIGRATION" AIR_MIGRATION = "AIR_MIGRATION" REGIONAL_MIGRATION = "REGIONAL_MIGRATION" SEA_MIGRATION = "SEA_MIGRATION"
# ----------------------------------------------------------------------------- # CheckGenderDataType # -----------------------------------------------------------------------------
[docs] def check_gender_data_type(gdt): if gdt not in GenderDataType: print(f"Invalid GenderDataType = {gdt}, valid GenderDataTypes are: " f"{GenderDataType.SAME_FOR_BOTH_GENDERS}, {GenderDataType.ONE_FOR_EACH_GENDER}, " f"{GenderDataType.VECTOR_MIGRATION_BY_GENETICS} (only for vector migration).") exit(-1)
# ----------------------------------------------------------------------------- # CheckInterpolationType # -----------------------------------------------------------------------------
[docs] def check_interpolation_type(interp_type): if interp_type not in InterpolationTypes: print(f"Invalid InterpolationType = {interp_type}, valid InterpolationTypes are: " f"{InterpolationTypes.LINEAR_INTERPOLATION}, {InterpolationTypes.PIECEWISE_CONSTANT}.") exit(-1)
# ----------------------------------------------------------------------------- # CheckMigrationType # -----------------------------------------------------------------------------
[docs] def check_migration_type(mig_type): if mig_type not in MigrationTypes: print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: " f"{MigrationTypes.LOCAL_MIGRATION}, {MigrationTypes.REGIONAL_MIGRATION}," f"{MigrationTypes.SEA_MIGRATION}, {MigrationTypes.AIR_MIGRATION}.") exit(-1)
# ----------------------------------------------------------------------------- # JSON Element Names # ----------------------------------------------------------------------------- # NOTE: The indention below indicates where the tag is used in the JSON JSON_IdRef = "IdReference" JSON_InterpType = "Interpolation_Type" JSON_GenderDataType = "Gender_Data_Type" JSON_AgesYears = "Ages_Years" JSON_NodeData = "Node_Data" JSON_ND_FromNodeId = "From_Node_ID" JSON_ND_RateData = "Rate_Data" JSON_RD_ToNodeId = "To_Node_ID" JSON_RD_RatesBoth = "Avg_Num_Trips_Per_Day_Both" JSON_RD_RatesMale = "Avg_Num_Trips_Per_Day_Male" JSON_RD_RatesFemale = "Avg_Num_Trips_Per_Day_Female" # ----------------------------------------------------------------------------- # CheckInJson # -----------------------------------------------------------------------------
[docs] def check_in_json(fn, data, key): if key not in data: print(f"Could not find {key} in file {fn}.") exit(-1)
# ----------------------------------------------------------------------------- # CheckRatesSize # -----------------------------------------------------------------------------
[docs] def check_rates_size(num_ages, rd_data, key): if len(rd_data[key]) != num_ages: print( f"{JSON_AgesYears} has {num_ages} values and one of the {key} has {len(rd_data[key])} values. " f" They must have the same number.") exit(-1)
# ----------------------------------------------------------------------------- # ReadJson # -----------------------------------------------------------------------------
[docs] def read_json(json_fn): json_file = open(json_fn, 'r') json_data = json.load(json_file) json_file.close() check_in_json(json_fn, json_data, JSON_IdRef) check_in_json(json_fn, json_data, JSON_InterpType) check_in_json(json_fn, json_data, JSON_GenderDataType) check_in_json(json_fn, json_data, JSON_AgesYears) check_in_json(json_fn, json_data, JSON_NodeData) check_interpolation_type(json_data[JSON_InterpType]) check_gender_data_type(json_data[JSON_GenderDataType]) check_ages_array(json_data[JSON_AgesYears]) if len(json_data[JSON_NodeData]) == 0: print(f"{JSON_NodeData} has no elements so there would be no migration data.") exit(-1) num_ages = len(json_data[JSON_AgesYears]) for nd_data in json_data[JSON_NodeData]: check_in_json(json_fn, nd_data, JSON_ND_FromNodeId) check_in_json(json_fn, nd_data, JSON_ND_RateData) if len(nd_data[JSON_ND_RateData]) == 0: print(f"{JSON_ND_RateData} has no elements so there would be no migration data.") exit(-1) for rd_data in nd_data[JSON_ND_RateData]: check_in_json(json_fn, rd_data, JSON_RD_ToNodeId) if json_data[JSON_GenderDataType] == GenderDataType.ONE_FOR_EACH_GENDER.value: check_in_json(json_fn, rd_data, JSON_RD_RatesMale) check_in_json(json_fn, rd_data, JSON_RD_RatesFemale) check_rates_size(num_ages, rd_data, JSON_RD_RatesMale) check_rates_size(num_ages, rd_data, JSON_RD_RatesFemale) else: check_in_json(json_fn, rd_data, JSON_RD_RatesBoth) check_rates_size(num_ages, rd_data, JSON_RD_RatesBoth) return json_data
# ----------------------------------------------------------------------------- # SummaryData # -----------------------------------------------------------------------------
[docs] class SummaryData: def __init__(self, node_count, offset_str, max_destinations_per_node): self.num_nodes = node_count self.offset_str = offset_str self.max_destinations_per_node = max_destinations_per_node
# ----------------------------------------------------------------------------- # GetSummaryData # -----------------------------------------------------------------------------
[docs] def get_summary_data(json_data): from_node_id_list = [] # ------------------------------------------------------------------------- # Find the list node that individuals can migrate from # Also find the maximum number of nodes that one can go to from a give node. # This max is used in determine the layout of the binary data. # ------------------------------------------------------------------------- max_destinations = 0 for node_data in json_data[JSON_NodeData]: from_node_id_list.append(int(node_data[JSON_ND_FromNodeId])) destinations = len(node_data[JSON_ND_RateData]) if destinations > max_destinations: max_destinations = destinations print(f"max_destinations = {max_destinations}") # ------------------------------------------------------------------- # Create NodeOffsets string # This contains the location of each From Node's data in the bin file # ------------------------------------------------------------------- offset_str = "" nodecount = 0 for from_node_id in from_node_id_list: offset_str += '%0.8X' % from_node_id offset_str += '%0.8X' % (nodecount * max_destinations * 12) # 12 -> sizeof(uint32_t) + sizeof(double) nodecount += 1 return SummaryData(nodecount, offset_str, max_destinations)
# ----------------------------------------------------------------------------- # WriteBinFile # -----------------------------------------------------------------------------
[docs] def write_bin_file(bin_fn, json_data, summary): bin_file = open(bin_fn, 'wb') if json_data[JSON_GenderDataType] == GenderDataType.ONE_FOR_EACH_GENDER.value: write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesMale) write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesFemale) else: write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesBoth) bin_file.close()
# ----------------------------------------------------------------------------- # WriteBinFileGender # -----------------------------------------------------------------------------
[docs] def write_bin_file_gender(bin_file, json_data, summary, rates_key): for age_index in range(len(json_data[JSON_AgesYears])): for node_data in json_data[JSON_NodeData]: array_id = [] array_rt = [] # Initialize with zeros for i in range(summary.max_destinations_per_node): array_id.append(0) array_rt.append(0) # Populate arrays with data index = 0 for rate_data in node_data[JSON_ND_RateData]: array_id[index] = int(rate_data[JSON_RD_ToNodeId]) array_rt[index] = rate_data[rates_key][age_index] index += 1 # Format data into binary bin_data_id = struct.pack('I' * len(array_id), *array_id) bin_data_rt = struct.pack('d' * len(array_rt), *array_rt) bin_file.write(bin_data_id) bin_file.write(bin_data_rt)
# ----------------------------------------------------------------------------- # WriteMetadataFile # -----------------------------------------------------------------------------
[docs] def write_metadata_file(metadata_fn, mig_type, json_data, rate_data): output_json = collections.OrderedDict([]) output_json["Metadata"] = {} output_json["Metadata"]["IdReference"] = json_data[JSON_IdRef] output_json["Metadata"]["DateCreated"] = datetime.datetime.now().ctime() output_json["Metadata"]["Tool"] = os.path.basename(sys.argv[0]) output_json["Metadata"]["DatavalueCount"] = rate_data.max_destinations_per_node output_json["Metadata"]["MigrationType"] = mig_type output_json["Metadata"]["GenderDataType"] = json_data[JSON_GenderDataType] output_json["Metadata"]["InterpolationType"] = json_data[JSON_InterpType] output_json["Metadata"]["AgesYears"] = json_data[JSON_AgesYears] output_json["Metadata"]["NodeCount"] = rate_data.num_nodes output_json["NodeOffsets"] = rate_data.offset_str with open(metadata_fn, 'w') as file: json.dump(output_json, file, indent=4)
# ----------------------------------------------------------------------------- # Main # ----------------------------------------------------------------------------- if __name__ == "__main__": if len(sys.argv) != 4: print("\nUsage: %s [input-json] [output-bin] [migration-type]" % os.path.basename(sys.argv[0])) exit(0) json_fn = sys.argv[1] bin_fn = sys.argv[2] mig_type = sys.argv[3] metadata_fn = bin_fn + ".json" check_migration_type(mig_type) json_data = read_json(json_fn) summary = get_summary_data(json_data) write_bin_file(bin_fn, json_data, summary) write_metadata_file(metadata_fn, mig_type, json_data, summary) print(f"Finished converting {json_fn} to {bin_fn} and {metadata_fn}")