Source code for emodpy_malaria.migration.convert_txt_to_bin

# convert_txt_to_bin.py
# -----------------------------------------------------------------------------
# This script converts a CSV formatted txt file to an EMOD binary-formatted migration file.
# It also creates the required metadata file.
#
# The CSV file has three columns
#    From_Node_ID, To_Node_ID, Rate (Average # of Trips Per Day)
# where the node IDs are the external IDs found in the demographics file.
# Each node ID in the migration file must exist in the demographics file.
# The demographics file may contain node IDs that do not appear in the migration file.
#
# The CSV file does not have to have the same number of entries for each From_Node.
# The script will find the From_Node that has the most and use that for the 
# DestinationsPerNode.  The binary file will have DestinationsPerNode entries
# per node.
# -----------------------------------------------------------------------------

import collections
import datetime
import json
import os
import struct
import sys
from enum import Enum


class MigrationTypes(Enum):
    """Migration types accepted as the ``migration-type`` script argument.

    Every member's value is the same string as its name.
    """

    LOCAL_MIGRATION = "LOCAL_MIGRATION"
    AIR_MIGRATION = "AIR_MIGRATION"
    REGIONAL_MIGRATION = "REGIONAL_MIGRATION"
    SEA_MIGRATION = "SEA_MIGRATION"
def show_usage():
    """Print the command-line usage string, using the running script's base name."""
    script_name = os.path.basename(sys.argv[0])
    print(f'\nUsage: {script_name} [input-migration-csv] [output-bin] '
          '[migration-type] [idreference]')
def read_migration_csv(filename):
    """Parse the migration CSV into per-From-node destination and rate lists.

    Each non-blank line is expected to hold ``From_Node_ID,To_Node_ID,Rate``.
    Rows for the same From node do not have to be adjacent in the file.

    Args:
        filename: Path of the CSV formatted text file.

    Returns:
        A ``(destinations, rates)`` tuple of dicts keyed by From node ID, in
        the order each From node first appears.  ``destinations[id]`` is the
        list of To node IDs and ``rates[id]`` the matching list of rates.

    Raises:
        ValueError, IndexError: If a non-blank line cannot be parsed.
    """
    destinations = {}
    rates = {}
    with open(filename) as csv_file:
        for line in csv_file:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = stripped.split(',')
            from_id = int(float(fields[0]))
            to_id = int(float(fields[1]))
            rate = float(fields[2])
            destinations.setdefault(from_id, []).append(to_id)
            rates.setdefault(from_id, []).append(rate)
    return destinations, rates


def max_destination_count(destinations):
    """Return the largest number of destinations any From node has (0 if none)."""
    # Computed from the collected lists so that the last node in the file and
    # non-adjacent rows of the same node are counted correctly.
    return max((len(to_ids) for to_ids in destinations.values()), default=0)


def write_migration_bin(outfilename, destinations, rates, max_destinations_per_node):
    """Write the binary migration file.

    For every From node, ``max_destinations_per_node`` destination IDs are
    written followed by ``max_destinations_per_node`` rates; nodes with fewer
    destinations are padded with zeros.

    Args:
        outfilename: Path of the binary file to create.
        destinations: Dict of From node ID -> list of To node IDs.
        rates: Dict of From node ID -> list of rates, parallel to ``destinations``.
        max_destinations_per_node: Number of entries written per node.
    """
    # The type needs to be 'I' because Linux handles 'L' differently than Windows.
    id_format = 'I' * max_destinations_per_node
    rate_format = 'd' * max_destinations_per_node
    with open(outfilename, 'wb') as bin_file:
        for from_id, to_ids in destinations.items():
            padding = max_destinations_per_node - len(to_ids)
            padded_ids = to_ids + [0] * padding
            padded_rates = rates[from_id] + [0.0] * padding
            bin_file.write(struct.pack(id_format, *padded_ids))
            bin_file.write(struct.pack(rate_format, *padded_rates))


def build_node_offsets(node_ids, max_destinations_per_node):
    """Build the NodeOffsets string for the metadata file.

    The string holds, for each From node in order, the node ID and the byte
    offset of that node's data in the binary file, each as 8 hex digits.

    Args:
        node_ids: Iterable of From node IDs in the order they were written.
        max_destinations_per_node: Number of entries written per node.

    Returns:
        The concatenated hex string.
    """
    bytes_per_node = max_destinations_per_node * 12  # 12 -> sizeof(uint32_t) + sizeof(double)
    return "".join(
        '%0.8X' % node_id + '%0.8X' % (index * bytes_per_node)
        for index, node_id in enumerate(node_ids)
    )


def main(argv=None):
    """Convert a migration CSV file to a binary file plus its ``.json`` metadata.

    Args:
        argv: Argument list laid out like ``sys.argv``
            (script, input CSV, output bin, migration type, id reference).
            Defaults to ``sys.argv``.

    Exits with status 0 after printing usage when the argument count is wrong,
    and with status -1 when the migration type is not a ``MigrationTypes`` value.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 5:
        show_usage()
        sys.exit(0)

    filename, outfilename, mig_type, id_ref = argv[1:5]

    # Compare against the member values: testing a plain string with
    # ``in MigrationTypes`` raises TypeError on Python 3.8 through 3.11.
    valid_types = [member.value for member in MigrationTypes]
    if mig_type not in valid_types:
        print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: "
              f"{', '.join(valid_types)}.")
        sys.exit(-1)

    # ----------------------------
    # collect data from CSV file
    # ----------------------------
    # The input is parsed completely before the output file is created, so a
    # malformed CSV does not leave a truncated binary file behind.
    destinations, rates = read_migration_csv(filename)
    max_destinations_per_node = max_destination_count(destinations)

    # ---------------
    # Write bin file
    # ---------------
    write_migration_bin(outfilename, destinations, rates, max_destinations_per_node)

    # -------------------------------------------------------------------
    # Create NodeOffsets string
    # This contains the location of each From Node's data in the bin file
    # -------------------------------------------------------------------
    offset_str = build_node_offsets(destinations, max_destinations_per_node)

    # -------------------
    # Write Metadata file
    # -------------------
    user_variable = 'USERNAME' if os.name == "nt" else 'USER'
    migjson = collections.OrderedDict([])
    migjson['Metadata'] = {}
    # Fall back to a placeholder instead of failing after the bin file is written.
    migjson['Metadata']['Author'] = os.environ.get(user_variable, 'unknown')
    migjson['Metadata']['NodeCount'] = len(destinations)
    migjson['Metadata']['IdReference'] = id_ref
    migjson['Metadata']['DateCreated'] = datetime.datetime.now().ctime()
    migjson['Metadata']['Tool'] = os.path.basename(argv[0])
    migjson['Metadata']['DatavalueCount'] = max_destinations_per_node
    migjson['Metadata']['MigrationType'] = mig_type
    migjson['NodeOffsets'] = offset_str

    with open(outfilename + ".json", 'w') as file:
        json.dump(migjson, file, indent=4)


if __name__ == "__main__":
    main()