Source code for emodpy_malaria.migration.convert_txt_to_bin
# convert_txt_to_bin.py
# -----------------------------------------------------------------------------
# This script converts a CSV formatted txt file to an EMOD binary-formatted migration file.
# It also creates the required metadata file.
#
# The CSV file has three columns:
# From_Node_ID, To_Node_ID, Rate (Average # of Trips Per Day)
# where the node IDs are the external IDs found in the demographics file.
# Each node ID in the migration file must exist in the demographics file.
# One can have node IDs in the demographics file that don't exist in the migration file.
#
# The CSV file does not have to have the same number of entries for each From_Node.
# The script will find the From_Node with the most destinations and use that
# count as DestinationsPerNode. The binary file will have DestinationsPerNode
# entries per node.
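#
# For example, given the following (hypothetical) input, node 1 has the most
# destinations, so the script would use DestinationsPerNode = 2:
#
#   1,2,0.04
#   1,3,0.01
#   2,1,0.02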
# -----------------------------------------------------------------------------
import collections
import datetime
import json
import os
import struct
import sys
from enum import Enum


class MigrationTypes(Enum):
    LOCAL_MIGRATION = "LOCAL_MIGRATION"
    AIR_MIGRATION = "AIR_MIGRATION"
    REGIONAL_MIGRATION = "REGIONAL_MIGRATION"
    SEA_MIGRATION = "SEA_MIGRATION"


def show_usage():
    print('\nUsage: %s [input-migration-csv] [output-bin] [migration-type] '
          '[idreference]' % os.path.basename(sys.argv[0]))
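

# Example invocation (hypothetical file names; the fourth argument is the
# IdReference, which must match the one in the demographics file):
#
#   python convert_txt_to_bin.py migration.csv migration.bin REGIONAL_MIGRATION my_id_reference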
if __name__ == "__main__":
    if len(sys.argv) != 5:
        show_usage()
        exit(0)

    filename = sys.argv[1]
    outfilename = sys.argv[2]
    mig_type = sys.argv[3]
    id_ref = sys.argv[4]

    # Compare against the enum values explicitly; a plain-string membership
    # test like "mig_type in MigrationTypes" raises TypeError on Python < 3.12.
    if mig_type not in {mt.value for mt in MigrationTypes}:
        print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: "
              f"{MigrationTypes.LOCAL_MIGRATION.value}, {MigrationTypes.REGIONAL_MIGRATION.value}, "
              f"{MigrationTypes.SEA_MIGRATION.value}, {MigrationTypes.AIR_MIGRATION.value}.")
        exit(-1)
    max_destinations_per_node = 0
    destinations_per_node = 0

    fopen = open(filename)
    fout = open(outfilename, 'wb')

    # net maps From_Node_ID -> list of To_Node_IDs; net_rate holds the
    # corresponding rates, index-aligned with net.
    net = {}
    net_rate = {}
    # ----------------------------
    # collect data from CSV file
    # ----------------------------
    # Note: the prev_id logic below assumes the rows are grouped by From_Node_ID.
    node_id_list = []
    prev_id = -1
    for line in fopen:
        s = line.strip().split(',')
        ID1 = int(float(s[0]))
        ID2 = int(float(s[1]))
        rate = float(s[2])

        if ID1 not in net:
            net[ID1] = []
            net_rate[ID1] = []

        net[ID1].append(ID2)
        net_rate[ID1].append(rate)

        if prev_id != ID1:
            if destinations_per_node > max_destinations_per_node:
                max_destinations_per_node = destinations_per_node
            node_id_list.append(ID1)
            print(prev_id, max_destinations_per_node)
            prev_id = ID1
            destinations_per_node = 0

        destinations_per_node += 1

    # The loop only updates the maximum when a new From_Node starts, so fold in
    # the count for the final From_Node group as well.
    if destinations_per_node > max_destinations_per_node:
        max_destinations_per_node = destinations_per_node
    # ---------------
    # Write bin file
    # ---------------
    for ID in net:
        # Zero-pad each node's row out to max_destinations_per_node entries,
        # then fill in the actual destinations and rates.
        ID_write = []
        ID_rate_write = []
        for i in range(max_destinations_per_node):
            ID_write.append(0)
            ID_rate_write.append(0)
        for i in range(len(net[ID])):
            ID_write[i] = net[ID][i]
            ID_rate_write[i] = net_rate[ID][i]

        # The type needs to be 'I' because Linux handles 'L' differently than Windows.
        s_write = struct.pack('I' * len(ID_write), *ID_write)
        s_rate_write = struct.pack('d' * len(ID_rate_write), *ID_rate_write)

        fout.write(s_write)
        fout.write(s_rate_write)

    fopen.close()
    fout.close()
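    # A minimal read-back sketch (not part of the conversion itself), assuming
    # the layout above: each node's block is max_destinations_per_node uint32
    # IDs followed by max_destinations_per_node doubles.
    #
    #   with open(outfilename, 'rb') as fin:
    #       ids = struct.unpack('I' * max_destinations_per_node,
    #                           fin.read(4 * max_destinations_per_node))
    #       rates = struct.unpack('d' * max_destinations_per_node,
    #                             fin.read(8 * max_destinations_per_node))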
    # -------------------------------------------------------------------
    # Create NodeOffsets string
    # This contains the location of each From Node's data in the bin file
    # -------------------------------------------------------------------
    offset_str = ""
    nodecount = 0
    for ID in net:
        offset_str += '%0.8X' % ID
        offset_str += '%0.8X' % (nodecount * max_destinations_per_node * 12)  # 12 -> sizeof(uint32_t) + sizeof(double)
        nodecount += 1
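    # For example, with max_destinations_per_node = 2 each node's block is
    # 2 * 12 = 24 bytes, so hypothetical From_Nodes 1 and 2 would encode as
    # "00000001" + "00000000" then "00000002" + "00000018" (0x18 == 24).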
    # -------------------
    # Write Metadata file
    # -------------------
    migjson = collections.OrderedDict([])
    migjson['Metadata'] = {}

    if os.name == "nt":
        migjson['Metadata']['Author'] = os.environ['USERNAME']
    else:
        migjson['Metadata']['Author'] = os.environ['USER']

    migjson['Metadata']['NodeCount'] = len(node_id_list)
    migjson['Metadata']['IdReference'] = id_ref
    migjson['Metadata']['DateCreated'] = datetime.datetime.now().ctime()
    migjson['Metadata']['Tool'] = os.path.basename(sys.argv[0])
    migjson['Metadata']['DatavalueCount'] = max_destinations_per_node
    migjson['Metadata']['MigrationType'] = mig_type
    migjson['NodeOffsets'] = offset_str

    with open(outfilename + ".json", 'w') as file:
        json.dump(migjson, file, indent=4)
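    # The resulting <output-bin>.json has this shape (illustrative values):
    #
    #   {
    #       "Metadata": {
    #           "Author": "someuser",
    #           "NodeCount": 2,
    #           "IdReference": "my_id_reference",
    #           "DateCreated": "Mon Jan  1 00:00:00 2024",
    #           "Tool": "convert_txt_to_bin.py",
    #           "DatavalueCount": 2,
    #           "MigrationType": "REGIONAL_MIGRATION"
    #       },
    #       "NodeOffsets": "00000001000000000000000200000018"
    #   }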