Source code for emodpy_malaria.migration.convert_txt_to_bin
# convert_txt_to_bin.py
# -----------------------------------------------------------------------------
# This script converts a CSV-formatted txt file to an EMOD binary-formatted
# migration file. It also creates the required metadata file.
#
# The CSV file has three columns:
#     From_Node_ID, To_Node_ID, Rate (Average # of Trips Per Day)
# where the node IDs are the external IDs found in the demographics file.
# Each node ID in the migration file must exist in the demographics file.
# One can have node IDs in the demographics that don't exist in the migration file.
#
# The CSV file does not have to have the same number of entries for each From_Node.
# The script will find the From_Node that has the most destinations and use that
# for the DestinationsPerNode. The binary file will have DestinationsPerNode
# entries per node.
# -----------------------------------------------------------------------------
import collections
import datetime
import json
import os
import struct
import sys
from enum import Enum
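# Example CSV contents (three columns, grouped by From_Node_ID), illustrating the
# format described above; the node IDs and rates here are hypothetical:
#     1, 2, 0.04
#     1, 3, 0.01
#     2, 1, 0.05
#
# NOTE: The __main__ block below calls show_usage() and checks mig_type against
# MigrationTypes, but those definitions are not shown in this excerpt. The
# following is a minimal sketch of what they might look like, inferred from the
# error message in the block below; the member values, the usage text, and the
# reliance on Python 3.12+ value-based Enum membership tests
# ("LOCAL_MIGRATION" in MigrationTypes) are assumptions, not the module's
# actual definitions.


class MigrationTypes(Enum):
    LOCAL_MIGRATION = "LOCAL_MIGRATION"
    REGIONAL_MIGRATION = "REGIONAL_MIGRATION"
    SEA_MIGRATION = "SEA_MIGRATION"
    AIR_MIGRATION = "AIR_MIGRATION"


def show_usage():
    # Expected arguments: input CSV, output bin file, migration type, and the
    # IdReference that must match the demographics file.
    print(f"Usage: {os.path.basename(sys.argv[0])} <input-csv> <output-bin> <migration-type> <id-reference>")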
if __name__ == "__main__":
    if len(sys.argv) != 5:
        show_usage()
        exit(0)

    filename = sys.argv[1]
    outfilename = sys.argv[2]
    mig_type = sys.argv[3]
    id_ref = sys.argv[4]

    if mig_type not in MigrationTypes:
        print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: "
              f"{MigrationTypes.LOCAL_MIGRATION}, {MigrationTypes.REGIONAL_MIGRATION}, "
              f"{MigrationTypes.SEA_MIGRATION}, {MigrationTypes.AIR_MIGRATION}.")
        exit(-1)

    max_destinations_per_node = 0
    destinations_per_node = 0

    fopen = open(filename)
    fout = open(outfilename, 'wb')

    net = {}
    net_rate = {}

    # ----------------------------
    # collect data from CSV file
    # ----------------------------
    node_id_list = []
    prev_id = -1
    for line in fopen:
        s = line.strip().split(',')
        ID1 = int(float(s[0]))
        ID2 = int(float(s[1]))
        rate = float(s[2])

        if ID1 not in net:
            net[ID1] = []
            net_rate[ID1] = []
        net[ID1].append(ID2)
        net_rate[ID1].append(rate)

        if prev_id != ID1:
            if destinations_per_node > max_destinations_per_node:
                max_destinations_per_node = destinations_per_node
            node_id_list.append(ID1)
            print(prev_id, max_destinations_per_node)
            prev_id = ID1
            destinations_per_node = 0
        destinations_per_node += 1

    # Account for the final From_Node group, whose destination count is not
    # compared against the maximum inside the loop above.
    if destinations_per_node > max_destinations_per_node:
        max_destinations_per_node = destinations_per_node

    # ---------------
    # Write bin file
    # ---------------
    for ID in net:
        ID_write = []
        ID_rate_write = []
        for i in range(max_destinations_per_node):
            ID_write.append(0)
            ID_rate_write.append(0)
        for i in range(len(net[ID])):
            ID_write[i] = net[ID][i]
            ID_rate_write[i] = net_rate[ID][i]

        # The type needs to be 'I' because Linux handles 'L' differently than Windows.
        s_write = struct.pack('I' * len(ID_write), *ID_write)
        s_rate_write = struct.pack('d' * len(ID_rate_write), *ID_rate_write)

        fout.write(s_write)
        fout.write(s_rate_write)

    fopen.close()
    fout.close()

    # -------------------------------------------------------------------
    # Create NodeOffsets string
    # This contains the location of each From Node's data in the bin file
    # -------------------------------------------------------------------
    offset_str = ""
    nodecount = 0

    for ID in net:
        offset_str += '%0.8X' % ID
        offset_str += '%0.8X' % (nodecount * max_destinations_per_node * 12)  # 12 -> sizeof(uint32_t) + sizeof(double)
        nodecount += 1

    # -------------------
    # Write Metadata file
    # -------------------
    migjson = collections.OrderedDict([])
    migjson['Metadata'] = {}
    if os.name == "nt":
        migjson['Metadata']['Author'] = os.environ['USERNAME']
    else:
        migjson['Metadata']['Author'] = os.environ['USER']
    migjson['Metadata']['NodeCount'] = len(node_id_list)
    migjson['Metadata']['IdReference'] = id_ref
    migjson['Metadata']['DateCreated'] = datetime.datetime.now().ctime()
    migjson['Metadata']['Tool'] = os.path.basename(sys.argv[0])
    migjson['Metadata']['DatavalueCount'] = max_destinations_per_node
    migjson['Metadata']['MigrationType'] = mig_type
    migjson['NodeOffsets'] = offset_str

    with open(outfilename + ".json", 'w') as file:
        json.dump(migjson, file, indent=4)
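The pair of output files can be checked by reading the binary back with the same layout the script writes: for each From_Node, DatavalueCount uint32 destination IDs followed by DatavalueCount doubles, starting at the byte offset recorded in NodeOffsets. The sketch below is a hypothetical verification helper, not part of the module; the function name and the example invocation are assumptions.

import json
import struct


def read_node_destinations(bin_path, node_id):
    # Load the metadata file written next to the binary (<bin_path>.json).
    with open(bin_path + ".json") as meta_file:
        metadata = json.load(meta_file)
    count = metadata["Metadata"]["DatavalueCount"]
    offsets = metadata["NodeOffsets"]

    # NodeOffsets is a flat hex string: 8 digits of node ID followed by
    # 8 digits of byte offset, repeated once per From_Node.
    offset = None
    for i in range(0, len(offsets), 16):
        if int(offsets[i:i + 8], 16) == node_id:
            offset = int(offsets[i + 8:i + 16], 16)
            break
    if offset is None:
        raise ValueError(f"node {node_id} not found in NodeOffsets")

    # Each node's record is count uint32 IDs (4 bytes each) followed by
    # count doubles (8 bytes each); unused slots are zero-filled.
    with open(bin_path, "rb") as bin_file:
        bin_file.seek(offset)
        ids = struct.unpack("I" * count, bin_file.read(4 * count))
        rates = struct.unpack("d" * count, bin_file.read(8 * count))
    return ids, rates


# Hypothetical usage, assuming the converter was run as:
#     python convert_txt_to_bin.py migration.csv local_migration.bin LOCAL_MIGRATION my_id_reference
# print(read_node_destinations("local_migration.bin", 1))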