Source code for idmtools_calibra.utilities.parsers.malaria_summary

import itertools
from collections import OrderedDict
import logging

import pandas as pd
import numpy as np

# Module-level logger named after this module; used for debug output below.
logger = logging.getLogger(__name__)


def summary_channel_to_pandas(data, channel):
    """
    Return a hierarchically binned pandas.Series for a specified
    MalariaSummaryReport.json channel.

    The grouping that contains ``channel`` is looked up first, then the bins
    of that grouping, and finally the nested channel values are converted to
    a Series indexed by those bins.

    :param data: parsed data from summary report
    :param channel: channel in summary report
    :return: pd.Series with MultiIndex binning taken from summary metadata.
        The ``Start_Day`` and ``Reporting_Interval`` entries of
        ``data['Metadata']`` are attached to the Series as attributes
        (``None`` when the entry is absent).
    """
    grouping = get_grouping_for_summary_channel(data, channel)
    bins = get_bins_for_summary_grouping(data, grouping)
    channel_series = json_to_pandas(data[grouping][channel], bins, channel)

    # Append some other useful metadata to the output Series.
    # NOTE: these are ordinary instance attributes set on this one object, so
    # callers should read them from the returned Series itself rather than
    # from objects derived from it.
    metadata = data['Metadata']
    channel_series.Start_Day = metadata.get('Start_Day')
    channel_series.Reporting_Interval = metadata.get('Reporting_Interval')

    return channel_series
def json_to_pandas(channel_data, bins, channel=None):
    """
    Convert nested array channel data from a json file to a pandas.Series
    with the specified MultiIndex binning.

    :param channel_data: (nested) array of channel values. It is flattened in
        row-major order, so the outermost nesting level corresponds to the
        first dimension in ``bins``.
    :param bins: ordered mapping of dimension name to the bin values of that
        dimension
    :param channel: optional channel name, used as the name of the Series
    :return: pd.Series indexed by the cartesian product of the bins
    :raises ValueError: if the number of values differs from the number of
        bin combinations
    """
    logger.debug("Converting JSON data from '%s' channel to pandas.Series with %s MultiIndex.",
                 channel, bins.keys())

    # Build the cartesian-product index directly instead of first
    # materializing every tuple in a Python list.
    multi_index = pd.MultiIndex.from_product(list(bins.values()), names=list(bins.keys()))

    values = np.array(channel_data).flatten()
    if values.size != len(multi_index):
        # Fail early with a message that names the channel and the bins.
        raise ValueError(
            "Channel '%s' has %d values but bins %s define %d combinations"
            % (channel, values.size, list(bins.keys()), len(multi_index))
        )

    channel_series = pd.Series(values, index=multi_index, name=channel)
    logger.debug('\n%s', channel_series)

    return channel_series
def get_grouping_for_summary_channel(data, channel):
    """
    Find the grouping to which a channel belongs in MalariaSummaryReport.json.

    Top-level groupings are searched in iteration order and the first one
    containing ``channel`` is returned. Top-level entries that are not
    dictionaries cannot contain channels and are skipped.

    :param data: parsed data from summary report
    :param channel: channel to find
    :return: name of the grouping that contains the channel
    :raises Exception: if no grouping contains the channel

    Example:

    >>> get_grouping_for_summary_channel(data, channel='Average Population by Age Bin')
    'DataByTimeAndAgeBins'
    """
    for group, group_data in data.items():
        # Guard against non-dict top-level values, which have no keys to search.
        if isinstance(group_data, dict) and channel in group_data:
            return group

    raise Exception('Unable to find channel %s in groupings %s' % (channel, data.keys()))
def get_bins_for_summary_grouping(data, grouping):
    """
    Get the dimensions and binning of data for a specified
    MalariaSummaryReport.json grouping.

    Every supported grouping has 'Time' (taken from
    ``data['DataByTime']['Time Of Report']``) as its first dimension; the
    remaining dimensions are read from ``data['Metadata']``.

    :param data: parsed data from summary report
    :param grouping: group name
    :return: an OrderedDict of dimensions and bins
    :raises Exception: if the grouping is not one of the supported groupings

    Example:

    >>> get_bins_for_summary_grouping(data, grouping='DataByTimeAndAgeBins')
    OrderedDict([('Time', [31, 61, 92, ..., 1095]), ('Age Bin', [0, 10, 20, ..., 1000])])
    """
    # For each supported grouping: (dimension name, Metadata key) pairs for
    # the dimensions that follow 'Time', in the order they are nested.
    extra_dimensions = {
        'DataByTime': (),
        'DataByTimeAndAgeBins': (
            ('Age Bin', 'Age Bins'),
        ),
        'DataByTimeAndPfPRBinsAndAgeBins': (
            ('PfPR Bin', 'Parasitemia Bins'),
            ('Age Bin', 'Age Bins'),
        ),
    }

    # Validate the grouping before touching the data so an unknown grouping
    # is always reported as such rather than as an unrelated KeyError.
    if grouping not in extra_dimensions:
        raise Exception('Unable to find grouping %s in %s' % (grouping, data.keys()))

    metadata = data['Metadata']
    bins = OrderedDict([('Time', data['DataByTime']['Time Of Report'])])
    for dimension, metadata_key in extra_dimensions[grouping]:
        bins[dimension] = metadata[metadata_key]

    return bins