# Source code for emodpy.analyzers.timeseries_analyzer

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.core import ItemType
from idmtools.core.platform_factory import Platform
from idmtools.entities import IAnalyzer


class TimeseriesAnalyzer(IAnalyzer):
    """Collect channel time series from simulation output and plot group means.

    For every simulation, ``map`` reads the ``'Channels'`` entry of the first
    configured output file and turns the selected channels into a DataFrame
    with one column per channel. ``reduce`` then joins all simulations into a
    single DataFrame with ``(channel, group, sim_id)`` column levels, optionally
    writes it to CSV, and saves a PNG with one subplot per channel.

    The ``default_*_fn`` methods are the customization points: override them in
    a subclass to change selection, grouping, plotting or filtering.
    """

    # Names of the column levels created in reduce(): the (group, sim_id) key
    # of each simulation, followed by the per-simulation channel level.
    data_group_names = ['group', 'sim_id', 'channel']
    # Level order applied before saving/plotting (channel becomes outermost).
    ordered_levels = ['channel', 'group', 'sim_id']
    # Name of the CSV file written to <working_dir>/output when save_output is set.
    output_file = 'timeseries.csv'

    def __init__(self, filenames=None, channels=('Statistical Population',
                                                 'Infectious Population',
                                                 'Infected',
                                                 'Waning Population'),
                 save_output=True):
        """Create the analyzer.

        Args:
            filenames: Output files to request for each simulation. Only the
                first entry is read by ``map``. Defaults to
                ``[os.path.join('output', 'InsetChart.json')]``.
            channels: Channel names to keep. An empty value (or ``None``)
                selects every channel found in the file.
            save_output: When true, ``reduce`` writes the combined data to CSV.
        """
        # Build the default per call; a list literal in the signature would be
        # one shared object across every instance.
        if filenames is None:
            filenames = [os.path.join('output', 'InsetChart.json')]

        super().__init__(filenames=filenames)
        self.channels = set(channels) if channels else set()
        self.save_output = save_output

    def initialize(self):
        """Ensure the ``output`` directory exists under ``self.working_dir``."""
        # makedirs(exist_ok=True) avoids the check-then-create race and also
        # creates missing parent directories.
        os.makedirs(os.path.join(self.working_dir, "output"), exist_ok=True)

    def default_select_fn(self, ts):
        """Convert one channel's raw data sequence into a ``pandas.Series``."""
        return pd.Series(ts)

    def default_group_fn(self, k, v):
        """Return the group label for a simulation.

        Args:
            k: Simulation id (``reduce`` passes ``sim.uid``).
            v: Simulation tags (``reduce`` passes ``sim.tags``); unused here.

        Returns:
            ``k`` unchanged, i.e. every simulation forms its own group.
        """
        return k

    def default_plot_fn(self, df, ax):
        """Plot the per-group mean of ``df`` onto ``ax`` without a legend.

        Args:
            df: DataFrame whose columns carry a level named ``'group'``.
            ax: Matplotlib axes to draw on.
        """
        # Group on the transposed frame: DataFrame.groupby(axis=1) is
        # deprecated in pandas >= 2.1. Transposing back restores time as index.
        group_means = df.T.groupby(level='group').mean().T
        group_means.plot(ax=ax, legend=False)

    def default_filter_fn(self, md):
        """Decide whether a simulation is analyzed, given its tags; accepts all."""
        return True

    def filter(self, simulation):
        """Return whether ``simulation`` should be analyzed, based on its tags."""
        return self.default_filter_fn(simulation.tags)

    def get_channel_data(self, data_by_channel, selected_channels):
        """Build a DataFrame with one column per selected channel.

        Args:
            data_by_channel: Mapping of channel name to a mapping holding the
                channel values under the ``"Data"`` key.
            selected_channels: Channel names to extract; also used as the
                column keys of the result.

        Returns:
            The channel series concatenated column-wise.
        """
        channel_series = [self.default_select_fn(data_by_channel[channel]["Data"])
                          for channel in selected_channels]
        return pd.concat(channel_series, axis=1, keys=selected_channels)

    def map(self, data, simulation):
        """Extract the selected channels of one simulation.

        Args:
            data: Mapping of file name to parsed file content; the entry for
                ``self.filenames[0]`` must contain a ``'Channels'`` mapping.
            simulation: The simulation being processed (unused here).

        Returns:
            DataFrame produced by ``get_channel_data``.
        """
        cdata = data[self.filenames[0]]['Channels']
        # Use a list in file order rather than a set so that the column order
        # is deterministic from run to run.
        if self.channels:
            selected_channels = [name for name in cdata if name in self.channels]
        else:
            selected_channels = list(cdata)
        return self.get_channel_data(cdata, selected_channels)

    def plot_by_channel(self, channels, plot_fn):
        """Create a grid of subplots and draw one channel per subplot.

        Args:
            channels: Sized iterable of channel names.
            plot_fn: Callable ``plot_fn(channel, ax)`` that draws one channel.

        Returns:
            The created matplotlib figure.
        """
        n_channels = len(channels)
        # One extra column for every four channels; rows follow from that.
        ncol = 1 + n_channels // 4
        nrow = int(np.ceil(n_channels / ncol))

        # squeeze=False always yields a 2-D axes array, so the single-subplot
        # case needs no special handling.
        fig, axs = plt.subplots(figsize=(max(6, min(8, 4 * ncol)), min(6, 3 * nrow)),
                                nrows=nrow, ncols=ncol, sharex=True, squeeze=False)

        for channel, ax in zip(channels, axs.flat):
            ax.set_title(channel)
            plot_fn(channel, ax)

        return fig

    def reduce(self, all_data):
        """Combine all mapped simulations, then save the CSV and the plot.

        Args:
            all_data: Mapping of simulation to the DataFrame returned by
                ``map`` for that simulation.

        Writes ``self.output_file`` (only when ``save_output`` is true) and
        ``timeseries.png`` to ``<working_dir>/output``. Prints a message and
        returns early when ``all_data`` is empty.
        """
        output_dir = os.path.join(self.working_dir, "output")

        # Keep the (group, sim_id) keys beside the frames instead of attaching
        # them as attributes: attribute assignment on a DataFrame can overwrite
        # a column of the same name, and it mutates the mapped data.
        keys = []
        frames = []
        for sim, sim_data in all_data.items():
            keys.append((self.default_group_fn(sim.uid, sim.tags), sim.uid))
            frames.append(sim_data)

        if not frames:
            print("\n No data have been returned... Exiting...")
            return

        combined = pd.concat(frames, axis=1, keys=keys, names=self.data_group_names)

        # Put the channel level first so data[channel] selects one subplot's data.
        data = combined.reorder_levels(self.ordered_levels, axis=1).sort_index(axis=1)

        if self.save_output:
            data.to_csv(os.path.join(output_dir, self.output_file))

        def plot_fn(channel, ax):
            self.default_plot_fn(data[channel].dropna(), ax)

        channels = data.columns.levels[0]
        fig = self.plot_by_channel(channels, plot_fn)
        if fig is None:
            # An overriding plot_by_channel may still return nothing; fall back
            # to pyplot's current figure in that case.
            fig = plt.gcf()

        # Attach the legend to the last subplot that actually has labelled
        # lines; the grid can contain trailing empty subplots.
        labelled_axes = [ax for ax in fig.axes if ax.get_legend_handles_labels()[0]]
        if labelled_axes:
            labelled_axes[-1].legend()

        try:
            fig.savefig(os.path.join(output_dir, 'timeseries.png'))
        finally:
            # Release the figure so repeated analyses do not accumulate open figures.
            plt.close(fig)


if __name__ == "__main__":
    # Example run: analyze a single experiment on COMPS2 with the default channels.
    comps_platform = Platform('COMPS2')

    experiment_id = '8a7ff62a-fe7f-ea11-a2bf-f0921c167862'  # comps2 exp_id

    timeseries_analyzer = TimeseriesAnalyzer(filenames=['output/InsetChart.json'])

    AnalyzeManager(
        platform=comps_platform,
        ids=[(experiment_id, ItemType.EXPERIMENT)],
        analyzers=[timeseries_analyzer],
    ).analyze()