Skip to content

plot_icj_means

collect(exp_id, chan='Infected', tag=None, smoothing=True)

Collect all the time series data for a given channel for a given experiment from InsetChart.json files in a local subdirectory that have been downloaded from COMPS, assuming the following structure.

1
2
3
exp_id/
   sim_id/
       InsetChart.json

Parameters:

Name Type Description Default
exp_id str

Experiment Id that has had data downloaded to the current working directory.

required
chan str

Channel name

'Infected'
tag str

key=value. Using results.db (sqlite3, from emodpy), limit results to just where key=value. If value is set to SWEEP, find all values for key and plot all values separately (but with mean/spread from other tags).

None

Returns:

Type Description
dict

Dictionary mapping each group ("ref", or each value found for a SWEEP tag) to a list of channel data arrays for further processing.

Source code in emod_api/channelreports/plot_icj_means.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def collect(exp_id: str,
            chan: str = "Infected",
            tag: str = None,
            smoothing: bool = True) -> dict:
    """
    Collect all the time series data for a given channel for a given experiment from InsetChart.json
    files in a local subdirectory that have been downloaded from COMPS, assuming the following structure.

        exp_id/
           sim_id/
               InsetChart.json

    Args:
        exp_id:    Experiment Id that has had data downloaded to the current working directory.
        chan:      Channel name
        tag:       key=value. Using results.db (sqlite3, from emodpy), limit results to just where key=value.
                   If value is set to SWEEP, find all values for key and plot all values separately
                   (but with mean/spread from other tags). The database is read from
                   latest_experiment/results.db relative to the current working directory.
        smoothing: If True, replace each time series with its 7-point moving average. Series
                   shorter than the window are left unsmoothed.

    Returns:
        Dictionary mapping each group ("ref", or each value found for a SWEEP tag) to a list of
        equal-length numpy arrays of channel data, zero-padded at the end to the longest series.

    Raises:
        ValueError: If tag is not in key=value format, if the key is not a plain column name,
            if the channel is missing from an InsetChart.json, or if no channel data is found.
    """

    def parse_tag_value(raw):
        """Convert the text after '=' into the value bound to the SQL query."""
        # A value the caller already wrapped in quotes is an explicit string.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in "'\"":
            return raw[1:-1]
        try:
            return int(raw)
        except ValueError:
            pass
        try:
            number = float(raw)
        except ValueError:
            return raw
        # "nan"/"inf" parse as floats but are far more likely to be text labels.
        if number != number or number in (float("inf"), float("-inf")):
            return raw
        return number

    def moving_average(x, w=7):
        # With mode 'valid', numpy swaps the operands when x is shorter than the
        # window, yielding a longer, meaningless series; leave short series as-is.
        if len(x) < w:
            return x
        return np.convolve(x, np.ones(w), 'valid') / w

    chan_data = {}
    groupby_values = {}
    if tag:
        # partition keeps any further '=' characters as part of the value.
        groupby_key, separator, raw_value = tag.partition("=")
        if not separator:
            raise ValueError("When passing tag, has to have key=value format.")
        groupby_key = groupby_key.strip()
        raw_value = raw_value.strip()
        # Column names cannot be bound as SQL parameters, so only accept a plain
        # identifier before interpolating it into the query text.
        if not groupby_key.isidentifier():
            raise ValueError(f"Tag key '{groupby_key}' is not a valid results.db column name.")

        db = os.path.join("latest_experiment", "results.db")
        con = sqlite3.connect(db)
        try:
            cur = con.cursor()
            if raw_value == "SWEEP":
                query = f"SELECT sim_id, {groupby_key} FROM results"
                for sim_id, sweep_value in cur.execute(query):
                    groupby_values.setdefault(sweep_value, []).append(sim_id)
            else:  # select only sim_id's where gb key == value
                query = f"SELECT sim_id FROM results WHERE {groupby_key} = ?"
                rows = cur.execute(query, (parse_tag_value(raw_value),))
                groupby_values["ref"] = [row[0] for row in rows]
        finally:
            con.close()
    else:
        # results.db may or may not sit next to the simulation directories.
        groupby_values["ref"] = sorted(
            entry for entry in os.listdir(exp_id) if entry != "results.db"
        )

    max_len = 0
    for value, simdirs in groupby_values.items():
        chan_data.setdefault(value, [])
        for sim in simdirs:
            icj_path = os.path.join(exp_id, str(sim), "InsetChart.json")
            if not os.path.exists(icj_path):
                continue
            with open(icj_path) as fp:
                icj = json.load(fp)
            if chan not in icj["Channels"]:
                raise ValueError(f"Can't find channel {chan} in file. Did find {icj['Channels'].keys()}.")
            new_data = np.asarray(icj["Channels"][chan]["Data"])
            if smoothing:
                new_data = moving_average(new_data)
            chan_data[value].append(new_data)
            max_len = max(max_len, len(new_data))
    if max_len == 0:
        raise ValueError(f"No InsetChart.json files with channel data for {chan} and experiment {exp_id}.")

    # If users run simulations that end when prevalence is zero, the length of the
    # time series can vary. We need to get them all the same to calc the mean.
    data_for_plotting = {}
    for poi, series_list in chan_data.items():
        data_for_plotting[poi] = [
            np.pad(data, (0, max_len - len(data))) if len(data) < max_len else data
            for data in series_list
        ]

    return data_for_plotting

display(chan_data, save=False, chan_name='Infected', exp_id=None)

Plot the mean and standard deviation of the time series in chan_data.

Source code in emod_api/channelreports/plot_icj_means.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def display(chan_data, save=False, chan_name="Infected", exp_id=None):
    """
    Plot the mean and standard deviation of each group of time series in chan_data.

    Args:
        chan_data: Mapping of group label to a list of equal-length time series.
        save:      If truthy and chan_data holds exactly one group, write that group's
                   mean series to mean_ref.json in the current working directory.
        chan_name: Label used for the y-axis.
        exp_id:    Optional plot title.

    Raises:
        ValueError: If any group contains no time series.
    """
    fig, ax = plt.subplots(1)
    if exp_id:
        plt.title(exp_id, loc="center")

    # The reference file is only written when there is a single group to describe.
    write_reference = save and len(chan_data) == 1

    for group_label in sorted(chan_data):
        series_list = chan_data[group_label]
        if len(series_list) == 0:
            raise ValueError("Input channel data array seems to have no data.")

        stacked = np.array(series_list)
        center = np.mean(stacked, axis=0)
        if write_reference:
            ref_json = {"Channels": {"Channel": {"Data": list(center)}}}
            with open("mean_ref.json", "w") as fp:
                json.dump(ref_json, fp, indent=4)
        band = np.std(stacked, axis=0)

        n_steps = len(center)
        t = np.arange(n_steps)
        ax.plot(t, center, label=group_label)
        plt.xlim(0, n_steps)
        # Shade one standard deviation either side of the mean.
        ax.fill_between(t, center + band, center - band, facecolor='yellow', alpha=0.5)

    ax.set_xlabel("Simulation Time")
    ax.set_ylabel(chan_name)
    plt.legend()
    plt.show()