# Source code for idmtools.analysis.csv_analyzer

"""idmtools CSVAnalyzer.

Example of a csv analyzer to concatenate csv results into one csv from your experiment's simulations.

Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
import os
from typing import Dict
import pandas as pd
from idmtools.entities import IAnalyzer
from idmtools.entities.ianalyzer import ANALYSIS_ITEM_MAP_DATA_TYPE, ANALYZABLE_ITEM

class CSVAnalyzer(IAnalyzer):
    """
    Provides an analyzer for CSV output.

    Examples:
        .. _simple-csv-example:

        Simple Example
          This example covers the basic usage of the CSVAnalyzer

          .. literalinclude:: ../../examples/analyzers/

        .. _multiple-csvs:

        Multiple CSVs
          This example covers analyzing multiple CSVs

          .. literalinclude:: ../../examples/analyzers/
    """
    # NOTE(review): both ``literalinclude`` targets above end at the directory; the example
    # file names look like they were lost from this copy of the file -- confirm and restore.

    # Arg options for analyzer init are uid, working_dir, parse (True to leverage the
    # :class:`OutputParser`; False to get the raw data in the :meth:`select_simulation_data`),
    # and filenames.
    # In this case, we want parse=True, and the filename(s) to analyze.
    def __init__(self, filenames, output_path="output_csv"):
        """
        Initialize our analyzer.

        Args:
            filenames: Filenames we want to pull
            output_path: Output path to write the csv

        Raises:
            ValueError: If any of the provided filenames does not have a csv extension.
        """
        super().__init__(parse=True, filenames=filenames)
        # Raise exception early if files are not csv files. This is a substring test on the
        # lower-cased extension, so the check is case-insensitive.
        if not all('csv' in os.path.splitext(f)[1].lower() for f in self.filenames):
            # ValueError is a subclass of Exception, so callers catching Exception still work.
            raise ValueError('Please ensure all filenames provided to CSVAnalyzer have a csv extension.')

        self.output_path = output_path

    def initialize(self):
        """
        Initialize on run. Create an output directory.

        The configured output path is joined onto ``self.working_dir`` and the resulting
        directory is created if it does not exist yet.

        Returns:
            None
        """
        self.output_path = os.path.join(self.working_dir, self.output_path)

        # Create the output path; exist_ok avoids the check-then-create race
        os.makedirs(self.output_path, exist_ok=True)

    # Map is called to get for each simulation a data object (all the metadata of the simulations)
    # and simulation object
    def map(self, data: ANALYSIS_ITEM_MAP_DATA_TYPE, simulation: ANALYZABLE_ITEM) -> pd.DataFrame:
        """
        Map each simulation/workitem data here.

        The data is a mapping of files -> content(in this case, dataframes since it is csvs parsed).

        Args:
            data: Data mapping of files -> content
            simulation: Simulation/Workitem we are mapping

        Returns:
            Items joined together into a dataframe.
        """
        # If there are 1 to many csv files, concatenate csv data columns into one dataframe.
        # Rows are stacked (axis=0), the row index is renumbered and columns are sorted.
        concatenated_df = pd.concat(list(data.values()), axis=0, ignore_index=True, sort=True)
        return concatenated_df

    # In reduce, we are printing the simulation and result data filtered in map
    def reduce(self, all_data: Dict[ANALYZABLE_ITEM, pd.DataFrame]):
        """
        Reduce(combine) all the data from our mapping.

        The combined dataframe is written to ``<output_path>/<exp_id>/<ClassName>.csv``.

        Args:
            all_data: Mapping of our data in form Item(Simulation/Workitem) -> Mapped dataframe

        Returns:
            None
        """
        results = pd.concat(
            list(all_data.values()),  # Combine a list of all the sims csv data column values
            axis=0,
            keys=[str(k.uid) for k in all_data],  # Add a hierarchical index with the keys option
            names=['SimId'],  # Label the index keys you create with the names option
        )
        results.index = results.index.droplevel(1)  # Remove default index

        # Make a directory labeled the exp id to write the csv results to
        first_sim = next(iter(all_data))  # get first Simulation
        # NOTE(review): the right-hand side of this assignment was missing in the reviewed
        # copy of the file; ``first_sim.experiment.id`` is assumed -- confirm against upstream.
        exp_id = str(first_sim.experiment.id)  # Set the exp id from the first sim data
        output_folder = os.path.join(self.output_path, exp_id)
        os.makedirs(output_folder, exist_ok=True)
        results.to_csv(os.path.join(output_folder, self.__class__.__name__ + '.csv'))