Source code for idmtools_calibra.resamplers.base_resampler

from abc import ABCMeta, abstractmethod
from itertools import zip_longest
import os
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.core import ItemType
from idmtools.entities.experiment import Experiment
from idmtools.entities.templated_simulation import TemplatedSimulations
from idmtools_calibra.calib_manager import CalibManager
from idmtools_calibra.resamplers.calibration_point import CalibrationPoint, CalibrationParameter


class BaseResampler(metaclass=ABCMeta):
    def __init__(self, calib_manager=None):
        self.calib_manager: CalibManager = calib_manager
        self.output_location = None  # must be set via setter below
        self.selection_columns = []  # items to strip off resampled points DataFrame and pass to next resampler
        self.selection_values = None  # a DataFrame, created using self.selection_columns, added to the resampled points for the next resampler to use

    # strictly required to be defined in subclasses
    @abstractmethod
    def resample(self, calibrated_points, selection_values, initial_calibration_points):
        pass
    # extend if desired in subclasses
    def post_analysis(self, resampled_points, analyzer_results, from_resample=None):
        os.makedirs(self.output_location, exist_ok=True)
    def set_calibration_manager(self, calib_manager: CalibManager):
        self.calib_manager = calib_manager
        self.output_location = os.path.join(calib_manager.name, 'resampling_output')
    def _run(self, points, resample_step) -> Experiment:
        """
        This run method is for running simulations, which is the in-common part of resampling.

        :param points: The points to run simulations at.
        :return: The Experiment object for these simulations
        """
        # create a sweep where each point is a separate sim
        if not self.calib_manager:
            raise Exception('Calibration manager has not been set for this resampler. Cannot generate simulations.')

        point_dicts = [point.to_value_dict() for point in points]

        # ck4, the number of replicates must be 1 for HIV for now; the general solution should allow a user-selected
        # replicate count, so long as their likelihood analyzer can handle > 1 replicates.
        exp_builder = self.calib_manager.experiment_builder_function(point_dicts, n_replicates=1)

        # Create an experiment manager
        exp_name = self.calib_manager.name + '_resample_step_%d' % resample_step
        experiment = Experiment.from_template(TemplatedSimulations(builders={exp_builder}), name=exp_name)
        experiment.run(platform=self.calib_manager.platform)
        return experiment

    def _analyze(self, experiment: Experiment, analyzers, points_ran):
        """
        This method is the in-common route for Resamplers to analyze simulations for likelihood.

        Args:
            experiment: the experiment to analyze, should be from self._run()
            analyzers:
            points_ran: Points objects that were just _run()

        Returns:
            The supplied points_ran with their .likelihood attribute set, AND the direct results of the analyzer
            as a list.
        """
        am = AnalyzeManager(analyzers=analyzers, ids=[(experiment.id, ItemType.EXPERIMENT)])
        am.analyze()

        # compute a single likelihood value from all of the analyzers on a per-simulation basis
        result_tuples = zip_longest(*[analyzer.results for analyzer in am.analyzers])
        try:
            results = [sum(tup) for tup in result_tuples]
        except TypeError as e:
            # if 1+ None values snuck in...
            raise type(e)('All analyzers must contain one result per simulation. The result list lengths do not match.')

        for i in range(len(results)):
            # Add the likelihood
            points_ran[i].likelihood = results[i]

        # verify that the returned points all have a likelihood attribute set
        likelihoods_are_missing = True in {point.likelihood is None for point in points_ran}
        if likelihoods_are_missing:
            raise Exception('At least one Point object returned by the provided analyzer does not have '
                            'its .likelihood attribute set.')

        return points_ran, results
    def resample_and_run(self, calibrated_points, resample_step, selection_values, initial_calibration_points):
        """
        Canonical entry method for using the resampler.

        Args:
            calibrated_points: Calibrated Points
            resample_step:
            selection_values:
            initial_calibration_points:

        Returns:

        """
        # 1. resample
        # The user-provided _resample() method in Resampler subclasses must set the 'Value' in each Point object dict
        # for keying off of in the _run() method above.
        # Any _resample() methodology that depends on the likelihood of the provided points should reference
        # the 'likelihood' attribute on the Point objects (e.g., use mypoint.likelihood, set it in the analyzer
        # return points).
        points_to_run, for_post_analysis = self.resample(calibrated_points=calibrated_points,
                                                         selection_values=selection_values,
                                                         initial_calibration_points=initial_calibration_points)

        # 2. run simulations
        experiment = self._run(points=points_to_run, resample_step=resample_step)
        experiment.wait()

        # 3. analyze simulations for likelihood
        self.resampled_points, self.analyzer_results = self._analyze(experiment=experiment,
                                                                     analyzers=self.calib_manager.analyzer_list,
                                                                     points_ran=points_to_run)

        # 4. perform any post-analysis processing, if defined
        self.post_analysis(self.resampled_points, self.analyzer_results, from_resample=for_post_analysis)

        return self.resampled_points, self.selection_values
    def _transform_df_points_to_calibrated_points(self, calibrated_point, df_points):
        # build calibration points from dataframe, preserving CalibrationParameter metadata from calibrated_point
        calibrated_points = []
        for index, row in df_points.iterrows():
            parameters = []
            for name in calibrated_point.parameter_names:
                new_parameter = CalibrationParameter.from_calibration_parameter(calibrated_point.get_parameter(name),
                                                                                value=row[name])
                parameters.append(new_parameter)
            calibrated_points.append(CalibrationPoint(parameters))

        self.selection_values = df_points[self.selection_columns].copy()

        return calibrated_points
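
For orientation, here is a minimal sketch of how a concrete resampler might plug into this base class. The subclass name ExampleResampler, the pass-through resampling strategy, and the driver variables below are illustrative assumptions, not part of this module; the only contract taken from the code above is that resample() returns a (points_to_run, for_post_analysis) pair and that set_calibration_manager() and resample_and_run() drive the run/analyze cycle.

# Illustrative sketch only -- not part of idmtools_calibra. Names such as
# ExampleResampler, calib_manager, and points are hypothetical.
class ExampleResampler(BaseResampler):
    def resample(self, calibrated_points, selection_values, initial_calibration_points):
        # Trivial strategy: re-run the calibrated points unchanged; a real
        # resampler would perturb or subsample them here.
        points_to_run = list(calibrated_points)
        for_post_analysis = None
        return points_to_run, for_post_analysis

    def post_analysis(self, resampled_points, analyzer_results, from_resample=None):
        # Ensure the output directory exists, then inspect the likelihoods
        # that _analyze() attached to each point.
        super().post_analysis(resampled_points, analyzer_results, from_resample=from_resample)
        for point in resampled_points:
            print(point.likelihood)


# Hypothetical driver wiring, assuming a configured CalibManager instance and a
# list of CalibrationPoint objects already exist:
# resampler = ExampleResampler()
# resampler.set_calibration_manager(calib_manager)
# resampled_points, selection_values = resampler.resample_and_run(
#     calibrated_points=points,
#     resample_step=1,
#     selection_values=None,
#     initial_calibration_points=points,
# )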