Source code for idmtools_platform_comps.utils.lookups
"""idmtools comps lookups.Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved."""fromdatetimeimportdatetime,timedeltafromloggingimportgetLoggerfromtypingimportListimportbackofffromCOMPS.DataimportExperiment,Simulation,QueryCriteriafromrequestsimportTimeout,HTTPErrorfromidmtools_platform_comps.utils.generalimportfatal_codelogger=getLogger(__name__)
# backoff expects the wait-generator *function* plus its arguments as keywords; the
# previous ``backoff.constant(1.5)`` handed it an already-created generator object.
# NOTE(review): ``ConnectionError`` here resolves to the builtin, since only Timeout and
# HTTPError are imported from requests in this module - confirm that is intended.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_experiment_by_id(exp_id, query_criteria: QueryCriteria = None) -> Experiment:
    """
    Get an experiment by id.

    The lookup is retried (up to 5 tries, constant 1.5 interval between tries) on
    Timeout, ConnectionError and HTTPError, unless ``fatal_code`` tells backoff to give up.

    Args:
        exp_id: Id of the experiment to fetch
        query_criteria: Optional QueryCriteria passed through to ``Experiment.get``

    Returns:
        Experiment with ID
    """
    return Experiment.get(exp_id, query_criteria=query_criteria)
# backoff expects the wait-generator *function* plus its arguments as keywords; the
# previous ``backoff.constant(1.5)`` handed it an already-created generator object.
# NOTE(review): ``ConnectionError`` here resolves to the builtin, since only Timeout and
# HTTPError are imported from requests in this module - confirm that is intended.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_simulation_by_id(sim_id, query_criteria: QueryCriteria = None) -> Simulation:
    """
    Fetches simulation by id and optional query criteria.

    Wrapped in additional Retry Logic (up to 5 tries, constant 1.5 interval between tries,
    on Timeout, ConnectionError and HTTPError unless ``fatal_code`` gives up).
    Used by other lookup methods.

    Args:
        sim_id: Id of the simulation to fetch
        query_criteria: Optional QueryCriteria to search with

    Returns:
        Simulation with ID
    """
    return Simulation.get(id=sim_id, query_criteria=query_criteria)
def get_all_experiments_for_user(user: str) -> List[Experiment]:
    """
    Returns all the experiments for a specific user.

    COMPS limits the retrieval to 1000 items and does not have an order_by, so the
    experiments are collected by walking backwards from today through date windows.
    A window that comes back full is retried with a smaller span; an empty window
    makes the next span larger.

    Args:
        user: username to locate

    Returns:
        Experiments for a user
    """
    page_limit = 1000  # a batch of this size is treated as truncated
    interval = 365  # window size in days
    results = {}  # keyed by experiment id so overlapping windows do not duplicate entries
    end_date = datetime.today()
    limit_date = datetime.strptime("2014-03-31", '%Y-%m-%d')  # Oldest simulation in COMPS

    # Loop on the accepted boundary (end_date) only. A rejected window must not count as
    # progress, otherwise the loop could stop before that window is ever re-queried.
    while end_date > limit_date:
        start_date = end_date - timedelta(days=interval)
        batch = Experiment.get(query_criteria=QueryCriteria().where([
            "owner={}".format(user),
            "date_created<={}".format(end_date.strftime('%Y-%m-%d')),
            "date_created>={}".format(start_date.strftime('%Y-%m-%d')),
        ]))

        if len(batch) >= page_limit:
            if interval > 1:
                # We hit a limit, reduce the interval and run again on the same end_date
                interval = max(1, interval // 2)
                continue
            # The filter only has day resolution, so the window cannot shrink any further.
            # Keep what was returned and move on instead of halving forever.
            logger.warning("Retrieval limit reached for user %s between %s and %s; results may be incomplete",
                           user, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))

        if batch:
            # Add the experiments to the dict
            for experiment in batch:
                results[experiment.id] = experiment
        else:
            # Nothing in this window, look further back in bigger steps
            interval *= 2

        # Go from there
        end_date = start_date

    return list(results.values())
def get_simulations_from_big_experiments(experiment_id):
    """
    Get simulation for large experiment. This allows us to pull simulations in chunks.

    Starting at the experiment's creation date, simulations are queried in consecutive
    time windows (60 minutes to begin with). A failing query is retried on a window half
    the size. An empty window widens the next one to 120 minutes, and two empty windows
    in a row end the search.

    Args:
        experiment_id: Experiment id to load

    Returns:
        List of simulations (only id, state, date_created and tags are selected)

    Raises:
        Exception: Whatever the query raised, once the window has shrunk to one minute
            or less and the query still fails.
    """
    import pytz

    experiment = get_experiment_by_id(experiment_id)
    end_date = experiment.date_created
    # Take the current time in UTC. The previous code stamped local wall-clock time with a
    # UTC tzinfo, which shifts the limit by the local UTC offset.
    limit_date = datetime.now(tz=pytz.utc)
    interval = 60  # window size in minutes
    min_interval = 1  # below this a failing query is considered persistent
    stop_flag = False  # True once the previous window came back empty
    results = {}  # keyed by simulation id so shared window boundaries do not duplicate entries
    # Same output as the original '%Y-%m-%d %T', without the platform-dependent %T code
    date_format = '%Y-%m-%d %H:%M:%S'

    # Loop on the accepted boundary (end_date) only. A failed window must not count as
    # progress, otherwise the loop could stop before that window is ever re-queried.
    while end_date < limit_date:
        window_end = end_date + timedelta(minutes=interval)
        try:
            criteria = QueryCriteria().select(['id', 'state', 'date_created']).select_children('tags').where([
                "experiment_id={}".format(experiment_id),
                "date_created>={}".format(end_date.strftime(date_format)),
                "date_created<={}".format(window_end.strftime(date_format)),
            ])
            batch = Simulation.get(query_criteria=criteria)
        except Exception as ex:
            logger.exception(ex)
            if interval <= min_interval:
                # Shrinking the window further will not help; do not retry forever
                raise
            interval /= 2
            continue

        if not batch:
            if stop_flag:
                # Second empty window in a row: assume there is nothing left
                break
            interval = 120
            stop_flag = True
        else:
            stop_flag = False
            for simulation in batch:
                results[simulation.id] = simulation

        end_date = window_end

    return list(results.values())