Source code for idmtools_platform_comps.utils.lookups

"""idmtools comps lookups.

Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
from datetime import datetime, timedelta
from logging import getLogger
from typing import List
import backoff
from COMPS.Data import Experiment, Simulation, QueryCriteria
from requests import Timeout, HTTPError
from idmtools_platform_comps.utils.general import fatal_code

logger = getLogger(__name__)


# backoff expects the wait generator *function* plus its keyword arguments; it instantiates the generator itself on
# each call. Passing ``backoff.constant(1.5)`` would hand it an already-created generator object instead.
# NOTE(review): ``ConnectionError`` here is the Python builtin (only Timeout and HTTPError are imported from
# requests) - confirm that is the exception type that should trigger a retry.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_experiment_by_id(exp_id, query_criteria: QueryCriteria = None) -> Experiment:
    """
    Get an experiment by id.

    Wrapped in retry logic: the call is attempted up to 5 times, using a constant 1.5 second backoff interval, when
    a Timeout, ConnectionError or HTTPError is raised, unless ``fatal_code`` tells backoff to give up.

    Args:
        exp_id: Id of the experiment to fetch
        query_criteria: Optional QueryCriteria to search with

    Returns:
        Experiment with ID
    """
    return Experiment.get(exp_id, query_criteria=query_criteria)
# backoff expects the wait generator *function* plus its keyword arguments; it instantiates the generator itself on
# each call. Passing ``backoff.constant(1.5)`` would hand it an already-created generator object instead.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_simulation_by_id(sim_id, query_criteria: QueryCriteria = None) -> Simulation:
    """
    Fetches simulation by id and optional query criteria.

    Wrapped in additional Retry Logic: the call is attempted up to 5 times, using a constant 1.5 second backoff
    interval, when a Timeout, ConnectionError or HTTPError is raised, unless ``fatal_code`` tells backoff to give up.
    Used by other lookup methods.

    Args:
        sim_id: Id of the simulation to fetch
        query_criteria: Optional QueryCriteria to search with

    Returns:
        Simulation with ID
    """
    return Simulation.get(id=sim_id, query_criteria=query_criteria)
def get_all_experiments_for_user(user: str) -> List[Experiment]:
    """
    Returns all the experiments for a specific user.

    The creation-date range from today back to 2014-03-31 is walked backwards in windows. A window that comes back
    with exactly 1000 results is assumed to be truncated and is queried again with half the size; an empty window
    doubles the size of the next one. Window bounds are inclusive, so results are de-duplicated by experiment id.

    Args:
        user: username to locate

    Returns:
        Experiments for a user
    """
    # COMPS limits the retrieval to 1000 so to make sure we get all experiments for a given user, we need to be clever
    # Also COMPS does not have an order_by so we have to go through all date ranges
    comps_limit = 1000
    # Query bounds are formatted with day resolution, so a window shorter than one day cannot narrow the query.
    min_interval = 1
    interval = 365  # window size in days
    results = {}
    end_date = datetime.today()
    limit_date = datetime.strptime("2014-03-31", '%Y-%m-%d')  # Oldest simulation in COMPS

    # Loop on end_date (the bound that only moves once a window has been accepted) so that a truncated window which
    # already reaches past limit_date is still retried with a smaller size instead of ending the loop.
    while end_date > limit_date:
        start_date = end_date - timedelta(days=interval)
        batch = Experiment.get(query_criteria=QueryCriteria().where([
            "owner={}".format(user),
            "date_created<={}".format(end_date.strftime('%Y-%m-%d')),
            "date_created>={}".format(start_date.strftime('%Y-%m-%d')),
        ]))

        if len(batch) == comps_limit:
            if interval > min_interval:
                # We hit a limit, reduce the interval and run again
                interval = max(interval / 2, min_interval)
                continue
            # The window cannot shrink any further: keep what we got rather than retrying the same query forever.
            logger.warning("Hit the COMPS limit of %s experiments for user %s between %s and %s; "
                           "some experiments may be missing", comps_limit, user,
                           start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))

        if len(batch) == 0:
            interval *= 2
        else:
            # Add the experiments to the dict
            for e in batch:
                results[e.id] = e

        # Go from there
        end_date = start_date

    return list(results.values())
def get_simulations_from_big_experiments(experiment_id):
    """
    Get simulation for large experiment. This allows us to pull simulations in chunks.

    Starting at the experiment's creation date, simulations are queried in consecutive creation-date windows
    (60 minutes at first) until the current UTC time is reached. A failed query is logged and retried with a window
    half the size. The first empty window widens the next one to 120 minutes; a second consecutive empty window ends
    the search. Window bounds are inclusive, so results are de-duplicated by simulation id.

    Args:
        experiment_id: Experiment id to load

    Returns:
        List of simulations

    Raises:
        Exception: Re-raises the last query error once the window would have to shrink below one second.
    """
    # Local import: the module-level import only brings in datetime and timedelta.
    from datetime import timezone

    experiment = get_experiment_by_id(experiment_id)
    window_start = experiment.date_created
    # Take the real current UTC time; datetime.today() is local wall-clock time and must not be labelled as UTC.
    limit_date = datetime.now(timezone.utc)
    # Spelled-out equivalent of '%Y-%m-%d %T'; %T is not supported by every platform's strftime.
    time_format = '%Y-%m-%d %H:%M:%S'
    # Bounds are formatted with second resolution, so windows below one second cannot narrow the query further.
    min_interval = 1 / 60
    interval = 60  # window size in minutes
    stop_flag = False
    results = {}

    # Loop on window_start (which only advances once a window was fetched) so that a failed window is retried even
    # when its upper bound already lies past limit_date.
    while window_start < limit_date:
        window_end = window_start + timedelta(minutes=interval)
        try:
            batch = Simulation.get(query_criteria=QueryCriteria()
                                   .select(['id', 'state', 'date_created']).select_children('tags')
                                   .where(["experiment_id={}".format(experiment_id),
                                           "date_created>={}".format(window_start.strftime(time_format)),
                                           "date_created<={}".format(window_end.strftime(time_format))])
                                   )
        except Exception as ex:
            logger.exception(ex)
            if interval / 2 < min_interval:
                # Shrinking the window does not help; surface the error instead of retrying forever.
                raise
            interval /= 2
            continue

        if not batch:
            if stop_flag:
                # Second empty window in a row: assume there is nothing left to fetch.
                break
            interval = 120
            stop_flag = True
        else:
            stop_flag = False
            for s in batch:
                results[s.id] = s

        window_start = window_end

    return list(results.values())