Source code for idmtools_platform_comps.utils.lookups
"""idmtools comps lookups.
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
from datetime import datetime, timedelta
from logging import getLogger
from typing import List
import backoff
from COMPS.Data import Experiment, Simulation, QueryCriteria
from requests import Timeout, HTTPError
from idmtools_platform_comps.utils.general import fatal_code
logger = getLogger(__name__)
# backoff expects the wait generator *function* and instantiates it itself on every call, forwarding extra
# keyword arguments (here ``interval``) to it; passing an already-created generator is not the documented usage.
# NOTE(review): ``ConnectionError`` is not imported from requests in the visible imports, so it presumably
# resolves to the builtin here - confirm that is the intended exception type.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_experiment_by_id(exp_id, query_criteria: QueryCriteria = None) -> Experiment:
    """
    Get an experiment by id.

    Wrapped in retry logic: on Timeout, ConnectionError or HTTPError the call is attempted up to 5 times with a
    constant 1.5 second wait between attempts, unless the ``fatal_code`` giveup predicate stops the retries.

    Args:
        exp_id: Id of the experiment to fetch
        query_criteria: Optional QueryCriteria to search with

    Returns:
        Experiment with ID
    """
    return Experiment.get(exp_id, query_criteria=query_criteria)
# backoff expects the wait generator *function* and instantiates it itself on every call, forwarding extra
# keyword arguments (here ``interval``) to it; passing an already-created generator is not the documented usage.
# NOTE(review): ``ConnectionError`` is not imported from requests in the visible imports, so it presumably
# resolves to the builtin here - confirm that is the intended exception type.
@backoff.on_exception(backoff.constant, (Timeout, ConnectionError, HTTPError), max_tries=5, giveup=fatal_code,
                      interval=1.5)
def get_simulation_by_id(sim_id, query_criteria: QueryCriteria = None) -> Simulation:
    """
    Fetches simulation by id and optional query criteria.

    Wrapped in additional Retry Logic. Used by other lookup methods. On Timeout, ConnectionError or HTTPError the
    call is attempted up to 5 times with a constant 1.5 second wait between attempts, unless the ``fatal_code``
    giveup predicate stops the retries.

    Args:
        sim_id: Id of the simulation to fetch
        query_criteria: Optional QueryCriteria to search with

    Returns:
        Simulation with ID
    """
    return Simulation.get(id=sim_id, query_criteria=query_criteria)
def get_all_experiments_for_user(user: str) -> List[Experiment]:
    """
    Returns all the experiments for a specific user.

    The experiments are collected by walking backwards in time from today, one date window per query. A window
    that comes back with the maximum number of rows is assumed to be truncated and is queried again with half
    the interval; an empty window doubles the interval for the next query.

    Args:
        user: username to locate

    Returns:
        Experiments for a user
    """
    # COMPS limits the retrieval to 1000 so to make sure we get all experiments for a given user, we need to be clever
    # Also COMPS does not have an order_by so we have to go through all date ranges
    page_limit = 1000
    # The query dates are formatted with one-day resolution, so shrinking the window below a day cannot
    # narrow the query any further.
    min_interval = 1
    date_format = '%Y-%m-%d'

    interval = 365
    results = {}
    end_date = datetime.today()
    limit_date = datetime.strptime("2014-03-31", date_format)  # Oldest simulation in COMPS
    # Loop on end_date (the newest point not collected yet) rather than start_date, so that a window which
    # has to be retried with a smaller interval is never dropped just because it already reached limit_date.
    while end_date > limit_date:
        start_date = end_date - timedelta(days=interval)
        batch = Experiment.get(query_criteria=QueryCriteria().where([
            "owner={}".format(user),
            "date_created<={}".format(end_date.strftime(date_format)),
            "date_created>={}".format(start_date.strftime(date_format)),
        ]))

        if len(batch) >= page_limit:
            if interval > min_interval:
                # We hit a limit, reduce the interval and run again
                interval = max(interval / 2, min_interval)
                continue
            # Already at the smallest useful window: keep what we got instead of retrying forever.
            logger.warning("Retrieved %d experiments for user %s between %s and %s; "
                           "the result may be truncated by the COMPS retrieval limit",
                           len(batch), user, start_date.strftime(date_format), end_date.strftime(date_format))

        if not batch:
            interval *= 2
        else:
            # Add the experiments to the dict (keyed by id, so overlapping window edges do not duplicate)
            for e in batch:
                results[e.id] = e

        # Go from there
        end_date = start_date

    return list(results.values())
def get_simulations_from_big_experiments(experiment_id):
    """
    Get simulation for large experiment. This allows us to pull simulations in chunks.

    Starting at the experiment's creation date, simulations are queried one time window at a time (60 minutes
    initially) moving forward until the current time. A failed query is retried with half the interval. The
    walk stops early once two consecutive windows come back empty.

    Args:
        experiment_id: Experiment id to load

    Returns:
        List of simulations

    Raises:
        Exception: whatever the simulation query raised, once the query still fails at the smallest interval
    """
    from datetime import timezone

    date_format = '%Y-%m-%d %H:%M:%S'  # spelled out; the '%T' shorthand is not available on every platform
    min_interval = 1  # minutes; below this a failing query is not retried any further

    experiment = get_experiment_by_id(experiment_id)
    # date_created is compared against a timezone-aware "now" below, so it is expected to be timezone-aware.
    window_start = experiment.date_created
    # Take the current time directly in UTC; labelling the local wall-clock time as UTC shifts the limit
    # by the local UTC offset.
    limit_date = datetime.now(tz=timezone.utc)
    interval = 60
    stop_flag = False
    results = {}
    # Loop on the start of the window (the oldest point not collected yet) so a window that must be retried
    # after a failure is not skipped just because its end already passed limit_date.
    while window_start < limit_date:
        window_end = window_start + timedelta(minutes=interval)
        try:
            batch = Simulation.get(query_criteria=QueryCriteria()
                                   .select(['id', 'state', 'date_created']).select_children('tags')
                                   .where(["experiment_id={}".format(experiment_id),
                                           "date_created>={}".format(window_start.strftime(date_format)),
                                           "date_created<={}".format(window_end.strftime(date_format))])
                                   )
        except Exception as err:
            logger.exception(err)
            if interval <= min_interval:
                # Shrinking the window did not help; give up instead of retrying forever.
                raise
            interval = max(interval / 2, min_interval)
            continue

        if not batch:
            if stop_flag:
                # Second empty window in a row: assume there is nothing more to fetch.
                break
            # First empty window: look once more with a wider window before giving up.
            interval = 120
            stop_flag = True
        else:
            stop_flag = False
            for s in batch:
                results[s.id] = s
        window_start = window_end
    return list(results.values())