"""
Read in data distributions.
"""
import os
import json
import numpy as np
import pandas as pd
import sciris as sc
from collections import Counter
from . import base as spb
from . import config as cfg
from . import defaults
from . import logger
from . import data
def get_relative_path(datadir):
    """
    Resolve the base data directory, optionally extended by the configured
    relative path.

    Args:
        datadir (str): path to a specified data directory

    Returns:
        str: ``datadir`` joined with every segment of
        ``defaults.settings.relative_path`` when that setting holds more than
        one segment; otherwise ``datadir`` unchanged.
    """
    segments = defaults.settings.relative_path
    # NOTE(review): a relative path with exactly one segment is ignored by
    # this check (it requires more than one) -- confirm this is intended.
    if len(segments) <= 1:
        return datadir
    return os.path.join(datadir, *segments)
def get_nbrackets():
    """
    Look up the default number of age brackets.

    Returns:
        The value stored in ``defaults.settings.nbrackets``.
    """
    settings = defaults.settings
    return settings.nbrackets
def calculate_which_nbrackets_to_use(location_data, nbrackets=None):
    """
    Decide how many age brackets to use.

    Args:
        location_data : object exposing ``population_age_distributions``, an
                        iterable of entries that each carry a ``num_bins``
                        attribute; only read when ``nbrackets`` is None
        nbrackets (int): the number of age brackets to use; returned as is
                         when it is not None

    Returns:
        int: ``nbrackets`` if given; otherwise the largest non-None
        ``num_bins`` found in the location data; otherwise
        ``defaults.settings.nbrackets``.
    """
    # An explicit request always wins.
    if nbrackets is not None:
        return nbrackets

    available_bins = [
        distribution.num_bins
        for distribution in location_data.population_age_distributions
        if distribution.num_bins is not None
    ]
    if not available_bins:
        # Nothing usable in the data: fall back on the package-wide default.
        return defaults.settings.nbrackets
    return max(available_bins)
def sanitize_location(location):
    """
    Turn a location name into a string that is safe to use as one segment of
    a data filename.

    Args:
        location (str): name of the location, or None

    Returns:
        str: An empty string when ``location`` is None; otherwise the name
        with every space and every "-" replaced by "_".
    """
    if location is None:
        return ""

    cleaned = location
    # Spaces are not allowed in filenames, and "-" is reserved as the
    # separator between location segments, so both become "_".
    for reserved in (" ", "-"):
        cleaned = cleaned.replace(reserved, "_")
    return cleaned
def calculate_location_filename(location, state_location, country_location):
    """
    Build the base filename (without extension) for a location.

    Segments are ordered from broadest to most specific and joined with "-".

    Args:
        location (string)         : name of the location; "" if not given
        state_location (string)   : name of the state the location is in; "" if not given
        country_location (string) : name of the country the location is in

    Returns:
        str: "country-state-location" when ``location`` is non-empty,
        "country-state" when only ``state_location`` is non-empty, and
        ``country_location`` alone otherwise.
    """
    if location != "":
        segments = [country_location, state_location, location]
    elif state_location != "":
        segments = [country_location, state_location]
    else:
        # Country only: nothing to join.
        return country_location
    return "-".join(segments)
def calculate_location_filepath(location, state_location, country_location):
    """
    Build the json file path for a location.

    Each name is passed through sanitize_location(), the results are combined
    by calculate_location_filename(), and ".json" is appended. No directory is
    prepended here.

    Args:
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in

    Returns:
        str: A filename for where the location data reside.
    """
    logger.debug(f"Calculating filepath for (location, state_location, country_location) = "
                 f"({location}, {state_location}, {country_location})")

    clean_location, clean_state, clean_country = (
        sanitize_location(name) for name in (location, state_location, country_location)
    )
    stem = calculate_location_filename(clean_location, clean_state, clean_country)
    filepath = f"{stem}.json"

    logger.debug(f"Filepath = {filepath}")
    return filepath
def load_location(specific_location, state_location, country_location, revert_to_default=None):
    """
    Load the location data object for the given location.

    The file path is derived with calculate_location_filepath() and loaded
    with data.load_location_from_filepath(). If loading fails and
    revert_to_default is truthy, the default location from defaults.settings
    is loaded instead (without any further fallback).

    Args:
        specific_location (string) : name of the location
        state_location (string)    : name of the state the location is in
        country_location (string)  : name of the country the location is in
        revert_to_default (bool)   : If True, try to first find location specific data to return otherwise use default data specified by the default location. None is treated as False.

    Returns:
        The location object returned by data.load_location_from_filepath().

    Raises:
        NotImplementedError: if the location could not be loaded and no
            fallback was requested (or the fallback also failed). The
            underlying error is attached as the cause.
    """
    if revert_to_default is None:
        revert_to_default = False

    location_filepath = calculate_location_filepath(specific_location, state_location, country_location)

    # Only the load itself is guarded. Exception (rather than a bare except)
    # lets KeyboardInterrupt and SystemExit propagate.
    try:
        location_object = data.load_location_from_filepath(location_filepath)
    except Exception as load_error:
        if revert_to_default:
            # Only announce the reversion when it actually happens.
            logger.warn(f"Failed to load location [{specific_location}], "
                        f"state_location [{state_location}], "
                        f"country_location [{country_location}], reverting to default.")
            return load_location(defaults.settings.location,
                                 defaults.settings.state_location,
                                 defaults.settings.country_location,
                                 revert_to_default=False)

        msg = (f"Data unavailable for "
               f"(location, state_location, country_location) = "
               f"({specific_location}, {state_location}, {country_location}). "
               f"Please check input strings, or set use_default to True to use the default values from "
               f"(location, state_location, country_location) = "
               f"({defaults.settings.location}, {defaults.settings.state_location}, {defaults.settings.country_location}). ")
        raise NotImplementedError(msg) from load_error

    logger.debug(f"Loaded (location, state_location, country_location) = "
                 f"({specific_location}, {state_location}, {country_location}) "
                 f"from [{location_filepath}]")
    return location_object
def read_age_bracket_distr(datadir=None, location=None, state_location=None, country_location=None, nbrackets=None, file_path=None, use_default=False):
    """
    Return the age distribution by age bracket for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no age
    distribution for the chosen number of brackets. Default data may not be
    appropriate for the population under study, so provide location specific
    data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        nbrackets (int)           : number of age brackets; when None it is chosen by calculate_which_nbrackets_to_use()
        file_path (string)        : file path to user specified age bracket distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: A dictionary of the age distribution by age bracket. Keys are
        the bracket indices, values are the third element of each
        distribution entry.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    bracket_count = calculate_which_nbrackets_to_use(location_data, nbrackets)
    distribution = location_data.get_population_age_distribution(bracket_count)

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (distribution is None or len(distribution) == 0):
        fallback = defaults.settings
        return read_age_bracket_distr(location=fallback.location,
                                      state_location=fallback.state_location,
                                      country_location=fallback.country_location,
                                      use_default=False)

    shares = [entry[2] for entry in distribution]
    bracket_ids = np.arange(len(distribution))
    return {bracket_id: share for bracket_id, share in zip(bracket_ids, shares)}
# TODO: need to adapt this to new data.py
def get_smoothed_single_year_age_distr(datadir=None, location=None, state_location=None, country_location=None, nbrackets=None, file_path=None, use_default=False, window_length=7):
    """
    Return a smoothed age distribution by single year of age.

    The bracketed distribution is first spread evenly over the ages of each
    bracket, then smoothed with a centered moving average and normalized.
    If use_default, location specific data are tried first and the default
    location from defaults.settings is used when they are unavailable. Default
    data may not be appropriate for the population under study, so provide
    location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        nbrackets (int)           : number of age brackets to read the distribution with
        file_path (string)        : file path to user specified age bracket distribution data
        use_default (bool)        : if True, fall back on the default location when data are unavailable; applied to both the distribution and the bracket lookup
        window_length (int)       : length of window, in units of years, over which to average or smooth out age distribution

    Returns:
        dict: A dictionary of the smoothed, normalized age distribution. Keys
        are single ages.

    Raises:
        ValueError: if window_length is not an integer in [0, 10), or if the
            smoothed distribution contains a negative value.
    """
    # Fail fast on a bad window, before any data are loaded.
    if not isinstance(window_length, (int, np.integer)) or window_length < 0 or window_length >= 10:
        errormsg = f"The window_length should be a non-negative integer value less than 10. The supplied value is: {window_length}. Please try another value between 0 and 10."
        raise ValueError(errormsg)

    age_bracket_distr = read_age_bracket_distr(datadir, location, state_location, country_location, nbrackets, file_path, use_default)
    # use_default is forwarded so the brackets come from the same location
    # (requested or default) as the distribution read above.
    age_brackets = get_census_age_brackets(datadir, country_location=country_location, state_location=state_location, location=location, nbrackets=nbrackets, use_default=use_default)
    age_by_brackets = spb.get_age_by_brackets(age_brackets)

    # Spread each bracket's share evenly over the ages it contains.
    raw_age_distr = {
        age: age_bracket_distr[bracket] / len(age_brackets[bracket])
        for age, bracket in age_by_brackets.items()
    }

    smoothed_age_distr = raw_age_distr.copy()
    window_half = window_length // 2

    # Centered window of 2 * window_half + 1 ages. Ages closer than
    # window_half to either end keep their raw value.
    for a in range(window_half, max(smoothed_age_distr.keys()) - window_half + 1):
        smoothed_age_distr[a] = np.mean([raw_age_distr[ai] for ai in range(a - window_half, a + window_half + 1)])

    # Check that no value is negative.
    min_smoothed_val = min(smoothed_age_distr.values())
    if min_smoothed_val < 0:
        errormsg2 = f"The minimum value of the smoothed age distribution is: {min_smoothed_val}. All values of the distribution should be greater than or equal to 0. Check either the original age distribution or the window_length."
        raise ValueError(errormsg2)

    return spb.norm_dic(smoothed_age_distr)
def get_household_size_distr(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the distribution of household sizes for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    household size distribution. Default data may not be appropriate for the
    population under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified household size distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: A dictionary of the household size distribution data. Keys map to
        the household size as an integer, values are the percent of households
        of that size.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    size_entries = location_data.household_size_distribution

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (size_entries is None or len(size_entries) == 0):
        fallback = defaults.settings
        return get_household_size_distr(location=fallback.location,
                                        state_location=fallback.state_location,
                                        country_location=fallback.country_location,
                                        use_default=False)

    return {int(entry[0]): entry[1] for entry in size_entries}
def get_head_age_brackets(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the age brackets used for the head of household data of a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no head
    of household age brackets. Default data may not be appropriate for the
    population under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state
        country_location (string) : name of the country the state_location is in
        file_path (string)        : file path to user specified head age brackets data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: Maps each bracket index to an array of the consecutive integer
        ages from the bracket's first bound to its second bound, inclusive.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    head_brackets = location_data.household_head_age_brackets

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (head_brackets is None or len(head_brackets) == 0):
        fallback = defaults.settings
        return get_head_age_brackets(location=fallback.location,
                                     state_location=fallback.state_location,
                                     country_location=fallback.country_location,
                                     use_default=False)

    # +1 because the upper bound is part of the bracket.
    return {
        index: np.arange(int(bounds[0]), int(bounds[1]) + 1)
        for index, bounds in enumerate(head_brackets)
    }
def get_head_age_by_size_distr(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return an array of head of household age bracket counts (columns) by
    household size (rows).

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no head
    of household age distribution by family size. Default data may not be
    appropriate for the population under study, so provide location specific
    data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state
        country_location (string) : name of the country the state_location is in
        file_path (string)        : file path to user specified age of the head of the household by household size distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        ndarray: One row per entry of the source table, with the first element
        of every entry dropped.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    table = location_data.household_head_age_distribution_by_family_size

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (table is None or len(table) == 0):
        fallback = defaults.settings
        return get_head_age_by_size_distr(location=fallback.location,
                                          state_location=fallback.state_location,
                                          country_location=fallback.country_location,
                                          use_default=False)

    # presumably the leading element of each row is the family size -- verify against the data schema
    rows_without_label = [row[1:] for row in table]
    return np.array(rows_without_label)
def get_census_age_brackets(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False, nbrackets=None):
    """
    Return the census age brackets of a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no age
    distribution for the chosen number of brackets. Default data may not be
    appropriate for the population under study, so provide location specific
    data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state
        country_location (string) : name of the country the state_location is in
        file_path (string)        : file path to user specified census age brackets (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable
        nbrackets (int)           : number of age brackets; when None it is chosen by calculate_which_nbrackets_to_use()

    Returns:
        dict: A dictionary of the range of ages that map to each age bracket.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    bracket_count = calculate_which_nbrackets_to_use(location_data, nbrackets)
    distribution = location_data.get_population_age_distribution(bracket_count)

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (distribution is None or len(distribution) == 0):
        fallback = defaults.settings
        return get_census_age_brackets(location=fallback.location,
                                       state_location=fallback.state_location,
                                       country_location=fallback.country_location,
                                       use_default=False)

    # The first two elements of an entry are the inclusive age bounds.
    return {
        index: np.arange(int(entry[0]), int(entry[1]) + 1)
        for index, entry in enumerate(distribution)
    }
# TODO: still open question on how to handle these.
def get_school_enrollment_rates(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the school enrollment rates by age for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    enrollment rates. Default data may not be appropriate for the population
    under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified school enrollment by age data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: A dictionary of school enrollment rates by age (integer keys).
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    rate_entries = location_data.enrollment_rates_by_age

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (rate_entries is None or len(rate_entries) == 0):
        fallback = defaults.settings
        return get_school_enrollment_rates(location=fallback.location,
                                           state_location=fallback.state_location,
                                           country_location=fallback.country_location,
                                           use_default=False)

    return {int(entry[0]): entry[1] for entry in rate_entries}
def get_school_size_brackets(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the school size brackets of a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    school size brackets. Default data may not be appropriate for the
    population under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified school size brackets data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: Maps each bracket index to an array of the consecutive integer
        sizes from the bracket's first bound to its second bound, inclusive.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    size_bounds = location_data.school_size_brackets

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (size_bounds is None or len(size_bounds) == 0):
        fallback = defaults.settings
        return get_school_size_brackets(location=fallback.location,
                                        state_location=fallback.state_location,
                                        country_location=fallback.country_location,
                                        use_default=False)

    # +1 because the upper bound is part of the bracket.
    return {
        index: np.arange(int(bounds[0]), int(bounds[1]) + 1)
        for index, bounds in enumerate(size_bounds)
    }
def get_school_size_distr_by_brackets(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the distribution of school sizes by size bracket for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    school size distribution. Default data may not be appropriate for the
    population under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified school size distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: A dictionary of the distribution of school sizes by bracket
        index, after being passed through spb.norm_dic().
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    size_values = location_data.school_size_distribution

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (size_values is None or len(size_values) == 0):
        fallback = defaults.settings
        return get_school_size_distr_by_brackets(location=fallback.location,
                                                 state_location=fallback.state_location,
                                                 country_location=fallback.country_location,
                                                 use_default=False)

    by_bracket = {index: value for index, value in enumerate(size_values)}
    return spb.norm_dic(by_bracket)
# ### Default school type data ### #
def get_default_school_type_age_ranges():
    """
    Define and return default school types and the age range for each.

    Return:
        dict: Maps each default school type key ('pk', 'es', 'ms', 'hs', 'uv')
        to an array of the ages attending it.
    """
    # (school type, first age, one past the last age)
    type_bounds = (
        ('pk', 3, 6),
        ('es', 6, 11),
        ('ms', 11, 14),
        ('hs', 14, 18),
        ('uv', 18, 101),
    )
    return {school_type: np.arange(start, stop) for school_type, start, stop in type_bounds}
def get_default_school_types_distr_by_age():
    """
    Define and return default probabilities of school type for each age.

    Every age from 0 to 100 starts with probability 0. for every school type;
    each age inside a school type's default age range then gets probability 1.
    for that type.

    Return:
        dict: A dictionary of default probabilities for the school type likely
        for each age.
    """
    age_ranges = get_default_school_type_age_ranges()
    school_types = list(age_ranges.keys())

    distr_by_age = {age: dict.fromkeys(school_types, 0.) for age in range(101)}
    for school_type, ages in age_ranges.items():
        for age in ages:
            distr_by_age[age][school_type] = 1.
    return distr_by_age
def get_default_school_types_by_age_single():
    """
    Define and return default school type by age by assigning the school type
    with the highest probability.

    Ages whose highest probability is 0 are left out of the result.

    Return:
        dict: A dictionary of default school type by age.
    """
    distr_by_age = get_default_school_types_distr_by_age()

    single_type_by_age = {}
    for age in range(101):
        type_probabilities = distr_by_age[age]
        # Invert to probability -> school type. When several types share a
        # probability, the one listed last is kept.
        type_for_probability = dict(zip(type_probabilities.values(), type_probabilities.keys()))
        top_probability = max(type_for_probability)
        if top_probability == 0:
            continue
        single_type_by_age[age] = type_for_probability[top_probability]
    return single_type_by_age
def get_default_school_size_distr_brackets():
    """
    Define and return default school size distribution brackets.

    These are the school size brackets of the default location configured in
    defaults.settings.

    Return:
        dict: A dictionary of school size brackets.
    """
    settings = defaults.settings
    return get_school_size_brackets(
        settings.datadir,
        country_location=settings.country_location,
        state_location=settings.state_location,
        location=settings.location,
        use_default=True,
    )
def get_default_school_size_distr_by_type():
    """
    Define and return default school size distribution for each school type. The
    school size distributions are binned to size groups or brackets.

    Every school type gets its own copy of the default location's school size
    distribution, obtained by a separate call per type.

    Return:
        dict: A dictionary of school size distributions binned by size groups or
        brackets for each type of default school.
    """
    return {
        school_type: get_school_size_distr_by_brackets(
            defaults.settings.datadir,
            country_location=defaults.settings.country_location,
            state_location=defaults.settings.state_location,
            location=defaults.settings.location,
            use_default=True,
        )
        for school_type in ('pk', 'es', 'ms', 'hs', 'uv')
    }
def get_school_type_age_ranges(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the school types and the age range of each for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    school types by age.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: Maps each school type to an array of the ages from the first to
        the second element of its age_range, inclusive.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    types_by_age = location_data.school_types_by_age

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (types_by_age is None or len(types_by_age) == 0):
        fallback = defaults.settings
        return get_school_type_age_ranges(location=fallback.location,
                                          state_location=fallback.state_location,
                                          country_location=fallback.country_location,
                                          use_default=False)

    # +1 because the upper bound is part of the range.
    return {
        entry.school_type: np.arange(entry.age_range[0], entry.age_range[1] + 1)
        for entry in types_by_age
    }
def get_school_size_distr_by_type(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the school size distribution of each school type for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    school size distribution by type. Default data may not be appropriate for
    the population under study, so provide location specific data where
    possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified distribution data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: Maps each school type to a dictionary of its size distribution
        keyed by bracket index. The values are returned as stored, without
        normalization.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    distributions = location_data.school_size_distribution_by_type

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (distributions is None or len(distributions) == 0):
        fallback = defaults.settings
        return get_school_size_distr_by_type(location=fallback.location,
                                             state_location=fallback.state_location,
                                             country_location=fallback.country_location,
                                             use_default=False)

    return {
        entry.school_type: dict(enumerate(entry.size_distribution))
        for entry in distributions
    }
def get_employment_rates(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the employment rates by age for a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    employment rates. Default data may not be appropriate for the population
    under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified employment by age data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: A dictionary of employment rates by age.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    rate_entries = location_data.employment_rates_by_age

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (rate_entries is None or len(rate_entries) == 0):
        fallback = defaults.settings
        return get_employment_rates(location=fallback.location,
                                    state_location=fallback.state_location,
                                    country_location=fallback.country_location,
                                    use_default=False)

    return dict(rate_entries)
def get_workplace_size_brackets(datadir=None, location=None, state_location=None, country_location=None, file_path=None, use_default=False):
    """
    Return the workplace size brackets of a location.

    With use_default set, the default location from defaults.settings is used
    both when the requested location cannot be loaded and when it has no
    workplace size counts. Default data may not be appropriate for the
    population under study, so provide location specific data where possible.

    Args:
        datadir (string)          : file path to the data directory (not read by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified workplace size brackets data (not read by this function)
        use_default (bool)        : if True, fall back on the default location when data are unavailable

    Returns:
        dict: Maps each bracket index to an array of the consecutive integer
        sizes from the bracket's first bound to its second bound, inclusive.
    """
    # Falls back on the default location if this one has no file.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    size_counts = location_data.workplace_size_counts_by_num_personnel

    # Falls back on the default location if this one lacks the parameter.
    if use_default and (size_counts is None or len(size_counts) == 0):
        fallback = defaults.settings
        return get_workplace_size_brackets(location=fallback.location,
                                           state_location=fallback.state_location,
                                           country_location=fallback.country_location,
                                           use_default=False)

    # Only the first two elements of an entry (the inclusive size bounds)
    # are used here.
    return {
        index: np.arange(int(entry[0]), int(entry[1]) + 1)
        for index, entry in enumerate(size_counts)
    }
def get_workplace_size_distr_by_brackets(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the distribution of workplace sizes, keyed by bracket index.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no workplace size counts (None or empty),
    the distribution is taken from the default location given by
    settings.location, settings.state_location and settings.country_location
    instead. Default data may not be appropriate for the population under
    study, so provide location specific data whenever possible.

    Args:
        datadir (string)          : file path to the data directory (not used by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified workplace size distribution data (not used by this function)
        use_default (bool)        : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary mapping each bracket index to the third element of
        the matching workplace size entry.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    size_counts = location_data.workplace_size_counts_by_num_personnel

    # Use default if no data for this parameter.
    no_data = size_counts is None or len(size_counts) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_workplace_size_distr_by_brackets(location=defaults.settings.location,
                                                    state_location=defaults.settings.state_location,
                                                    country_location=defaults.settings.country_location,
                                                    use_default=False)

    # Entries are indexed in order; element 2 of each entry is the value kept
    # (elements 0 and 1 are the bracket bounds used by the brackets getter).
    return {index: entry[2] for index, entry in enumerate(size_counts)}
def get_state_postal_code(state_location, country_location):
    """
    Look up the postal code of a state.

    The lookup table is read from the file ``postal_codes.csv`` found in the
    country_location folder of the settings data directory. The file must
    provide the columns ``state`` and ``postal_code``.

    Args:
        state_location (string)   : name of the state
        country_location (string) : name of the country the state is in

    Returns:
        str: A postal code for the state_location.

    Raises:
        KeyError: if state_location is not listed in the ``state`` column.
    """
    csv_path = os.path.join(defaults.settings.datadir, country_location, 'postal_codes.csv')
    table = pd.read_csv(csv_path, delimiter=',')

    # Pair up the two columns row by row to get a state -> postal code lookup.
    code_by_state = {state: code for state, code in zip(table.state, table.postal_code)}
    return code_by_state[state_location]
def get_long_term_care_facility_residents_distr(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the size distribution of residents per facility for Long Term Care
    Facilities, keyed by bracket index.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no resident distribution (None or empty),
    the distribution is taken from the default location given by
    settings.location, settings.state_location and settings.country_location
    instead.

    Args:
        datadir (string)          : file path to the data directory (not used by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified LTCF resident size distribution data (not used by this function)
        use_default (bool)        : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary mapping each bracket index to the third element of
        the matching entry of the residents per facility distribution.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    residents_distr = location_data.ltcf_num_residents_distribution

    # Use default if no data for this parameter.
    no_data = residents_distr is None or len(residents_distr) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_long_term_care_facility_residents_distr(location=defaults.settings.location,
                                                           state_location=defaults.settings.state_location,
                                                           country_location=defaults.settings.country_location,
                                                           use_default=False)

    # Element 2 of each entry is the value kept; elements 0 and 1 are the
    # bracket bounds used by the matching brackets getter.
    return {index: entry[2] for index, entry in enumerate(residents_distr)}
def get_long_term_care_facility_residents_distr_brackets(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the size bins for the distribution of residents per facility for
    Long Term Care Facilities.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no resident distribution (None or empty),
    the bins are taken from the default location given by settings.location,
    settings.state_location and settings.country_location instead.

    Args:
        datadir (string)          : file path to the data directory (not used by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified LTCF resident size brackets data (not used by this function)
        use_default (bool)        : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary mapping each bracket index to an array of the
        numbers of residents in that bracket, both bounds included.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    residents_distr = location_data.ltcf_num_residents_distribution

    # Use default if no data for this parameter.
    no_data = residents_distr is None or len(residents_distr) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_long_term_care_facility_residents_distr_brackets(location=defaults.settings.location,
                                                                    state_location=defaults.settings.state_location,
                                                                    country_location=defaults.settings.country_location,
                                                                    use_default=False)

    # The first two elements of each entry are the bracket's min and max
    # number of residents; the + 1 makes the upper bound inclusive.
    return {
        index: np.arange(int(entry[0]), int(entry[1]) + 1)
        for index, entry in enumerate(residents_distr)
    }
def get_long_term_care_facility_resident_to_staff_ratios_distr(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the distribution of resident to staff ratios per facility for Long
    Term Care Facilities, keyed by bracket index.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no resident to staff ratio distribution
    (None or empty), the distribution is taken from the default location given
    by settings.location, settings.state_location and
    settings.country_location instead.

    Args:
        datadir (string)          : file path to the data directory (not used by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified resident to staff ratio distribution data (not used by this function)
        use_default (bool)        : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary mapping each bracket index to the third element of
        the matching entry of the resident to staff ratio distribution.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    ratio_distr = location_data.ltcf_resident_to_staff_ratio_distribution

    # Use default if no data for this parameter.
    no_data = ratio_distr is None or len(ratio_distr) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_long_term_care_facility_resident_to_staff_ratios_distr(location=defaults.settings.location,
                                                                          state_location=defaults.settings.state_location,
                                                                          country_location=defaults.settings.country_location,
                                                                          use_default=False)

    # Element 2 of each entry is the value kept; elements 0 and 1 are the
    # bracket bounds used by the matching brackets getter.
    return {index: entry[2] for index, entry in enumerate(ratio_distr)}
def get_long_term_care_facility_resident_to_staff_ratios_brackets(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the size bins for the distribution of resident to staff ratios per
    facility for Long Term Care Facilities.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no resident to staff ratio distribution
    (None or empty), the bins are taken from the default location given by
    settings.location, settings.state_location and settings.country_location
    instead.

    Args:
        datadir (string)          : file path to the data directory (not used by this function)
        location (string)         : name of the location
        state_location (string)   : name of the state the location is in
        country_location (string) : name of the country the location is in
        file_path (string)        : file path to user specified resident to staff ratio brackets data (not used by this function)
        use_default (bool)        : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary mapping each bracket index to an array of the
        resident to staff ratios in that bracket, stepping by 1 from the
        bracket's lower bound.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    ratio_distr = location_data.ltcf_resident_to_staff_ratio_distribution

    # Use default if no data for this parameter.
    no_data = ratio_distr is None or len(ratio_distr) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_long_term_care_facility_resident_to_staff_ratios_brackets(location=defaults.settings.location,
                                                                             state_location=defaults.settings.state_location,
                                                                             country_location=defaults.settings.country_location,
                                                                             use_default=False)

    # The bounds are passed to np.arange as stored, without an int() cast, so
    # the dtype of each array follows the dtype of the stored bounds.
    return {
        index: np.arange(entry[0], entry[1] + 1)
        for index, entry in enumerate(ratio_distr)
    }
def get_long_term_care_facility_use_rates(
        datadir=None,
        location=None,
        state_location=None,
        country_location=None,
        file_path=None,
        use_default=False):
    """
    Build the Long Term Care Facility use rates by age for a location.

    The location data are loaded with ``load_location``. When use_default is
    True and the loaded data carry no use rate distribution (None or empty),
    the rates are taken from the default location given by settings.location,
    settings.state_location and settings.country_location instead.

    Args:
        datadir (str)          : file path to the data directory (not used by this function)
        location (str)         : name of the location
        state_location (str)   : name of the state the location is in
        country_location (str) : name of the country the location is in
        file_path (str)        : file path to user specified LTCF use rate data (not used by this function)
        use_default (bool)     : if True, fall back to the default location when no data are found for the location given; also forwarded to load_location as revert_to_default

    Returns:
        dict: A dictionary of the Long Term Care Facility usage rates by age.

    Note:
        Currently only available for the United States.
    """
    # Use default if no file for this location.
    location_data = load_location(location, state_location, country_location, revert_to_default=use_default)
    use_rates = location_data.ltcf_use_rate_distribution

    # Use default if no data for this parameter.
    no_data = use_rates is None or len(use_rates) == 0
    if use_default and no_data:
        # use_default=False on the retry so the fallback happens at most once.
        return get_long_term_care_facility_use_rates(location=defaults.settings.location,
                                                     state_location=defaults.settings.state_location,
                                                     country_location=defaults.settings.country_location,
                                                     use_default=False)

    # Each entry holds the key (cast to int) followed by its rate.
    return {int(entry[0]): entry[1] for entry in use_rates}