Source code for laser_measles.demographics.wpp
import numpy as np
import pyvd
from scipy.interpolate import make_interp_spline
[docs]
class WPP:
    """World Population Prospects (WPP) population data access and processing.

    This class provides access to United Nations World Population Prospects data
    for population trajectories, demographic estimates, and population pyramids.
    It uses the pyvd library to retrieve and process population data for specific
    countries, including mortality rates, birth rates, and age-structured population
    data.

    The class supports interpolation of population pyramids for any year within
    the available data range, making it useful for demographic modeling and
    population projection analysis.

    Attributes:
        country_code (str): The upper-cased country code that was used for the
            pyvd lookup.
        year_vec (np.ndarray): Vector of years available in the WPP dataset
            (first row of the array returned by ``pyvd.make_pop_dat``).
        pop_mat (np.ndarray): Population matrix with shape (age_bins, years),
            offset by +0.1 to avoid exact zeros.
        vd_tup (tuple): Demographic vital data tuple returned by
            ``pyvd.demog_vd_calc``, containing:
            - mort_year: Mortality year reference
            - mort_mat: Mortality matrix
            - birth_rate: Birth rate data
            - br_mult_x: Birth rate multiplier x values
            - br_mult_y: Birth rate multiplier y values
        age_vec (np.ndarray): Age vector in days, representing age bins.
        pyramid_spline: Interpolating spline over ``year_vec`` for the
            population pyramid data (one curve per age bin).

    Example:
        >>> wpp = WPP("USA")
        >>> pyramid_2020 = wpp.get_population_pyramid(2020)
        >>> print(f"Population pyramid shape: {pyramid_2020.shape}")
    """

    def __init__(self, country_code: str):
        """Initialize WPP data access for a specific country.

        Args:
            country_code (str): ISO country code (e.g., "USA", "GBR", "CHN").
                The code is converted to uppercase before the lookup, and the
                uppercase form is what is stored on ``self.country_code``.

        Raises:
            Exception: Whatever pyvd raises for an invalid or unavailable
                country code propagates unchanged. NOTE(review): earlier docs
                stated ``ValueError``; confirm against pyvd.

        Note:
            Population data is adjusted by adding 0.1 to avoid zero values
            that could cause issues in demographic calculations.
        """
        # Normalise once so the stored attribute always matches the code that
        # was actually used to fetch the data.
        iso_code = country_code.upper()

        # Get WPP population information from pyvd
        pop_input = pyvd.make_pop_dat(iso_code)

        self.country_code = iso_code
        # Row 0 holds the years; the remaining rows hold one age bin each.
        self.year_vec = pop_input[0, :]
        self.pop_mat = pop_input[1:, :] + 0.1  # age_bins x years

        # ('mort_year', 'mort_mat', 'birth_rate', 'br_mult_x', 'br_mult_y')
        self.vd_tup = pyvd.demog_vd_calc(self.year_vec, self.year_vec[0], self.pop_mat)

        # Every second entry of MORT_XVAL plus its final entry (in days).
        mort_xval = pyvd.constants.MORT_XVAL
        self.age_vec = np.concatenate([np.array(mort_xval)[::2], [mort_xval[-1]]])

        # Interpolate along the year axis so each age bin gets its own curve.
        self.pyramid_spline = make_interp_spline(self.year_vec, self.pop_mat, axis=1)

    def get_population_pyramid(self, year: int) -> np.ndarray:
        """Get the population pyramid for a given year.

        Retrieves the age-structured population data for the specified year
        using spline interpolation. The population pyramid represents the
        distribution of population across different age groups.

        Args:
            year (int): The target year for population pyramid data.
                Must be within the available data range.

        Returns:
            np.ndarray: Population pyramid array with shape (age_bins,),
            representing population counts for each age group.

        Raises:
            AssertionError: If the requested year is outside the available
                data range (before first year or after last year). The check
                is explicit, so it is enforced even when Python runs with
                ``-O`` (which strips ``assert`` statements).

        Example:
            >>> wpp = WPP("USA")
            >>> pyramid_2020 = wpp.get_population_pyramid(2020)
            >>> print(f"Age groups: {len(pyramid_2020)}")
            >>> print(f"Total population: {pyramid_2020.sum():.0f}")
        """
        first_year = self.year_vec[0]
        last_year = self.year_vec[-1]

        # Written as ``not (a >= b)`` rather than ``a < b`` so that a NaN year
        # is still rejected, exactly as the original assertions did.
        if not (year >= first_year):
            raise AssertionError("Year is before the first year in the WPP data")
        if not (year <= last_year):
            raise AssertionError("Year is after the last year in the WPP data")

        return self.pyramid_spline(year)