Source code for pearl.population

"""Module for population initialization in PEARL."""

from typing import Optional

import numpy as np
import pandas as pd
from typing_extensions import override

from pearl.definitions import (
    ART_NAIVE,
    ART_NONUSER,
    ART_USER,
    DELAYED,
    POPULATION_TYPE_DICT,
    STAGE0,
    STAGE1,
    STAGE2,
    STAGE3,
)
from pearl.engine import Event, EventGrouping
from pearl.interpolate import restricted_cubic_spline_var
from pearl.parameters import Parameters
from pearl.sample import draw_from_trunc_norm


def add_id(population: pd.DataFrame) -> pd.DataFrame:
    """Number the agents and index the population by age category and id.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame. It must already contain an ``age_cat`` column.

    Returns
    -------
    pd.DataFrame
        Population indexed by ``("age_cat", "id")`` and sorted on that index.
        The ``id`` column is also written onto the DataFrame that was passed in.
    """
    n_agents = population.index.size
    # Ids are positional: 0 .. n_agents - 1 in the current row order.
    population["id"] = np.array(range(n_agents))
    indexed = population.set_index(["age_cat", "id"])
    return indexed.sort_index()
def add_age_categories(population: pd.DataFrame) -> pd.DataFrame:
    """Floor each agent's age and derive a bounded decade category.

    ``age`` is replaced by its floor. ``age_cat`` is the floor of ``age / 10``
    limited to the range 2 through 7, so anyone below 20 lands in category 2
    and anyone 80 or older lands in category 7.

    Parameters
    ----------
    population : pd.DataFrame
        Population with an ``age`` column.

    Returns
    -------
    pd.DataFrame
        The same DataFrame with ``age`` floored and ``age_cat`` added.
    """
    whole_years = np.floor(population["age"])
    population["age"] = whole_years
    decade = np.floor(whole_years / 10)
    # One clamp replaces the separate "> 7" and "< 2" corrections.
    population["age_cat"] = decade.clip(lower=2, upper=7)
    return population
# TODO combine these add_default_columns
def add_default_columns(population: pd.DataFrame) -> pd.DataFrame:
    """Add the bookkeeping columns needed by agents present at simulation start.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame with ``h1yy``, ``init_sqrtcd4n`` and ``age`` columns.

    Returns
    -------
    pd.DataFrame
        The same DataFrame with the default columns added in place.
    """
    # Columns derived from existing per-agent values.
    population["last_h1yy"] = population["h1yy"]
    population["last_init_sqrtcd4n"] = population["init_sqrtcd4n"]
    # Age is taken as of 2009, so step it back to the agent's h1yy.
    years_before_2009 = 2009 - population["h1yy"]
    population["init_age"] = population["age"] - years_before_2009

    # Columns that start with the same value for every agent.
    constant_columns = {
        "n_lost": np.array(0, dtype="int32"),
        "years_out": np.array(0, dtype="int16"),
        "year_died": np.nan,
        "sqrtcd4n_exit": 0,
        "ltfu_year": np.array(0, dtype="int16"),
        "return_year": np.array(0, dtype="int16"),
        "intercept": 1.0,
        "year": np.array(2009, dtype="int16"),
    }
    for column, value in constant_columns.items():
        population[column] = value
    return population
def add_default_columns_new(population: pd.DataFrame) -> pd.DataFrame:
    """Add the bookkeeping columns for agents who join after simulation start.

    Unlike ``add_default_columns``, the initial age is the agent's ``age``
    unchanged, and ``time_varying_sqrtcd4n`` is seeded from ``init_sqrtcd4n``.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame with ``h1yy``, ``init_sqrtcd4n`` and ``age`` columns.

    Returns
    -------
    pd.DataFrame
        The same DataFrame with the default columns added in place.
    """
    # Columns copied from existing per-agent values: (new column, source column).
    copied_columns = (
        ("last_h1yy", "h1yy"),
        ("time_varying_sqrtcd4n", "init_sqrtcd4n"),
        ("last_init_sqrtcd4n", "init_sqrtcd4n"),
        ("init_age", "age"),
    )
    for target, source in copied_columns:
        population[target] = population[source]

    # Columns that start with the same value for every agent.
    constant_columns = (
        ("n_lost", 0),
        ("years_out", 0),
        ("year_died", np.nan),
        ("sqrtcd4n_exit", 0),
        ("ltfu_year", 0),
        ("return_year", 0),
        ("intercept", 1.0),
        ("year", 2009),
    )
    for target, value in constant_columns:
        population[target] = value
    return population
def delta_bmi(population: pd.DataFrame) -> pd.DataFrame:
    """Store the post-ART minus pre-ART BMI difference for every agent.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame with ``pre_art_bmi`` and ``post_art_bmi`` columns.

    Returns
    -------
    pd.DataFrame
        The same DataFrame with a ``delta_bmi`` column added.
    """
    before = population["pre_art_bmi"]
    after = population["post_art_bmi"]
    population["delta_bmi"] = after - before
    return population
def add_multimorbidity(population: pd.DataFrame) -> pd.DataFrame:
    """Count the STAGE2 and STAGE3 comorbidities held by each agent.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame containing one column per STAGE2 and STAGE3
        comorbidity.

    Returns
    -------
    pd.DataFrame
        The same DataFrame with an int8 ``mm`` column holding the row-wise sum
        of those comorbidity columns.
    """
    counted_columns = STAGE2 + STAGE3
    per_agent_total = population[counted_columns].sum(axis=1)
    population["mm"] = np.array(per_agent_total, dtype="int8")
    return population
def sort_alphabetically(population: pd.DataFrame) -> pd.DataFrame:
    """Return the population with its columns in sorted order.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame.

    Returns
    -------
    pd.DataFrame
        A reindexed DataFrame whose column labels are in ascending order.
    """
    ordered_labels = sorted(population.columns)
    return population.reindex(columns=ordered_labels)
def cast_type(population: pd.DataFrame) -> pd.DataFrame:
    """Cast population columns to the dtypes listed in ``POPULATION_TYPE_DICT``.

    Parameters
    ----------
    population : pd.DataFrame
        Population DataFrame.

    Returns
    -------
    pd.DataFrame
        A DataFrame converted with ``astype(POPULATION_TYPE_DICT)``; the intent
        is to reduce memory use.
    """
    return population.astype(POPULATION_TYPE_DICT)
class Status(Event):
    """Event that stamps one status code onto every agent in the population."""

    def __init__(self, parameters: Parameters, status: int) -> None:
        """Remember the status code to assign.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        status : int
            Status code, as defined in pearl.definitions.
        """
        super().__init__(parameters)
        self.status = status

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Write the stored status into the ``status`` column.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame.

        Returns
        -------
        pd.DataFrame
            The same DataFrame with ``status`` set to the stored code on
            every row.
        """
        assigned_status = self.status
        population["status"] = assigned_status
        return population
class SimulateAges(Event):
    """Simulate ages for a population of a given size from a two-normal mixture."""

    def __init__(
        self, parameters: Parameters, population_size: int, h1yy: Optional[int] = None
    ) -> None:
        """Store the population size and pick the age-mixture coefficients.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        population_size : int
            Number of ages to simulate.
        h1yy : Optional[int], optional
            Key into ``parameters.age_by_h1yy`` (``NewAges`` passes a year).
            When it is falsy (the default ``None``), the
            ``parameters.age_in_2009`` coefficients are used instead.
        """
        super().__init__(parameters)
        self.population_size = population_size
        self.h1yy = h1yy
        # NOTE(review): this is a truthiness test, so h1yy == 0 would also
        # select the 2009 coefficients. Kept as-is for backward compatibility.
        if self.h1yy:
            self.coeffs = self.parameters.age_by_h1yy.loc[h1yy]
        else:
            self.coeffs = self.parameters.age_in_2009

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Simulate ages and store them in the ``age`` column.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame that receives the ``age`` column. Callers in this module
            pass an empty DataFrame; it exists to keep the DataFrame-in,
            DataFrame-out API.

        Returns
        -------
        pd.DataFrame
            The DataFrame passed in, with the simulated ages in ``age``.
        """
        coeffs = self.coeffs

        # Split the population between the two mixture components.
        pop_size_1 = self.random_state.binomial(
            self.population_size, coeffs.loc["lambda1", "estimate"]
        )
        pop_size_2 = self.population_size - pop_size_1

        # Draw each component from a normal truncated to ages 18-80.
        ages_1 = draw_from_trunc_norm(
            18,
            80,
            coeffs.loc["mu1", "estimate"],
            coeffs.loc["sigma1", "estimate"],
            pop_size_1,
            self.random_state,
        )
        ages_2 = draw_from_trunc_norm(
            18,
            80,
            coeffs.loc["mu2", "estimate"],
            coeffs.loc["sigma2", "estimate"],
            pop_size_2,
            self.random_state,
        )
        ages = np.concatenate((ages_1, ages_2))

        # Sanity-check the draws. min()/max() raise ValueError on a zero-size
        # array, so skip the checks when no agents were requested.
        # NOTE(review): assumes draw_from_trunc_norm returns an empty array for
        # size 0 - confirm against pearl.sample.
        if ages.size:
            assert ages.min() > 18
            # NOTE(review): the draw is truncated at 80 but this checks < 85.
            assert ages.max() < 85

        population["age"] = np.array(ages)
        return population
class H1yy(Event):
    """Assign an H1yy year to every agent, sampled per age category."""

    def __init__(self, parameters: Parameters):
        """Keep a reference to the ``h1yy_by_age_2009`` distribution table.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.coeffs = self.parameters.h1yy_by_age_2009

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Sample ``h1yy`` for each age category and re-index by it.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame grouped by ``age_cat`` and carrying ``id``
            (both are index levels when called from ``BasePopulation``).

        Returns
        -------
        pd.DataFrame
            Population with an integer ``h1yy``, indexed by ``("h1yy", "id")``
            and sorted on that index.
        """
        # Within each age category, draw h1yy values with the probabilities in
        # the "pct" column of the coefficient table.
        for age_cat, members in population.groupby("age_cat"):
            distribution = self.coeffs.loc[age_cat].reset_index()
            n_members = len(members)
            sampled_years = self.random_state.choice(
                distribution["h1yy"], size=n_members, p=distribution["pct"]
            )
            population.loc[age_cat, "h1yy"] = sampled_years

        population["h1yy"] = population["h1yy"].astype(int)

        # Re-index so later events can group on h1yy.
        flat = population.reset_index()
        return flat.set_index(["h1yy", "id"]).sort_index()
# TODO combine these sqrtCd4n classes into a single one
class SqrtCd4nInit(Event):
    """Assign an initial square-root CD4 count to the starting population."""

    def __init__(self, parameters: Parameters):
        """Keep a reference to the ``cd4n_by_h1yy_2009`` coefficient table.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.coeffs = self.parameters.cd4n_by_h1yy_2009

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Draw ``init_sqrtcd4n`` for every agent, one h1yy group at a time.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame whose first index level is h1yy and which
            carries an ``id`` index level.

        Returns
        -------
        pd.DataFrame
            Population with ``init_sqrtcd4n`` added, indexed by ``id`` and
            sorted on it.
        """
        lower_bound = 0
        upper_bound = np.sqrt(2000.0)

        # Each h1yy has its own mean and spread; draws are truncated to
        # [0, sqrt(2000)].
        for h1yy, members in population.groupby(level=0):
            mean = self.coeffs.loc[(h1yy, "mu"), "estimate"]
            spread = self.coeffs.loc[(h1yy, "sigma"), "estimate"]
            n_members = members.shape[0]
            draws = draw_from_trunc_norm(
                lower_bound, upper_bound, mean, spread, n_members, self.random_state
            )
            population.loc[(h1yy,), "init_sqrtcd4n"] = draws

        flat = population.reset_index()
        return flat.set_index("id").sort_index()
class SqrtCd4nNew(Event):
    """Assign an initial square-root CD4 count to newly added agents."""

    def __init__(self, parameters: Parameters) -> None:
        """Store the parameters.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Draw ``init_sqrtcd4n`` for every agent, one h1yy value at a time.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame carrying ``h1yy`` and ``id`` (as columns or
            index levels).

        Returns
        -------
        pd.DataFrame
            Population with ``init_sqrtcd4n`` added, indexed by ``id`` and
            sorted on it.
        """
        population = population.reset_index()
        years = population["h1yy"].unique()
        population["init_sqrtcd4n"] = 0.0

        cd4_coeffs = self.parameters.cd4n_by_h1yy
        upper_bound = np.sqrt(2000.0)
        for h1yy in years:
            in_year = population["h1yy"] == h1yy
            mean = cd4_coeffs.loc[(h1yy, "mu"), "estimate"]
            spread = cd4_coeffs.loc[(h1yy, "sigma"), "estimate"]
            n_members = int(in_year.sum())
            # Draws are truncated to [0, sqrt(2000)].
            draws = draw_from_trunc_norm(
                0, upper_bound, mean, spread, n_members, self.random_state
            )
            population.loc[in_year, "init_sqrtcd4n"] = draws

        # NOTE(review): this second reset_index() runs on a default integer
        # index, so it inserts an "index" column. PearlPopulation.__call__
        # drops a column of that name - do not remove this without updating it.
        flat = population.reset_index()
        return flat.set_index("id").sort_index()
class Cd4Increase(Event):
    """Recompute each agent's time-varying square-root CD4 count."""

    def __init__(self, parameters: Parameters):
        """Convert the spline knots and regression coefficients to float arrays.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.knots_age = parameters.cd4_increase_knots_age.to_numpy(dtype=float)
        self.knots_cd4_init = parameters.cd4_increase_knots_cd4_init.to_numpy(dtype=float)
        self.knots_time_from_h1yy = parameters.cd4_increase_knots_time_from_h1yy.to_numpy(
            dtype=float
        )
        self.coeffs = parameters.cd4_increase.to_numpy(dtype=float)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Evaluate the CD4 regression and store it as ``time_varying_sqrtcd4n``.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame with ``year``, ``last_h1yy``,
            ``last_init_sqrtcd4n``, ``age`` and ``intercept`` columns.

        Returns
        -------
        pd.DataFrame
            The DataFrame passed in, with ``time_varying_sqrtcd4n`` set to the
            prediction clipped to ``[0, sqrt(2000)]``.
        """
        # Work on a copy so intermediate regression terms never reach the caller.
        frame = population.copy()

        # Base covariates.
        frame["time_from_h1yy"] = frame["year"] - frame["last_h1yy"]
        frame["cd4n_ini"] = frame["last_init_sqrtcd4n"] ** 2

        # First and second restricted-cubic-spline terms for each covariate.
        spline_inputs = (
            ("age", self.knots_age),
            ("cd4n_ini", self.knots_cd4_init),
            ("time_from_h1yy", self.knots_time_from_h1yy),
        )
        for base, knots in spline_inputs:
            values = frame[base].to_numpy()
            frame[f"{base}_"] = restricted_cubic_spline_var(values, knots, 1)
            frame[f"{base}__"] = restricted_cubic_spline_var(values, knots, 2)

        # Every initial-CD4 term crossed with every time term, in the order
        # the coefficient vector expects.
        cd4_terms = ("cd4n_ini__", "cd4n_ini_", "cd4n_ini")
        time_terms = ("time_from_h1yy", "time_from_h1yy_", "time_from_h1yy__")
        interaction_columns = []
        for cd4_term in cd4_terms:
            for time_term in time_terms:
                label = f"{cd4_term}*{time_term}"
                frame[label] = frame[cd4_term] * frame[time_term]
                interaction_columns.append(label)

        design_columns = [
            "intercept",
            "age",
            "age_",
            "age__",
            "cd4n_ini",
            "cd4n_ini_",
            "cd4n_ini__",
            *interaction_columns,
            *time_terms,
        ]
        design = frame[design_columns].to_numpy(dtype=float)

        # Linear predictor, limited to the valid sqrt CD4 range.
        predicted = np.matmul(design, self.coeffs)
        predicted = np.clip(predicted, 0, np.sqrt(2000))

        population["time_varying_sqrtcd4n"] = np.array(predicted)
        return population
class PreArtBMI(Event):
    """Calculate pre-ART BMI for the population."""

    # Design-matrix columns for each supported ``pre_art_bmi_model``, in the
    # order that model's coefficient vector expects.
    _DESIGN_COLUMNS = {
        6: ["init_age", "age_", "age__", "h1yy", "h1yy_", "h1yy__", "intercept"],
        5: ["init_age", "age_", "age__", "h1yy", "intercept"],
        4: ["init_age", "h1yy", "h1yy_", "h1yy__", "intercept"],
        3: ["init_age", "h1yy", "intercept"],
        2: [
            "age_",
            "age__",
            "age___",
            "age____",
            "h1yy",
            "h1yy_",
            "h1yy__",
            "intercept",
        ],
        1: ["age_", "age__", "age___", "age____", "h1yy", "intercept"],
    }
    # Models whose age terms are restricted cubic splines of init_age.
    _AGE_SPLINE_MODELS = (5, 6)
    # Models whose age terms are decade indicator columns.
    _AGE_BIN_MODELS = (1, 2)
    # Models that add restricted cubic spline terms of h1yy.
    _H1YY_SPLINE_MODELS = (2, 4, 6)

    def __init__(self, parameters: Parameters) -> None:
        """Store the coefficients, knots, residual error and model number.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.coeffs = self.parameters.pre_art_bmi.to_numpy(dtype=float)
        self.t_age = self.parameters.pre_art_bmi_age_knots.to_numpy(dtype=float)
        self.t_h1yy = parameters.pre_art_bmi_h1yy_knots.to_numpy(dtype=float)
        self.rse = self.parameters.pre_art_bmi_rse
        self.model = self.parameters.pre_art_bmi_model

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Calculate pre-ART BMI and add the ``pre_art_bmi`` column.

        The log10 BMI is predicted from the design matrix of the configured
        model, then drawn from a normal truncated to ``[log10(10), log10(65)]``
        and transformed back with ``10 ** x``.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame with ``init_age``, ``h1yy`` and ``intercept``
            columns.

        Returns
        -------
        pd.DataFrame
            The DataFrame passed in, with ``pre_art_bmi`` added.

        Raises
        ------
        ValueError
            If ``pre_art_bmi_model`` is not one of the supported models 1-6.
        """
        try:
            columns = self._DESIGN_COLUMNS[self.model]
        except KeyError:
            # Previously an unknown model fell through every branch and failed
            # later with an UnboundLocalError.
            raise ValueError(
                f"Unsupported pre_art_bmi_model {self.model!r}; expected one of 1-6."
            ) from None

        # Work on a copy so intermediate regression terms never reach the caller.
        pop = population.copy()

        if self.model in self._AGE_SPLINE_MODELS:
            init_age = pop["init_age"].to_numpy()
            pop["age_"] = restricted_cubic_spline_var(init_age, self.t_age, 1)
            pop["age__"] = restricted_cubic_spline_var(init_age, self.t_age, 2)
        elif self.model in self._AGE_BIN_MODELS:
            # Decade indicators; ages under 30 are the reference group.
            pop["age_"] = (pop["init_age"] >= 30) & (pop["init_age"] < 40)
            pop["age__"] = (pop["init_age"] >= 40) & (pop["init_age"] < 50)
            pop["age___"] = (pop["init_age"] >= 50) & (pop["init_age"] < 60)
            pop["age____"] = pop["init_age"] >= 60

        if self.model in self._H1YY_SPLINE_MODELS:
            h1yy = pop["h1yy"].values
            pop["h1yy_"] = restricted_cubic_spline_var(h1yy, self.t_h1yy, 1)
            pop["h1yy__"] = restricted_cubic_spline_var(h1yy, self.t_h1yy, 2)

        pop_matrix = pop[columns].to_numpy(dtype=float)
        log_pre_art_bmi = np.matmul(pop_matrix, self.coeffs)

        # The product is indexed as a column vector; take its single column.
        log_pre_art_bmi = log_pre_art_bmi.T[0]
        log_pre_art_bmi = draw_from_trunc_norm(
            np.log10(10),
            np.log10(65),
            log_pre_art_bmi,
            self.rse,
            len(log_pre_art_bmi),
            self.random_state,
        )
        pre_art_bmi = 10.0**log_pre_art_bmi

        # Optional sensitivity-analysis scaling.
        if self.parameters.sa_variables and "pre_art_bmi" in self.parameters.sa_variables:
            pre_art_bmi *= self.parameters.sa_scalars["pre_art_bmi"]

        population["pre_art_bmi"] = np.array(pre_art_bmi)
        return population
class PostArtBMI(Event):
    """Calculate post-ART BMI for the population."""

    def __init__(self, parameters: Parameters) -> None:
        """Store the regression coefficients, spline knots and residual error.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.coeffs = self.parameters.post_art_bmi.to_numpy(dtype=float)
        self.t_age = self.parameters.post_art_bmi_age_knots.to_numpy(dtype=float)
        self.t_pre_sqrt = self.parameters.post_art_bmi_pre_art_bmi_knots.to_numpy(dtype=float)
        self.t_sqrtcd4 = self.parameters.post_art_bmi_cd4_knots.to_numpy(dtype=float)
        self.t_sqrtcd4_post = self.parameters.post_art_bmi_cd4_post_knots.to_numpy(dtype=float)
        self.rse = self.parameters.post_art_bmi_rse

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Calculate post-ART BMI and add the ``post_art_bmi`` column.

        The square root of BMI is predicted from a regression, drawn from a
        normal truncated to ``[sqrt(10), sqrt(65)]`` and then squared.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame with ``init_age``, ``pre_art_bmi``,
            ``init_sqrtcd4n``, ``h1yy`` and ``intercept`` columns, plus the
            columns ``Cd4Increase`` reads.

        Returns
        -------
        pd.DataFrame
            The DataFrame passed in, with ``post_art_bmi`` added.
        """

        def add_spline_terms(frame: pd.DataFrame, source: str, prefix: str, knots) -> None:
            """Add the first and second spline terms of ``source`` to ``frame``."""
            values = frame[source].to_numpy()
            frame[f"{prefix}_"] = restricted_cubic_spline_var(values, knots, 1)
            frame[f"{prefix}__"] = restricted_cubic_spline_var(values, knots, 2)

        # Work on a copy so intermediate regression terms never reach the caller.
        pop = population.copy()

        # Spline terms for age, sqrt of pre-ART BMI and initial sqrt CD4.
        add_spline_terms(pop, "init_age", "age", self.t_age)
        pop["pre_sqrt"] = pop["pre_art_bmi"] ** 0.5
        add_spline_terms(pop, "pre_sqrt", "pre_sqrt", self.t_pre_sqrt)
        pop["sqrtcd4"] = pop["init_sqrtcd4n"]
        add_spline_terms(pop, "sqrtcd4", "sqrtcd4", self.t_sqrtcd4)

        # Project each agent two years past h1yy and evaluate the CD4 model there.
        pop_future = pop.copy().assign(age=pop["init_age"] + 2)
        pop_future["year"] = pop["h1yy"] + 2
        future_decade = np.floor(pop_future["age"] / 10)
        pop_future["age_cat"] = future_decade
        pop_future.loc[pop_future["age_cat"] < 2, "age_cat"] = 2
        pop_future.loc[pop_future["age_cat"] > 7, "age_cat"] = 7

        # TODO fix
        future_cd4 = Cd4Increase(self.parameters)(pop_future)
        pop["sqrtcd4_post"] = future_cd4["time_varying_sqrtcd4n"]
        add_spline_terms(pop, "sqrtcd4_post", "sqrtcd4_post", self.t_sqrtcd4_post)

        # Design matrix in the column order the coefficient vector expects.
        design_columns = [
            "init_age",
            "age_",
            "age__",
            "h1yy",
            "intercept",
            "pre_sqrt",
            "pre_sqrt_",
            "pre_sqrt__",
            "sqrtcd4",
            "sqrtcd4_",
            "sqrtcd4__",
            "sqrtcd4_post",
            "sqrtcd4_post_",
            "sqrtcd4_post__",
        ]
        design = pop[design_columns].to_numpy(dtype=float)
        predicted_sqrt = np.matmul(design, self.coeffs)
        # The product is indexed as a column vector; take its single column.
        predicted_sqrt = predicted_sqrt.T[0]

        drawn_sqrt = draw_from_trunc_norm(
            np.sqrt(10),
            np.sqrt(65),
            predicted_sqrt,
            self.rse,
            len(predicted_sqrt),
            self.random_state,
        )
        post_art_bmi = drawn_sqrt**2.0

        # Optional sensitivity-analysis scaling.
        sa_variables = self.parameters.sa_variables
        if sa_variables and "post_art_bmi" in sa_variables:
            post_art_bmi *= self.parameters.sa_scalars["post_art_bmi"]

        population["post_art_bmi"] = np.array(post_art_bmi)
        return population
class BasePopulation(Event):
    """Event pipeline shared by the initial user and non-user populations."""

    def __init__(self, parameters: Parameters, population_size: int):
        """Store the population size and assemble the event pipeline.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        population_size : int
            Size of the population to simulate.
        """
        super().__init__(parameters)
        self.population_size = population_size

        # Order matters: each step consumes columns or index levels produced
        # by the steps before it.
        pipeline = [
            SimulateAges(self.parameters, self.population_size),
            add_age_categories,
            add_id,
            H1yy(self.parameters),
            SqrtCd4nInit(self.parameters),
            add_default_columns,
            Cd4Increase(self.parameters),
        ]
        self.events = EventGrouping(pipeline)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Run the pipeline and return the base population.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame handed to the first event. Callers in this module pass an
            empty DataFrame; it exists to keep the DataFrame-in, DataFrame-out
            API.

        Returns
        -------
        pd.DataFrame
            Population DataFrame for the base population.
        """
        base_population = self.events(population)
        return base_population
class Bmi(Event):
    """Compute pre-ART BMI, post-ART BMI and their difference."""

    def __init__(self, parameters: Parameters):
        """Assemble the PreArtBMI, PostArtBMI and delta_bmi steps.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        # delta_bmi needs both BMI columns, so it runs last.
        bmi_steps = [
            PreArtBMI(self.parameters),
            PostArtBMI(self.parameters),
            delta_bmi,
        ]
        self.events = EventGrouping(bmi_steps)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Run the BMI steps in order.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame.

        Returns
        -------
        pd.DataFrame
            Population DataFrame with the BMI-related columns added.
        """
        with_bmi = self.events(population)
        return with_bmi
class Comorbidity(Event):
    """Randomly assign one comorbidity using per-agent prevalence probabilities."""

    def __init__(self, parameters: Parameters, comorbidity: str, user: bool, new_init: bool):
        """Store the comorbidity name, population flags and prevalence values.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        comorbidity : str
            Name of the comorbidity to assign.
        user : bool
            Whether or not the population is ART users.
        new_init : bool
            Whether or not the population is new initiators. Selects
            ``prev_inits_dict`` when true and ``prev_users_dict`` otherwise.
        """
        super().__init__(parameters)
        self.comorbidity = comorbidity
        self.new_init = new_init
        if new_init:
            prevalence_table = self.parameters.prev_inits_dict
        else:
            prevalence_table = self.parameters.prev_users_dict
        self.probability = prevalence_table[self.comorbidity].values
        self.user = user

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Assign the comorbidity and initialise its ``t_<comorbidity>`` column.

        An agent receives the comorbidity when a uniform random draw falls
        below the stored probability. ``t_<comorbidity>`` is -1 for agents who
        have the comorbidity at initialization and 0 otherwise.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame.

        Returns
        -------
        pd.DataFrame
            The same DataFrame with the comorbidity columns added.
        """
        name = self.comorbidity
        n_agents = len(population.index)
        uniform_draws = self.random_state.rand(n_agents)
        has_condition = uniform_draws < self.probability
        population[name] = has_condition.astype(int)

        # NOTE(review): the user and non-user branches were identical in the
        # original, so a single assignment covers both.
        population[f"t_{name}"] = -1 * population[name].astype("int8")
        return population
class ApplyComorbidities(Event):
    """Apply every STAGE0-STAGE3 comorbidity to the population in sequence."""

    def __init__(self, paramaters: Parameters, user: bool, new_init: bool) -> None:
        """Build one ``Comorbidity`` event per comorbidity.

        Parameters
        ----------
        paramaters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters. (The parameter name is misspelled but
            kept so keyword callers continue to work.)
        user : bool
            Whether or not the population is ART users.
        new_init : bool
            Whether or not the population is new initiators.
        """
        super().__init__(paramaters)
        self.user = user
        self.new_init = new_init

        all_comorbidities = STAGE0 + STAGE1 + STAGE2 + STAGE3
        assignments = [
            Comorbidity(self.parameters, name, self.user, self.new_init)
            for name in all_comorbidities
        ]
        self.events = EventGrouping(assignments)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Run every comorbidity assignment in order.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame.

        Returns
        -------
        pd.DataFrame
            Population DataFrame with comorbidities assigned.
        """
        with_comorbidities = self.events(population)
        return with_comorbidities
class Ltfu(Event):
    """Mark the whole population as lost to follow-up in 2009."""

    def __init__(self, parameters: Parameters, population_size: int) -> None:
        """Store the population size and the years-out-of-care distribution.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        population_size : int
            Size of the population; one return delay is drawn per agent.
        """
        super().__init__(parameters)
        self.population_size = population_size

        out_of_care = self.parameters.years_out_of_care
        self.coeffs = out_of_care["years"]
        raw_weights = out_of_care["probability"]
        # Normalize so the weights sum to 1, as random choice requires.
        self.probability = raw_weights / raw_weights.sum()

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Record the exit CD4, the loss year and a sampled return year.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame with ``time_varying_sqrtcd4n`` and ``n_lost``
            columns.

        Returns
        -------
        pd.DataFrame
            The same DataFrame with ``sqrtcd4n_exit``, ``ltfu_year`` and
            ``return_year`` set and ``n_lost`` incremented by one.
        """
        loss_year = 2009
        years_away = self.random_state.choice(
            a=self.coeffs,
            size=self.population_size,
            p=self.probability,
        )

        population["sqrtcd4n_exit"] = population["time_varying_sqrtcd4n"]
        population["ltfu_year"] = loss_year
        population["return_year"] = loss_year + years_away
        population["n_lost"] += 1
        return population
class YearsOutCare(Event):
    """Push back the start year of delayed-start agents."""

    def __init__(self, parameters: Parameters) -> None:
        """Store the parameters.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Delay ``h1yy`` for DELAYED agents and drop those past the final year.

        Each delayed agent has a sampled number of years added to ``h1yy`` and
        is then relabelled ART_NAIVE. Agents whose ``h1yy`` ends up after
        ``parameters.final_year`` are removed.

        Parameters
        ----------
        population : pd.DataFrame
            Population DataFrame with ``status`` and ``h1yy`` columns.

        Returns
        -------
        pd.DataFrame
            A copy of the remaining population after the adjustment.
        """
        out_of_care = self.parameters.years_out_of_care
        weights = out_of_care["probability"]
        # Normalize so the weights sum to 1, as random choice requires.
        weights = weights / weights.sum()

        is_delayed = population["status"] == DELAYED
        n_delayed = int(is_delayed.sum())
        waiting_years = self.random_state.choice(
            a=self.parameters.years_out_of_care["years"],
            size=n_delayed,
            p=weights,
        )

        population.loc[is_delayed, "h1yy"] = population.loc[is_delayed, "h1yy"] + waiting_years
        population.loc[is_delayed, "status"] = ART_NAIVE

        starts_in_time = population["h1yy"] <= self.parameters.final_year
        return population[starts_in_time].copy()
class NewAges(Event):
    """Simulate ages, h1yy and status for new initiators, year by year."""

    def __init__(self, parameters: Parameters):
        """Store the parameters.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Append one simulated cohort per h1yy to the population.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame the cohorts are concatenated onto. Callers in this module
            pass an empty DataFrame.

        Returns
        -------
        pd.DataFrame
            Population DataFrame with ``age``, ``h1yy`` and ``status`` for
            every new agent.
        """
        new_agent_counts = self.parameters.n_new_agents
        cohort_years = self.parameters.age_by_h1yy.index.levels[0]

        for h1yy in cohort_years:
            cohort = pd.DataFrame()
            n_initiators = new_agent_counts.loc[h1yy, "art_initiators"]
            n_delayed = new_agent_counts.loc[h1yy, "art_delayed"]

            age_simulator = SimulateAges(self.parameters, n_initiators + n_delayed, h1yy)
            cohort["age"] = age_simulator(pd.DataFrame([]))
            cohort["h1yy"] = h1yy
            cohort["status"] = ART_NAIVE

            # Pick n_delayed distinct row positions to flag as delayed starters.
            delayed_rows = self.random_state.choice(
                a=len(cohort.index), size=n_delayed, replace=False
            )
            cohort.loc[delayed_rows, "status"] = DELAYED

            population = pd.concat([population, cohort])

        return population
class UserPopInit(Event):
    """Population generator for agents who are ART users at simulation start."""

    def __init__(self, parameters: Parameters, population_size: int):
        """Store the population size and assemble the event pipeline.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        population_size : int
            Size of the population to simulate.
        """
        super().__init__(parameters)
        self.population_size = population_size

        pipeline = [
            BasePopulation(self.parameters, self.population_size),
            Status(self.parameters, ART_USER),
            ApplyComorbidities(self.parameters, user=True, new_init=False),
            add_multimorbidity,
            Bmi(self.parameters),
            sort_alphabetically,
            cast_type,
        ]
        self.events = EventGrouping(pipeline)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Generate the ART user population.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame handed to the first event. Callers in this module pass an
            empty DataFrame; it exists to keep the DataFrame-in, DataFrame-out
            API.

        Returns
        -------
        pd.DataFrame
            ART user population.
        """
        users = self.events(population)
        return users
class NonUserPopInit(Event):
    """Population generator for agents who are ART non-users at simulation start."""

    def __init__(self, parameters: Parameters, population_size: int) -> None:
        """Store the population size and assemble the event pipeline.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        population_size : int
            Size of the population to simulate.
        """
        super().__init__(parameters)
        self.population_size = population_size

        # Same as the user pipeline, plus an Ltfu step right after the base
        # population is built.
        pipeline = [
            BasePopulation(self.parameters, self.population_size),
            Ltfu(self.parameters, self.population_size),
            Status(self.parameters, ART_NONUSER),
            ApplyComorbidities(self.parameters, user=False, new_init=False),
            add_multimorbidity,
            Bmi(self.parameters),
            sort_alphabetically,
            cast_type,
        ]
        self.events = EventGrouping(pipeline)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Generate the ART non-user population.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame handed to the first event. Callers in this module pass an
            empty DataFrame; it exists to keep the DataFrame-in, DataFrame-out
            API.

        Returns
        -------
        pd.DataFrame
            ART non-user population.
        """
        non_users = self.events(population)
        return non_users
class NewPopulation(Event):
    """Population generator for agents who initiate after simulation start."""

    def __init__(self, parameters: Parameters) -> None:
        """Assemble the event pipeline.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)

        # Order matters: each step consumes columns or index levels produced
        # by the steps before it.
        pipeline = [
            NewAges(self.parameters),
            YearsOutCare(self.parameters),
            add_age_categories,
            add_id,
            SqrtCd4nNew(self.parameters),
            add_default_columns_new,
            ApplyComorbidities(self.parameters, user=False, new_init=True),
            add_multimorbidity,
            Bmi(self.parameters),
            sort_alphabetically,
            cast_type,
        ]
        self.events = EventGrouping(pipeline)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Generate the new-initiator population.

        Parameters
        ----------
        population : pd.DataFrame
            DataFrame handed to the first event. Callers in this module pass an
            empty DataFrame; it exists to keep the DataFrame-in, DataFrame-out
            API.

        Returns
        -------
        pd.DataFrame
            New ART user population.
        """
        new_agents = self.events(population)
        return new_agents
class PearlPopulation(Event):
    """Base PEARL population generator."""

    def __init__(self, parameters: Parameters):
        """Build the user, non-user and new-initiator population generators.

        Parameters
        ----------
        parameters : Parameters
            Parameters object defining a run, as defined in
            pearl.parameters.Parameters.
        """
        super().__init__(parameters)
        self.user_pop = UserPopInit(self.parameters, self.parameters.n_initial_users)
        self.non_user_pop = NonUserPopInit(self.parameters, self.parameters.n_initial_nonusers)
        self.new_pop = NewPopulation(self.parameters)

    @override
    def __call__(self, population: pd.DataFrame) -> pd.DataFrame:
        """Return the full PEARL base population.

        The three sub-populations are generated from empty DataFrames,
        concatenated, and renumbered with a fresh sequential ``id`` index.

        Parameters
        ----------
        population : pd.DataFrame
            Unused; present to keep the DataFrame-in, DataFrame-out API.

        Returns
        -------
        pd.DataFrame
            Base PEARL population indexed by ``id``.
        """
        user_pop = self.user_pop(pd.DataFrame([]))
        non_user_pop = self.non_user_pop(pd.DataFrame([]))
        new_pop = self.new_pop(pd.DataFrame([]))

        combined = pd.concat([user_pop, non_user_pop, new_pop])
        # Columns missing from a sub-population come out of concat as NaN.
        combined = combined.fillna(0)
        # A stray "index" column can be carried in by the new-initiator
        # pipeline. errors="ignore" keeps this step from raising KeyError
        # when that column is absent.
        combined = combined.drop(columns=["index"], errors="ignore")

        # Replace the per-sub-population ids with one sequence for the whole
        # population.
        combined = combined.reset_index()
        combined["id"] = np.array(range(combined.index.size))
        return combined.set_index(["id"])