Source code for pearl.parameters

"""
Parameters class that stores all parameters needed to run the PEARL model.
"""

from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd

from pearl.definitions import ROOT_DIR, STAGE0, STAGE1, STAGE2, STAGE3


class Parameters:
    """Hold the run configuration and parameter tables needed for one PEARL run.

    Constructing an instance validates the configuration, reads the parameter
    tables for ``group_name`` from ``parameters.h5``, draws the sensitivity
    analysis scalars and the new-diagnosis trajectory, and finally calls
    :meth:`save_parameters`.
    """

    # Subpopulations accepted by ``group_name``.
    _GROUP_NAMES = (
        "msm_white_male",
        "msm_black_male",
        "msm_hisp_male",
        "idu_white_male",
        "idu_black_male",
        "idu_hisp_male",
        "idu_white_female",
        "idu_black_female",
        "idu_hisp_female",
        "het_white_male",
        "het_black_male",
        "het_hisp_male",
        "het_white_female",
        "het_black_female",
        "het_hisp_female",
    )

    def __init__(
        self,
        output_folder: Path,
        replication: int,
        group_name: str,
        new_dx: str,
        start_year: int,
        final_year: int,
        mortality_model: str,
        mortality_threshold_flag: bool,
        idu_threshold: str,
        seed: int,
        history: Optional[List[str]] = None,
        final_state: bool = False,
        ignore_columns: Optional[List[str]] = None,
        bmi_intervention_scenario: int = 0,
        bmi_intervention_start_year: int = 2020,
        bmi_intervention_end_year: int = 2030,
        bmi_intervention_coverage: float = 1.0,
        bmi_intervention_effectiveness: float = 1.0,
        sa_variables: Optional[list[str]] = None,
    ):
        """
        Validate the run configuration, load every parameter table for
        ``group_name`` and draw the stochastic inputs of the run.

        All argument checks happen before the parameters file is touched, so an
        invalid configuration fails immediately.

        Parameters
        ----------
        output_folder : Path
            Folder to write simulation outputs to. If falsy, nothing is written.
        replication : int
            Replication number. Also seeds the random state used for the
            sensitivity analysis scalars.
        group_name : str
            Subpopulation name from [msm_white_male, msm_black_male, msm_hisp_male,
            idu_white_male, idu_black_male, idu_hisp_male, idu_white_female,
            idu_black_female, idu_hisp_female, het_white_male, het_black_male,
            het_hisp_male, het_white_female, het_black_female, het_hisp_female].
        new_dx : str
            New diagnosis model from [base, ehe].
        start_year : int
            Start year of simulation.
        final_year : int
            Final year of simulation.
        mortality_model : str
            Which mortality model to run from [by_sex_race_risk, by_sex_race, by_sex,
            overall].
        mortality_threshold_flag : bool
            To use the mortality threshold or not.
        idu_threshold : str
            IDU threshold from [2x, 5x, 10x].
        seed : int
            Value for random number generation seeding.
        history : bool, optional
            Whether or not to store history.
            NOTE(review): the signature annotates this as ``Optional[List[str]]``
            while it is documented and stored as a bool - confirm the intended type.
        final_state : bool, optional
            Whether or not to only store final state, by default False.
        ignore_columns : list[str], optional
            List of columns to ignore when storing history.
        bmi_intervention_scenario : int, optional
            BMI intervention to apply from [0 for no intervention, or 1, 2, 3], by
            default 0.
        bmi_intervention_start_year : int, optional
            Year to start BMI intervention, by default 2020.
        bmi_intervention_end_year : int, optional
            Year to end BMI intervention, by default 2030.
        bmi_intervention_coverage : float, optional
            Probability of eligible population that receives BMI intervention,
            between 0 and 1, by default 1.0.
        bmi_intervention_effectiveness : float, optional
            Efficacy of BMI intervention for those that do receive it, between 0
            and 1, by default 1.0.
        sa_variables : list[str], optional
            Variables for sensitivity analysis.

        Raises
        ------
        ValueError
            Raised if inputs are outside of the described acceptable values.
        """
        # Fail fast: reject a bad configuration before reading any table.
        self._validate_inputs(
            group_name,
            new_dx,
            mortality_model,
            idu_threshold,
            bmi_intervention_scenario,
            bmi_intervention_coverage,
            bmi_intervention_effectiveness,
        )

        # Save inputs as class attributes
        self.parameters_path: Path = ROOT_DIR / "parameter_weights/parameters.h5"
        """Path to the parameters file for the PEARL model."""
        self.output_folder: Path = output_folder
        """File path to the folder where PEARL outputs will be saved."""
        self.replication: int = replication
        """Replication number for the model run."""
        self.group_name: str = group_name
        """Group name for the model run."""
        self.new_dx_val: str = new_dx
        """Diagnosis model to use for the model run."""
        self.start_year: int = start_year
        """Start year of the model run."""
        self.final_year: int = final_year
        """Final year of the model run."""
        self.year: int = start_year
        """Current year of the model run, initialized to start_year."""
        self.mortality_model: str = mortality_model
        """Mortality model to use for the model run."""
        self.mortality_threshold_flag: bool = mortality_threshold_flag
        """Mortality threshold flag for the model run."""
        self.idu_threshold: str = idu_threshold
        """IDU threshold for the model run."""
        self.seed: int = seed
        """Random seed for the model run."""
        self.random_state: np.random.RandomState = np.random.RandomState(seed=seed)
        """Random state object for the model run, initialized with seed using
        np.random.RandomState."""
        self.init_random_state: np.random.RandomState = np.random.RandomState(seed=replication)
        """Random state object for parameter initialization, initialized with replication
        number using np.random.RandomState."""
        # NOTE(review): stored as documented (bool); see the note on the parameter.
        self.history: bool = history
        """Whether or not to store history."""
        self.final_state: bool = final_state
        """Whether or not to only store final state."""
        self.ignore_columns: Optional[List[str]] = ignore_columns
        """Columns to ignore when storing history."""

        # BMI intervention parameters (already validated above)
        self.bmi_intervention_scenario: int = bmi_intervention_scenario
        """Parameter for BMI intervention scenario to apply for given group. Must be 0, 1,
        2, or 3. 0 corresponds to no intervention, 1 corresponds to a lifestyle
        intervention for those with BMI over 25, 2 corresponds to a lifestyle intervention
        for those with BMI over 30, and 3 corresponds to a pharmacological intervention
        for those with BMI over 30."""
        self.bmi_intervention_start_year: int = bmi_intervention_start_year
        """Parameter for BMI intervention start year for given group."""
        self.bmi_intervention_end_year: int = bmi_intervention_end_year
        """Parameter for BMI intervention end year for given group."""
        self.bmi_intervention_coverage: float = bmi_intervention_coverage
        """Parameter for BMI intervention coverage for given group. Must be between 0 and
        1 inclusive. Represents the proportion of eligible population that receives the
        BMI intervention."""
        self.bmi_intervention_effectiveness: float = bmi_intervention_effectiveness
        """Parameter for BMI intervention effectiveness for given group. Must be between 0
        and 1 inclusive. Represents the efficacy of the BMI intervention for those that do
        receive it."""

        # 2009 population
        self.on_art_2009: pd.DataFrame = self._read_group_table("on_art_2009")
        """Parameter for number of people on ART in 2009 for given group."""
        self.age_in_2009: pd.DataFrame = self._read_group_table("age_in_2009")
        """Parameter for age distribution of people in 2009 for given group."""
        self.h1yy_by_age_2009: pd.DataFrame = self._read_group_table("h1yy_by_age_2009")
        """Parameters for year of HIV diagnosis by age in 2009 for given group."""
        self.cd4n_by_h1yy_2009: pd.DataFrame = self._read_group_table("cd4n_by_h1yy_2009")
        """Parameters for CD4 count by year of HIV diagnosis in 2009 for given group."""

        # New initiator statistics
        self.linkage_to_care: pd.DataFrame = self._read_group_table("linkage_to_care")
        """Parameter for linkage to care for given group."""
        self.age_by_h1yy: pd.DataFrame = self._read_group_table("age_by_h1yy")
        """Parameter for age by year of HIV diagnosis for given group."""
        self.cd4n_by_h1yy: pd.DataFrame = self._read_group_table("cd4n_by_h1yy")
        """Parameter for CD4 count by year of HIV diagnosis for given group."""

        # Choose new ART initiator model
        new_dx_key = "new_dx" if new_dx == "base" else "new_dx_ehe"
        self.new_dx: pd.DataFrame = self._read_group_table(new_dx_key)
        """Parameter for new ART initiators. Chosen based on new_dx input, either "base"
        or "ehe" for given group."""

        # Choose mortality model: the default model's tables carry no suffix.
        mortality_model_str: str = (
            "" if mortality_model == "by_sex_race_risk" else "_" + mortality_model
        )

        # Mortality In Care
        self.mortality_in_care: pd.DataFrame = self._read_group_table(
            f"mortality_in_care{mortality_model_str}"
        )
        """Parameter for mortality in care for given group. Chosen based on
        mortality_model input."""
        self.mortality_in_care_age: pd.DataFrame = self._read_group_table(
            f"mortality_in_care_age{mortality_model_str}"
        )
        """Parameter for mortality in care by age for given group. Chosen based on
        mortality_model input."""
        self.mortality_in_care_sqrtcd4: pd.DataFrame = self._read_group_table(
            f"mortality_in_care_sqrtcd4{mortality_model_str}"
        )
        """Parameter for mortality in care by sqrt CD4 count for given group. Chosen based
        on mortality_model input."""
        self.mortality_in_care_vcov: pd.DataFrame = self._read_group_table(
            "mortality_in_care_vcov"
        )
        """Parameter for variance-covariance matrix for mortality in care for given
        group."""

        # Mortality Out Of Care
        self.mortality_out_care: pd.DataFrame = self._read_group_table(
            f"mortality_out_care{mortality_model_str}"
        )
        """Parameter for mortality out of care for given group. Chosen based on
        mortality_model input."""
        self.mortality_out_care_age: pd.DataFrame = self._read_group_table(
            f"mortality_out_care_age{mortality_model_str}"
        )
        """Parameter for mortality out of care by age for given group. Chosen based on
        mortality_model input."""
        self.mortality_out_care_tv_sqrtcd4: pd.DataFrame = self._read_group_table(
            f"mortality_out_care_tv_sqrtcd4{mortality_model_str}"
        )
        """Parameter for mortality out of care by time-varying sqrt CD4 count for given
        group. Chosen based on mortality_model input."""
        self.mortality_out_care_vcov: pd.DataFrame = self._read_group_table(
            "mortality_out_care_vcov"
        )
        """Parameter for variance-covariance matrix for mortality out of care for given
        group."""

        # Mortality Threshold
        if idu_threshold != "2x":
            threshold_key = f"mortality_threshold_idu_{idu_threshold}"
        else:
            threshold_key = f"mortality_threshold{mortality_model_str}"
        self.mortality_threshold: pd.DataFrame = self._read_group_table(threshold_key)
        """Parameter for mortality threshold for given group. Chosen based on
        idu_threshold input. If idu_threshold is not 2x, then the mortality threshold is
        dependent on the idu_threshold. If idu_threshold is 2x, then the mortality
        threshold is dependent on the mortality_model."""

        # Loss To Follow Up
        self.loss_to_follow_up = self._read_group_table("loss_to_follow_up")
        """Parameter for loss to follow up for given group."""
        self.ltfu_knots: pd.DataFrame = self._read_group_table("ltfu_knots")
        """Parameter for loss to follow up knots for given group."""
        self.loss_to_follow_up_vcov: pd.DataFrame = self._read_group_table(
            "loss_to_follow_up_vcov"
        )
        """Parameter for variance-covariance matrix for loss to follow up for given
        group."""

        # Cd4 Increase
        self.cd4_increase: pd.DataFrame = self._read_group_table("cd4_increase")
        """Parameter for CD4 increase for given group."""
        self.cd4_increase_vcov: pd.DataFrame = self._read_group_table("cd4_increase_vcov")
        """Parameter for variance-covariance matrix for CD4 increase for given group."""
        self.cd4_increase_knots_age: pd.DataFrame = self._read_group_table(
            "cd4_increase_knots_age"
        )
        """Parameter for CD4 increase knots by age for given group."""
        self.cd4_increase_knots_cd4_init: pd.DataFrame = self._read_group_table(
            "cd4_increase_knots_cd4_init"
        )
        """Parameter for CD4 increase knots by initial CD4 count for given group."""
        self.cd4_increase_knots_time_from_h1yy: pd.DataFrame = self._read_group_table(
            "cd4_increase_knots_time_from_h1yy"
        )
        """Parameter for CD4 increase knots by time from HIV diagnosis for given group."""

        # Cd4 Decrease (shared by all groups, stored under the "all" label)
        self.cd4_decrease: pd.DataFrame = self._read_table("cd4_decrease").loc["all"]
        """Parameter for CD4 decrease for all groups."""
        self.cd4_decrease_vcov: pd.DataFrame = self._read_table("cd4_decrease_vcov")
        """Parameter for variance-covariance matrix for CD4 decrease for all groups."""

        # Years out of Care
        self.years_out_of_care: pd.DataFrame = self._read_table("years_out_of_care")
        """Parameter for years out of care for all groups."""

        # BMI
        self.pre_art_bmi: pd.DataFrame = self._read_group_table("pre_art_bmi")
        """Parameter for pre-ART BMI for given group."""
        self.pre_art_bmi_model: int = self._read_group_table("pre_art_bmi_model").values[0]
        """Parameter for pre-ART BMI model for given group."""
        self.pre_art_bmi_age_knots: pd.DataFrame = self._read_group_table(
            "pre_art_bmi_age_knots"
        )
        """Parameter for pre-ART BMI age knots for given group."""
        self.pre_art_bmi_h1yy_knots: pd.DataFrame = self._read_group_table(
            "pre_art_bmi_h1yy_knots"
        )
        """Parameter for pre-ART BMI year of HIV diagnosis knots for given group."""
        self.pre_art_bmi_rse: float = self._read_group_table("pre_art_bmi_rse").values[0]
        """Parameter for pre-ART BMI residual standard error for given group."""
        self.post_art_bmi: pd.DataFrame = self._read_group_table("post_art_bmi")
        """Parameter for post-ART BMI for given group."""
        self.post_art_bmi_age_knots: pd.DataFrame = self._read_group_table(
            "post_art_bmi_age_knots"
        )
        """Parameter for post-ART BMI age knots for given group."""
        self.post_art_bmi_pre_art_bmi_knots: pd.DataFrame = self._read_group_table(
            "post_art_bmi_pre_art_bmi_knots"
        )
        """Parameter for post-ART BMI pre-ART BMI knots for given group."""
        self.post_art_bmi_cd4_knots: pd.DataFrame = self._read_group_table(
            "post_art_bmi_cd4_knots"
        )
        """Parameter for post-ART BMI CD4 count knots for given group."""
        self.post_art_bmi_cd4_post_knots: pd.DataFrame = self._read_group_table(
            "post_art_bmi_cd4_post_knots"
        )
        """Parameter for post-ART BMI CD4 count after ART initiation knots for given
        group."""
        self.post_art_bmi_rse: float = self._read_group_table("post_art_bmi_rse").values[0]
        """Parameter for post-ART BMI residual standard error for given group."""

        # Comorbidities
        all_stages = STAGE0 + STAGE1 + STAGE2 + STAGE3
        self.prev_users_dict: dict[str, pd.DataFrame] = {
            comorbidity: self._read_group_table(f"{comorbidity}_prev_users")
            for comorbidity in all_stages
        }
        """Parameter for prevalence of comorbidity among users for given group. Dictionary
        with keys for each comorbidity and values as the prevalence of that comorbidity
        among users for the given group."""
        self.prev_inits_dict: dict[str, pd.DataFrame] = {
            comorbidity: self._read_group_table(f"{comorbidity}_prev_inits")
            for comorbidity in all_stages
        }
        """Parameter for prevalence of comorbidity among new initiators for given group.
        Dictionary with keys for each comorbidity and values as the prevalence of that
        comorbidity among new initiators for the given group."""
        self.comorbidity_coeff_dict: dict[str, pd.DataFrame] = {
            comorbidity: self._read_group_table(f"{comorbidity}_coeff")
            for comorbidity in STAGE1 + STAGE2 + STAGE3
        }
        """Parameter for comorbidity coefficients for given group. Dictionary with keys
        for each comorbidity and values as the coefficient table for that comorbidity for
        the given group."""
        self.delta_bmi_dict: dict[str, pd.DataFrame] = {
            comorbidity: self._read_group_table(f"{comorbidity}_delta_bmi")
            for comorbidity in STAGE2 + STAGE3
        }
        """Parameter for change in BMI associated with comorbidity for given group.
        Dictionary with keys for each comorbidity and values as the change in BMI
        associated with that comorbidity for the given group."""
        self.post_art_bmi_dict: dict[str, pd.DataFrame] = {
            comorbidity: self._read_group_table(f"{comorbidity}_post_art_bmi")
            for comorbidity in STAGE2 + STAGE3
        }
        """Parameter for post-ART BMI associated with comorbidity for given group.
        Dictionary with keys for each comorbidity and values as the post-ART BMI
        associated with that comorbidity for the given group."""

        # Aim 2 Mortality
        self.mortality_in_care_co: pd.DataFrame = self._read_group_table(
            "mortality_in_care_co"
        )
        """Parameter for mortality in care for given group. Coefficients for the mortality
        in care model for the given group."""
        self.mortality_in_care_post_art_bmi: pd.DataFrame = self._read_group_table(
            "mortality_in_care_post_art_bmi"
        )
        """Parameter for mortality in care for given group. Coefficients for the post-ART
        BMI variable in the mortality in care model for the given group."""
        self.mortality_out_care_co: pd.DataFrame = self._read_group_table(
            "mortality_out_care_co"
        )
        """Parameter for mortality out of care for given group. Coefficients for the
        mortality out of care model for the given group."""
        self.mortality_out_care_post_art_bmi: pd.DataFrame = self._read_group_table(
            "mortality_out_care_post_art_bmi"
        )
        """Parameter for mortality out of care for given group. Coefficients for the
        post-ART BMI variable in the mortality out of care model for the given group."""

        # Year and age ranges
        self.AGES: np.ndarray = np.arange(18, 87)
        """Parameter for age range of agents in the model. Minimum age is 18 and maximum
        age is 86."""
        self.AGE_CATS: np.ndarray = np.arange(2, 8)
        """Parameter for age categories for agents in the model. Age categories are
        defined as 18-29, 30-39, 40-49, 50-59, 60-69, and 70-79."""
        self.SIMULATION_YEARS: np.ndarray = np.arange(2010, final_year + 1)
        """Parameter for years of the simulation. Simulation runs from 2010 to
        final_year."""
        self.ALL_YEARS: np.ndarray = np.arange(2000, final_year + 1)
        """Parameter for all years in the model. Range from 2000 to final_year."""
        self.INITIAL_YEARS: np.ndarray = np.arange(2000, 2010)
        """Parameter for initial years of the model. Range from 2000 to 2009."""
        self.CD4_BINS: np.ndarray = np.arange(2001)
        """Parameter for CD4 count bins for the model. Range from 0 to 2000."""

        # Sensitivity Analysis
        self.sa_variables: Optional[list[str]] = sa_variables
        """Parameter for sensitivity analysis variables. List of variables to include in
        sensitivity analysis."""
        self.sa_scalars: dict[str, float] = {}
        """Parameter for sensitivity analysis scalars. Dictionary with keys for each
        variable included in sensitivity analysis and values as the scalar to multiply
        that variable by for the sensitivity analysis."""
        if self.sa_variables:
            # The order of the draws below is part of the reproducible random
            # stream seeded by ``replication``; do not reorder.
            for comorbidity in self.prev_users_dict:
                name = f"{comorbidity}_prevalence_prev"
                if name in self.sa_variables:
                    self.prev_users_dict[comorbidity] *= self._draw_sa_scalar(name)

            for comorbidity in self.prev_inits_dict:
                name = f"{comorbidity}_prevalence"
                if name in self.sa_variables:
                    self.prev_inits_dict[comorbidity] *= self._draw_sa_scalar(name)

            for comorbidity in all_stages:
                name = f"{comorbidity}_incidence"
                if name in self.sa_variables:
                    self._draw_sa_scalar(name)

            for name in ("pre_art_bmi", "post_art_bmi", "art_initiators"):
                if name in self.sa_variables:
                    self._draw_sa_scalar(name)

        # Draw a random value between predicted and 2018 predicted value for years greater
        # than 2018
        # TODO refactor this and save it in parameters
        self.age_by_h1yy["estimate"] = (
            self.random_state.rand(len(self.age_by_h1yy.index))
            * (self.age_by_h1yy["high_value"] - self.age_by_h1yy["low_value"])
        ) + self.age_by_h1yy["low_value"]
        self.cd4n_by_h1yy["estimate"] = (
            self.random_state.rand(len(self.cd4n_by_h1yy.index))
            * (self.cd4n_by_h1yy["high_value"] - self.cd4n_by_h1yy["low_value"])
        ) + self.cd4n_by_h1yy["low_value"]

        self.n_initial_users = self.on_art_2009.iloc[0]
        """Parameter for number of ART users in 2009 for given group, taken from
        on_art_2009 parameter."""

        # Simulate number of new art initiators and initial nonusers
        n_initial_nonusers, n_new_agents = self.simulate_new_dx()
        self.n_initial_nonusers: int = n_initial_nonusers
        """Parameter for number of ART non-users in 2009 for given group. Sum of the
        people linked to care but not initiating ART in 2006 - 2009, as computed by
        simulate_new_dx."""
        self.n_new_agents: pd.DataFrame = n_new_agents
        """Parameter for number of new agents entering the model each year from 2010 on,
        with one column for ART initiators and one for those delaying ART, as computed by
        simulate_new_dx."""

        self.save_parameters()

    @classmethod
    def _validate_inputs(
        cls,
        group_name: str,
        new_dx: str,
        mortality_model: str,
        idu_threshold: str,
        bmi_intervention_scenario: int,
        bmi_intervention_coverage: float,
        bmi_intervention_effectiveness: float,
    ) -> None:
        """Raise ValueError for the first unsupported configuration value found."""
        if group_name not in cls._GROUP_NAMES:
            raise ValueError("group_name not supported")
        if new_dx not in ("base", "ehe"):
            raise ValueError("Invalid new diagnosis file specified")
        # Only the by_sex_race_risk family may be combined with a non-default
        # IDU threshold.
        if (
            mortality_model not in ("by_sex_race_risk", "by_sex_race_risk_2015")
            and idu_threshold != "2x"
        ):
            raise ValueError(
                "Alternative mortality models with idu threshold changes is not implemented"
            )
        if bmi_intervention_scenario not in (0, 1, 2, 3):
            raise ValueError("bmi_intervention_scenario values only supported for 0, 1, 2, and 3")
        # ``not 0 <= x <= 1`` also rejects NaN, which ``x < 0 or x > 1`` lets through.
        if not 0 <= bmi_intervention_coverage <= 1:
            raise ValueError("bmi_intervention_coverage must be between 0 and 1 inclusive")
        if not 0 <= bmi_intervention_effectiveness <= 1:
            raise ValueError("bmi_intervention_effectiveness must be between 0 and 1 inclusive")

    def _read_table(self, key: str) -> pd.DataFrame:
        """Read the table stored under ``key`` in the parameters file."""
        return pd.read_hdf(self.parameters_path, key)

    def _read_group_table(self, key: str) -> pd.DataFrame:
        """Read the table stored under ``key`` and select this run's group from it."""
        return self._read_table(key).loc[self.group_name]

    def _draw_sa_scalar(self, name: str) -> float:
        """Draw a uniform(0.8, 1.2) scalar, record it in ``sa_scalars`` and return it."""
        scalar = self.init_random_state.uniform(0.8, 1.2)
        self.sa_scalars[name] = scalar
        return scalar

    def save_parameters(self) -> None:
        """
        Save the run configuration and sensitivity analysis scalars as a dataframe.

        The one-row dataframe is stored on ``self.param_dataframe`` and, when
        ``self.output_folder`` is truthy, also written to
        ``parameters.parquet`` inside that folder.
        """
        param_dict = {
            "replication": self.replication,
            "group": self.group_name,
            "new_dx": self.new_dx_val,
            "final_year": self.final_year,
            "mortality_model": self.mortality_model,
            "mortality_threshold_flag": self.mortality_threshold_flag,
            "idu_threshold": self.idu_threshold,
            "seed": self.seed,
            "bmi_intervention_scenario": self.bmi_intervention_scenario,
            "bmi_intervention_start_year": self.bmi_intervention_start_year,
            "bmi_intervention_end_year": self.bmi_intervention_end_year,
            "bmi_intervention_coverage": self.bmi_intervention_coverage,
            "bmi_intervention_effectiveness": self.bmi_intervention_effectiveness,
        }
        # One extra column per sensitivity analysis scalar that was drawn.
        param_dict.update(self.sa_scalars)

        self.param_dataframe = pd.DataFrame(param_dict, index=[0])

        if self.output_folder:
            self.param_dataframe.to_parquet(
                self.output_folder / "parameters.parquet", compression="zstd"
            )

    def simulate_new_dx(self) -> Tuple[int, pd.DataFrame]:
        """
        Return the number of ART non-users in 2009 and the number of agents entering
        the model each year as ART initiators and as people delaying ART.

        The number of new diagnoses per year is placed between the ``lower`` and
        ``upper`` columns of ``self.new_dx`` using a single uniform draw from
        ``self.random_state``. A share ``link_prob`` of each year's diagnoses links to
        care in the same year, and 40% of the remainder links in equal parts over the
        following three years. A share ``art_prob`` of everyone linked in a year
        initiates ART; the rest delay ART. Both probabilities come from
        ``self.linkage_to_care``. People delaying ART in 2006 - 2009 form the initial
        non-user population.

        Reads ``self.new_dx``, ``self.linkage_to_care``, ``self.random_state``,
        ``self.sa_variables`` and ``self.sa_scalars``; ``self.new_dx`` is not modified.

        Returns
        -------
        Tuple[int, pd.DataFrame]
            (number of ART non-users in 2009, dataframe indexed by year from 2010 with
            integer columns ``art_initiators`` and ``art_delayed``)
        """
        new_dx = self.new_dx.copy()
        linkage_to_care = self.linkage_to_care

        # One draw for the whole series: every year sits at the same relative position
        # between its lower and upper bound.
        new_dx["n_dx"] = (
            new_dx["lower"] + (new_dx["upper"] - new_dx["lower"]) * self.random_state.uniform()
        )

        # Only a proportion of new diagnoses link to care and 40% of the remaining link
        # in the next 3 years
        new_dx["unlinked"] = new_dx["n_dx"] * (1 - linkage_to_care["link_prob"])
        new_dx["gardner_per_year"] = new_dx["unlinked"] * 0.4 / 3.0
        new_dx["year0"] = new_dx["n_dx"] * linkage_to_care["link_prob"]
        for lag in (1, 2, 3):
            new_dx[f"year{lag}"] = new_dx["gardner_per_year"].shift(lag, fill_value=0)
        new_dx["total_linked"] = (
            new_dx["year0"] + new_dx["year1"] + new_dx["year2"] + new_dx["year3"]
        )

        # Proportion of those linked to care start ART
        new_dx["art_initiators"] = (new_dx["total_linked"] * linkage_to_care["art_prob"]).astype(
            int
        )
        new_dx["art_delayed"] = (
            new_dx["total_linked"] * (1 - linkage_to_care["art_prob"])
        ).astype(int)

        # TODO make the start and end dates here parametric
        # Count those not starting art 2006 - 2009 as initial ART nonusers; cast so the
        # caller gets a plain int as annotated rather than a numpy scalar.
        n_initial_nonusers = int(new_dx.loc[np.arange(2006, 2010), "art_delayed"].sum())

        # Compile list of number of new agents to be introduced in the model. Copy so
        # the scaling below never writes through to ``new_dx``.
        new_agents = new_dx.loc[
            np.arange(2010, new_dx.index.max() + 1), ["art_initiators", "art_delayed"]
        ].copy()
        if self.sa_variables and "art_initiators" in self.sa_variables:
            scale = self.sa_scalars["art_initiators"]
            new_agents["art_initiators"] = new_agents["art_initiators"] * scale
            new_agents["art_delayed"] = new_agents["art_delayed"] * scale

        # Scaled counts are truncated back to whole agents.
        new_agents = new_agents.astype({"art_initiators": int, "art_delayed": int})

        return n_initial_nonusers, new_agents