# Source code for pearl.parameters

"""
Parameters class that stores all parameters needed to run the PEARL model.
"""

from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd

from pearl.definitions import ROOT_DIR, STAGE0, STAGE1, STAGE2, STAGE3



# [docs] (Sphinx viewcode link marker, not part of the module)
class Parameters:
    """This class holds all the parameters needed for PEARL to run."""

    def __init__(
        self,
        output_folder: Path,
        replication: int,
        group_name: str,
        new_dx: str,
        start_year: int,
        final_year: int,
        mortality_model: str,
        mortality_threshold_flag: bool,
        idu_threshold: str,
        seed: int,
        history: Optional[List[str]] = None,
        final_state: bool = False,
        ignore_columns: Optional[List[str]] = None,
        bmi_intervention_scenario: int = 0,
        bmi_intervention_start_year: int = 2020,
        bmi_intervention_end_year: int = 2030,
        bmi_intervention_coverage: float = 1.0,
        bmi_intervention_effectiveness: float = 1.0,
        sa_variables: Optional[list[str]] = None,
    ):
        """
        Takes the path to the parameters.h5 file, the path to the folder containing rerun data
        if the run is a rerun, the output folder, the group name, a flag indicating if the
        simulation is for aim 2, a flag indicating whether to record detailed comorbidity
        information, the type of new_dx parameter to use, the final year of the model, the
        mortality model to use, whether to use a mortality threshold, verbosity, the sensitivity
        analysis dict, the classic sensitivity analysis dict, and the aim 2 sensitivity
        analysis dict.

        Parameters
        ----------
        output_folder : Path
            Folder to write simulation outputs to.
        replication : int
            replication number
        group_name : str
            Subpopulation name from [msm_white_male, msm_black_male, msm_hisp_male, idu_white_male,
            idu_black_male, idu_hisp_male, idu_white_female, idu_black_female, idu_hisp_female,
            het_white_male, het_black_male, het_hisp_male, het_white_female, het_black_female,
            het_hisp_female].
        new_dx : str
            new diagnosis model from [base, ehe].
        start_year : int
            Start year of simulation. Default is 2009.
        final_year : int
            Final year of simulation. The simulation will run from 2009 until the final year.
        mortality_model : str
            Which mortality model to run from [by_sex_race_risk, by_sex_race, by_sex, overall]
        mortality_threshold_flag : bool
            To use the mortality threshold or not.
        idu_threshold : str
            IDU threshold from [2x, 5x, 10x]
        seed : int
            Value for random number generation seeding.
        history: bool
            Whether or not to store history
        final_state: bool
            Whether or not to only store final state
        ignore_columns: list[str]
            List of columns to ignore when storing history
        bmi_intervention_scenario : int, optional
            BMI intervention to apply from [0 for no intervention, or 1, 2, 3], by default 0
        bmi_intervention_start_year : int, optional
            Year to start BMI intervention, by default 2020
        bmi_intervention_end_year : int, optional
            Year to end BMI intervention, by default 2030
        bmi_intervention_coverage : float, optional
            Probability of eligible population that receives BMI intervention between 0 and 1
            , by default 1.0
        bmi_intervention_effectiveness : float, optional
            Efficacy of BMI intervention for those that do receive it between 0 and 1
            , by default 1.0
        sa_variables : list[str]
            variables for sensitivity analysis
        Raises
        ------
        ValueError
            Raises value error if inputs are outside of the described acceptable values.
        """

        # check to ensure a proper group_name is provided
        if group_name not in [
            "msm_white_male",
            "msm_black_male",
            "msm_hisp_male",
            "idu_white_male",
            "idu_black_male",
            "idu_hisp_male",
            "idu_white_female",
            "idu_black_female",
            "idu_hisp_female",
            "het_white_male",
            "het_black_male",
            "het_hisp_male",
            "het_white_female",
            "het_black_female",
            "het_hisp_female",
        ]:
            raise ValueError("group_name not supported")

        # Save inputs as class attributes
        self.parameters_path: Path = ROOT_DIR / "parameter_weights/parameters.h5"
        """Path to the parameters file for the PEARL model."""
        self.output_folder: Path = output_folder
        """File path to the folder where PEARL outputs will be saved."""
        self.replication: int = replication
        """Replication number for the model run."""
        self.group_name: str = group_name
        """Group name for the model run."""
        self.new_dx_val: str = new_dx
        """Diagnosis model to use for the model run."""
        self.start_year: int = start_year
        """Start year of the model run."""
        self.final_year: int = final_year
        """Final year of the model run."""
        self.year: int = start_year
        """Current year of the model run, initialized to start_year."""
        self.mortality_model: str = mortality_model
        """Mortality model to use for the model run."""
        self.mortality_threshold_flag: bool = mortality_threshold_flag
        """Mortality threshold flag for the model run."""
        self.idu_threshold: float = idu_threshold
        """IDU threshold for the model run."""
        self.seed: int = seed
        """Random seed for the model run."""
        self.random_state: np.random.RandomState = np.random.RandomState(seed=seed)
        """Random state object for the model run, initialized with seed using 
        np.random.RandomState."""
        self.init_random_state: np.random.RandomState = np.random.RandomState(seed=replication)
        """Random state object for parameter initialization, initialized with replication number 
        using np.random.RandomState."""
        self.history: bool = history
        """Whether or not to store history."""
        self.final_state: bool = final_state
        """Whether or not to only store final state."""
        self.ignore_columns: list[str] = ignore_columns
        """Columns to ignore when storing history."""
        self.bmi_intervention_scenario: int = bmi_intervention_scenario
        """BMI intervention scenario to apply for the model run."""
        self.bmi_intervention_start_year: int = bmi_intervention_start_year
        """BMI intervention start year for the model run."""
        self.bmi_intervention_end_year: int = bmi_intervention_end_year
        """Bmi intervention end year for the model run."""
        self.bmi_intervention_coverage: float = bmi_intervention_coverage
        """BMI intervention coverage for the model run."""
        self.bmi_intervention_effectiveness: float = bmi_intervention_effectiveness
        """BMI intervention effectiveness for the model run."""
        self.sa_variables: list[str] = sa_variables
        """Sensitivity analysis variables for the model run."""

        # 2009 population
        self.on_art_2009: pd.DataFrame = pd.read_hdf(self.parameters_path, "on_art_2009").loc[
            group_name
        ]
        """Parameter for number of people on ART in 2009 for given group."""
        self.age_in_2009: pd.DataFrame = pd.read_hdf(self.parameters_path, "age_in_2009").loc[
            group_name
        ]
        """Parameter for age distribution of people in 2009 for given group."""
        self.h1yy_by_age_2009: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "h1yy_by_age_2009"
        ).loc[group_name]
        """Paramters for year of HIV diagnosis by age in 2009 for given group."""
        self.cd4n_by_h1yy_2009: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4n_by_h1yy_2009"
        ).loc[group_name]
        """Parameters for CD4 count by year of HIV diagnosis in 2009 for given group."""

        # New initiator statistics
        self.linkage_to_care: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "linkage_to_care"
        ).loc[group_name]
        """Parameter for linkage to care for given group."""
        self.age_by_h1yy: pd.DataFrame = pd.read_hdf(self.parameters_path, "age_by_h1yy").loc[
            group_name
        ]
        """Parameter for age by year of HIV diagnosis for given group."""
        self.cd4n_by_h1yy: pd.DataFrame = pd.read_hdf(self.parameters_path, "cd4n_by_h1yy").loc[
            group_name
        ]
        """Parameter for CD4 count by year of HIV diagnosis for given group."""
        # Choose new ART initiator model
        self.new_dx: pd.DataFrame
        """Parameter for new ART initiators. Chosen based on new_dx input, either "base" or "ehe" 
        for given group."""
        if new_dx == "base":
            self.new_dx = pd.read_hdf(self.parameters_path, "new_dx").loc[group_name]
        elif new_dx == "ehe":
            self.new_dx = pd.read_hdf(self.parameters_path, "new_dx_ehe").loc[group_name]
        else:
            raise ValueError("Invalid new diagnosis file specified")
        # Choose mortality model
        mortality_model_str: str
        """Mortality model for the run."""
        if mortality_model == "by_sex_race_risk":
            mortality_model_str = ""
        else:
            mortality_model_str = "_" + mortality_model

        if (mortality_model != "by_sex_race_risk") and (
            mortality_model != "by_sex_race_risk_2015" and (idu_threshold != "2x")
        ):
            raise ValueError(
                "Alternative mortality models with idu threshold changes is not implemented"
            )

        # Mortality In Care
        self.mortality_in_care: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_in_care{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality in care for given group. Chosen based on mortality_model 
        input."""
        self.mortality_in_care_age: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_in_care_age{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality in care by age for given group. Chosen based on mortality_model 
        input."""
        self.mortality_in_care_sqrtcd4: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_in_care_sqrtcd4{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality in care by sqrt CD4 count for given group. 
        Chosen based on mortality_model input."""
        self.mortality_in_care_vcov: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_in_care_vcov"
        ).loc[group_name]
        """Parameter for variance-covariance matrix for mortality in care for given group."""

        # Mortality Out Of Care
        self.mortality_out_care: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_out_care{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality out of care for given group. Chosen based on mortality_model 
        input."""
        self.mortality_out_care_age: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_out_care_age{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality out of care by age for given group. Chosen based on 
        mortality_model input."""
        self.mortality_out_care_tv_sqrtcd4: pd.DataFrame = pd.read_hdf(
            self.parameters_path, f"mortality_out_care_tv_sqrtcd4{mortality_model_str}"
        ).loc[group_name]
        """Parameter for mortality out of care by time-varying sqrt CD4 count for given group. 
        Chosen based on mortality_model input."""
        self.mortality_out_care_vcov: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_out_care_vcov"
        ).loc[group_name]
        """Parameter for variance-covariance matrix for mortality out of care for given group."""

        # Mortality Threshold
        self.mortality_threshold: pd.DataFrame
        """Parameter for mortality threshold for given group. Chosen based on idu_threshold input. 
        If idu_threshold is not 2x, then the mortality threshold is dependent on the idu_threshold. 
        If idu_threshold is 2x, then the mortality threshold is dependent on the mortality_model."""
        if idu_threshold != "2x":
            self.mortality_threshold = pd.read_hdf(
                self.parameters_path, f"mortality_threshold_idu_{idu_threshold}"
            ).loc[group_name]
        else:
            self.mortality_threshold = pd.read_hdf(
                self.parameters_path, f"mortality_threshold{mortality_model_str}"
            ).loc[group_name]

        # Loss To Follow Up
        self.loss_to_follow_up = pd.read_hdf(self.parameters_path, "loss_to_follow_up").loc[
            group_name
        ]
        """Parameter for loss to follow up for given group."""
        self.ltfu_knots: pd.DataFrame = pd.read_hdf(self.parameters_path, "ltfu_knots").loc[
            group_name
        ]
        """Parameter for loss to follow up knots for given group."""
        self.loss_to_follow_up_vcov: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "loss_to_follow_up_vcov"
        ).loc[group_name]
        """Parameter for variance-covariance matrix for loss to follow up for given group."""

        # Cd4 Increase
        self.cd4_increase: pd.DataFrame = pd.read_hdf(self.parameters_path, "cd4_increase").loc[
            group_name
        ]
        """Parameter for CD4 increase for given group."""
        self.cd4_increase_vcov: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4_increase_vcov"
        ).loc[group_name]
        """Parameter for variance-covariance matrix for CD4 increase for given group."""
        self.cd4_increase_knots_age: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4_increase_knots_age"
        ).loc[group_name]
        """Parameter for CD4 increase knots by age for given group."""
        self.cd4_increase_knots_cd4_init: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4_increase_knots_cd4_init"
        ).loc[group_name]
        """Parameter for CD4 increase knots by initial CD4 count for given group."""
        self.cd4_increase_knots_time_from_h1yy: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4_increase_knots_time_from_h1yy"
        ).loc[group_name]
        """Parameter for CD4 increase knots by time from HIV diagnosis for given group."""

        # Cd4 Decrease
        self.cd4_decrease: pd.DataFrame = pd.read_hdf(self.parameters_path, "cd4_decrease").loc[
            "all"
        ]
        """Parameter for CD4 decrease for all groups."""
        self.cd4_decrease_vcov: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "cd4_decrease_vcov"
        )
        """Parameter for variance-covariance matrix for CD4 decrease for all groups."""

        # Years out of Care
        self.years_out_of_care: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "years_out_of_care"
        )
        """Parameter for years out of care for all groups."""

        # BMI
        self.pre_art_bmi: pd.DataFrame = pd.read_hdf(self.parameters_path, "pre_art_bmi").loc[
            group_name
        ]
        """Parameter for pre-ART BMI for given group."""
        self.pre_art_bmi_model: int = (
            pd.read_hdf(self.parameters_path, "pre_art_bmi_model").loc[group_name].values[0]
        )
        """Parameter for pre-ART BMI model for given group."""
        self.pre_art_bmi_age_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "pre_art_bmi_age_knots"
        ).loc[group_name]
        """Parameter for pre-ART BMI age knots for given group."""
        self.pre_art_bmi_h1yy_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "pre_art_bmi_h1yy_knots"
        ).loc[group_name]
        """Parameter for pre-ART BMI year of HIV diagnosis knots for given group."""
        self.pre_art_bmi_rse: float = (
            pd.read_hdf(self.parameters_path, "pre_art_bmi_rse").loc[group_name].values[0]
        )
        """Parameter for pre-ART BMI residual standard error for given group."""
        self.post_art_bmi: pd.DataFrame = pd.read_hdf(self.parameters_path, "post_art_bmi").loc[
            group_name
        ]
        """Parameter for post-ART BMI for given group."""
        self.post_art_bmi_age_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "post_art_bmi_age_knots"
        ).loc[group_name]
        """Parameter for post-ART BMI age knots for given group."""
        self.post_art_bmi_pre_art_bmi_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "post_art_bmi_pre_art_bmi_knots"
        ).loc[group_name]
        """Parameter for post-ART BMI pre-ART BMI knots for given group."""
        self.post_art_bmi_cd4_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "post_art_bmi_cd4_knots"
        ).loc[group_name]
        """Parameter for post-ART BMI CD4 count knots for given group."""
        self.post_art_bmi_cd4_post_knots: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "post_art_bmi_cd4_post_knots"
        ).loc[group_name]
        """Parameter for post-ART BMI CD4 count after ART initiation knots for given group."""
        self.post_art_bmi_rse: float = (
            pd.read_hdf(self.parameters_path, "post_art_bmi_rse").loc[group_name].values[0]
        )
        """Parameter for post-ART BMI residual standard error for given group."""

        # BMI Intervention parameters
        if bmi_intervention_scenario not in [0, 1, 2, 3]:
            raise ValueError("bmi_intervention_scenario values only supported for 0, 1, 2, and 3")
        self.bmi_intervention_scenario: int = bmi_intervention_scenario
        """Parameter for BMI intervention scenario to apply for given group. Must be 0, 1, 2, or 3.
        0 corresponds to no intervention, 1 corresponds to a lifestyle intervention for those with 
        BMI over 25, 2 corresponds to a lifestyle intervention for those with BMI over 30, 
        and 3 corresponds to a pharmacological intervention for those with BMI over 30."""
        self.bmi_intervention_start_year: int = bmi_intervention_start_year
        """Parameter for BMI intervention start year for given group."""
        self.bmi_intervention_end_year: int = bmi_intervention_end_year
        """Parameter for BMI intervention end year for given group."""
        if bmi_intervention_coverage < 0 or bmi_intervention_coverage > 1:
            raise ValueError("bmi_intervention_coverage must be between 0 and 1 inclusive")
        self.bmi_intervention_coverage: float = bmi_intervention_coverage
        """Parameter for BMI intervention coverage for given group. Must be between 0 and 1 
        inclusive. Represents the proportion of eligible population that receives the BMI 
        intervention."""
        if bmi_intervention_effectiveness < 0 or bmi_intervention_effectiveness > 1:
            raise ValueError("bmi_intervention_effectiveness must be between 0 and 1 inclusive")
        self.bmi_intervention_effectiveness: float = bmi_intervention_effectiveness
        """Parameter for BMI intervention effectiveness for given group. Must be between 0 and 1 
        inclusive. Represents the proportion of eligible population that receives the BMI 
        intervention."""

        # Comorbidities
        self.prev_users_dict: dict[str, pd.DataFrame] = {
            comorbidity: pd.read_hdf(self.parameters_path, f"{comorbidity}_prev_users").loc[
                group_name
            ]
            for comorbidity in STAGE0 + STAGE1 + STAGE2 + STAGE3
        }
        """Parameter for prevalence of comorbidity among users for given group. Dictionary with 
        keys for each comorbidity and values as the prevalence of that comorbidity among users 
        for the given group."""
        self.prev_inits_dict: dict[str, pd.DataFrame] = {
            comorbidity: pd.read_hdf(self.parameters_path, f"{comorbidity}_prev_inits").loc[
                group_name
            ]
            for comorbidity in STAGE0 + STAGE1 + STAGE2 + STAGE3
        }
        """Parameter for prevalence of comorbidity among new initiators for given group. Dictionary 
        with keys for each comorbidity and values as the prevalence of that comorbidity among new 
        initiators for the given group."""
        self.comorbidity_coeff_dict: dict[str, pd.DataFrame] = {
            comorbidity: pd.read_hdf(self.parameters_path, f"{comorbidity}_coeff").loc[group_name]
            for comorbidity in STAGE1 + STAGE2 + STAGE3
        }
        """Parameter for coefficient for comorbidity in the CD4 decrease model for given group. 
        Dictionary with keys for each comorbidity and values as the coefficient for that 
        comorbidity in the CD4 decrease model for the given group."""
        self.delta_bmi_dict: dict[str, pd.DataFrame] = {
            comorbidity: pd.read_hdf(self.parameters_path, f"{comorbidity}_delta_bmi").loc[
                group_name
            ]
            for comorbidity in STAGE2 + STAGE3
        }
        """Parameter for change in BMI associated with comorbidity for given group. Dictionary with 
        keys for each comorbidity and values as the change in BMI associated with that comorbidity 
        for the given group."""
        self.post_art_bmi_dict: dict[str, pd.DataFrame] = {
            comorbidity: pd.read_hdf(self.parameters_path, f"{comorbidity}_post_art_bmi").loc[
                group_name
            ]
            for comorbidity in STAGE2 + STAGE3
        }
        """Parameter for post-ART BMI associated with comorbidity for given group. Dictionary with 
        keys for each comorbidity and values as the post-ART BMI associated with that comorbidity 
        for the given group."""

        # Aim 2 Mortality
        self.mortality_in_care_co: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_in_care_co"
        ).loc[group_name]
        """Parameter for mortality in care for given group. Coefficients for the mortality in care 
        model for the given group."""
        self.mortality_in_care_post_art_bmi: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_in_care_post_art_bmi"
        ).loc[group_name]
        """Parameter for mortality in care for given group. Coefficients for the post-ART BMI variable
        in the mortality in care model for the given group."""
        self.mortality_out_care_co: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_out_care_co"
        ).loc[group_name]
        """Parameter for mortality out of care for given group. Coefficients for the mortality out 
        of care model for the given group."""
        self.mortality_out_care_post_art_bmi: pd.DataFrame = pd.read_hdf(
            self.parameters_path, "mortality_out_care_post_art_bmi"
        ).loc[group_name]
        """Parameter for mortality out of care for given group. Coefficients for the post-ART BMI variable
        in the mortality out of care model for the given group."""

        # Year and age ranges
        self.AGES: np.ndarray = np.arange(18, 87)
        """Parameter for age range of agents in the model. Minimum age is 18 and maximum age is 
        86."""
        self.AGE_CATS: np.ndarray = np.arange(2, 8)
        """Parameter for age categories for agents in the model. Age categories are defined as 
        18-29, 30-39, 40-49, 50-59, 60-69, and 70-79."""
        self.SIMULATION_YEARS: np.ndarray = np.arange(2010, final_year + 1)
        """Parameter for years of the simulation. Simulation runs from 2010 to final_year."""
        self.ALL_YEARS: np.ndarray = np.arange(2000, final_year + 1)
        """Parameter for all years in the model. Range from 2000 to final_year."""
        self.INITIAL_YEARS: np.ndarray = np.arange(2000, 2010)
        """Parameter for initial years of the model. Range from 2000 to 2009."""
        self.CD4_BINS: np.ndarray = np.arange(2001)
        """Parameter for CD4 count bins for the model. Range from 0 to 2000."""

        # Sensitivity Analysis
        self.sa_variables: list[str] = sa_variables
        """Parameter for sensitivity analysis variables. List of variables to include in 
        sensitivity analysis."""
        self.sa_scalars: dict[str, float] = {}
        """Parameter for sensitivity analysis scalars. Dictionary with keys for each variable 
        included in sensitivity analysis and values as the scalar to multiply that variable by for 
        the sensitivity analysis."""

        if self.sa_variables:
            for comorbidity in self.prev_users_dict:
                if f"{comorbidity}_prevalence_prev" in self.sa_variables:
                    self.sa_scalars[f"{comorbidity}_prevalence_prev"] = (
                        self.init_random_state.uniform(0.8, 1.2)
                    )
                    self.prev_users_dict[comorbidity] *= self.sa_scalars[
                        f"{comorbidity}_prevalence_prev"
                    ]

            for comorbidity in self.prev_inits_dict:
                if f"{comorbidity}_prevalence" in self.sa_variables:
                    self.sa_scalars[f"{comorbidity}_prevalence"] = self.init_random_state.uniform(
                        0.8, 1.2
                    )
                    self.prev_inits_dict[comorbidity] *= self.sa_scalars[
                        f"{comorbidity}_prevalence"
                    ]

            for comorbidity in STAGE0 + STAGE1 + STAGE2 + STAGE3:
                if f"{comorbidity}_incidence" in self.sa_variables:
                    self.sa_scalars[f"{comorbidity}_incidence"] = self.init_random_state.uniform(
                        0.8, 1.2
                    )

            if "pre_art_bmi" in self.sa_variables:
                self.sa_scalars["pre_art_bmi"] = self.init_random_state.uniform(0.8, 1.2)

            if "post_art_bmi" in self.sa_variables:
                self.sa_scalars["post_art_bmi"] = self.init_random_state.uniform(0.8, 1.2)

            if "art_initiators" in self.sa_variables:
                self.sa_scalars["art_initiators"] = self.init_random_state.uniform(0.8, 1.2)

        # Draw a random value between predicted and 2018 predicted value for years greater than
        # 2018
        # TODO refactor this and save it in parameters
        self.age_by_h1yy["estimate"] = (
            self.random_state.rand(len(self.age_by_h1yy.index))
            * (self.age_by_h1yy["high_value"] - self.age_by_h1yy["low_value"])
        ) + self.age_by_h1yy["low_value"]

        self.cd4n_by_h1yy["estimate"] = (
            self.random_state.rand(len(self.cd4n_by_h1yy.index))
            * (self.cd4n_by_h1yy["high_value"] - self.cd4n_by_h1yy["low_value"])
        ) + self.cd4n_by_h1yy["low_value"]

        self.n_initial_users = self.on_art_2009.iloc[0]
        """Parameter for number of ART users in 2009 for given group, taken from on_art_2009 
        parameter."""

        self.n_initial_nonusers: int
        """Parameter for number of ART non-users in 2009 for given group. Calculated based on the 
        number of new ART initiators each year and the assumption that those not initiating ART in 
        the first few years of the model are the initial ART non-users."""

        self.n_new_agents: int
        """Parameter for number of new agents entering the model each year. Calculated based on the 
        number of new ART initiators each year and the number of new ART non-users each year."""

        # Simulate number of new art initiators and initial nonusers
        self.n_initial_nonusers: int
        self.n_new_agents: int
        self.n_initial_nonusers, self.n_new_agents = self.simulate_new_dx()

        self.save_parameters()


# [docs] (Sphinx viewcode link marker, not part of the module)
    def save_parameters(self) -> None:
        """
        Save all parameters as a dataframe.
        """

        param_dict = {
            "replication": self.replication,
            "group": self.group_name,
            "new_dx": self.new_dx_val,
            "final_year": self.final_year,
            "mortality_model": self.mortality_model,
            "mortality_threshold_flag": self.mortality_threshold_flag,
            "idu_threshold": self.idu_threshold,
            "seed": self.seed,
            "bmi_intervention_scenario": self.bmi_intervention_scenario,
            "bmi_intervention_start_year": self.bmi_intervention_start_year,
            "bmi_intervention_end_year": self.bmi_intervention_end_year,
            "bmi_intervention_coverage": self.bmi_intervention_coverage,
            "bmi_intervention_effectiveness": self.bmi_intervention_effectiveness,
        }

        for scalar in self.sa_scalars:
            param_dict[scalar] = self.sa_scalars[scalar]

        self.param_dataframe = pd.DataFrame(param_dict, index=[0])

        if self.output_folder:
            self.param_dataframe.to_parquet(
                self.output_folder / "parameters.parquet", compression="zstd"
            )



# [docs] (Sphinx viewcode link marker, not part of the module)
    def simulate_new_dx(self) -> Tuple[int, pd.DataFrame]:
        """
        Return the number of ART non-users in 2009 as an integer and the number of agents entering
        the model each year as art users and non-users as a dataframe. Draw number of new diagnoses
        from a uniform distribution between upper and lower bounds. Calculate number of new art
        initiators by assuming a certain number link in the first year as estimated by a linear
        regression on CDC data, capped at 95%. We assume that 40% of the remaining population links
        to care over the next 3 years. We assume that 70% of those linking to care begin ART,
        rising to 85% in 2011 and 97% afterwards. We take the number of people not initiating ART
        2006 - 2009 in this calculation to be the out of care population size in 2009 for our
        simulation.

        Parameters
        ----------
        parameters : Parameters
            Parameter object with new_dx and linkage_to_care attributes.
        random_state : np.random.RandomState
            Random State object for random number sampling.

        Returns
        -------
        Tuple[int, pd.DataFrame]
            (number of ART non-users in 2009 as an integer, number of agents entering the model
            each year as art users and non-users as a dataframe)
        """
        new_dx = self.new_dx.copy()
        linkage_to_care = self.linkage_to_care

        # Draw new dx from a uniform distribution between upper and lower for 2016-final_year
        new_dx["n_dx"] = (
            new_dx["lower"] + (new_dx["upper"] - new_dx["lower"]) * self.random_state.uniform()
        )

        # Only a proportion of new diagnoses link to care and 40% of the remaining link
        # in the next 3 years
        new_dx["unlinked"] = new_dx["n_dx"] * (1 - linkage_to_care["link_prob"])
        new_dx["gardner_per_year"] = new_dx["unlinked"] * 0.4 / 3.0
        new_dx["year0"] = new_dx["n_dx"] * linkage_to_care["link_prob"]
        new_dx["year1"] = new_dx["gardner_per_year"].shift(1, fill_value=0)
        new_dx["year2"] = new_dx["gardner_per_year"].shift(2, fill_value=0)
        new_dx["year3"] = new_dx["gardner_per_year"].shift(3, fill_value=0)
        new_dx["total_linked"] = (
            new_dx["year0"] + new_dx["year1"] + new_dx["year2"] + new_dx["year3"]
        )

        # Proportion of those linked to care start ART
        new_dx["art_initiators"] = (new_dx["total_linked"] * linkage_to_care["art_prob"]).astype(
            int
        )
        new_dx["art_delayed"] = (
            new_dx["total_linked"] * (1 - linkage_to_care["art_prob"])
        ).astype(int)

        # TODO make the start and end dates here parametric
        # Count those not starting art 2006 - 2009 as initial ART nonusers
        n_initial_nonusers = new_dx.loc[np.arange(2006, 2010), "art_delayed"].sum()

        # Compile list of number of new agents to be introduced in the model
        new_agents = new_dx.loc[
            np.arange(2010, new_dx.index.max() + 1), ["art_initiators", "art_delayed"]
        ]

        if self.sa_variables and "art_initiators" in self.sa_variables:
            new_agents["art_initiators"] *= self.sa_scalars["art_initiators"]
            new_agents["art_delayed"] *= self.sa_scalars["art_initiators"]

            new_agents = new_agents.astype({"art_initiators": int, "art_delayed": int})

        return n_initial_nonusers, new_agents