diff --git a/src/scripts/longterm_projections/longterm_projections_draws.json b/src/scripts/longterm_projections/longterm_projections_draws.json new file mode 100644 index 0000000000..5193d4bf77 --- /dev/null +++ b/src/scripts/longterm_projections/longterm_projections_draws.json @@ -0,0 +1,43 @@ +{ + "scenario_script_path": "/Users/rem76/PycharmProjects/TLOmodel/src/scripts/longterm_projections/longterm_projections.py", + "scenario_seed": 0, + "arguments": [], + "runs_per_draw": 1, + "draws": [ + { + "draw_number": 0, + "parameters": { + "SymptomManager": { + "spurious_symptoms": true + }, + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded_plus", + "mode_appt_constraints": 1, + "cons_availability": "default", + "beds_availability": "all", + "equip_availability": "default", + "cons_availability_postSwitch": "all", + "year_cons_availability_switch": 2020, + "equip_availability_postSwitch": "all", + "year_equip_availability_switch": 2020 + }, + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": { + "max_healthsystem_function": [ + false, + true + ], + "max_healthcare_seeking": [ + false, + true + ], + "year_of_switch": 2020 + } + } + } + ], + "commit": "7ea3f5ae200444bf4a4688dbc2e40961fef84fd8", + "github": "https://github.com/UCL/TLOmodel/tree/7ea3f5ae200444bf4a4688dbc2e40961fef84fd8" +} \ No newline at end of file diff --git a/src/tlo/methods/alri.py b/src/tlo/methods/alri.py index 70ac14fe2d..c8ec2d10db 100644 --- a/src/tlo/methods/alri.py +++ b/src/tlo/methods/alri.py @@ -50,6 +50,7 @@ to_odds = lambda pr: pr / (1.0 - pr) # noqa: E731 to_prob = lambda odds: odds / (1.0 + odds) # noqa: E731 + # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITION # --------------------------------------------------------------------------------------------------------- @@ -995,6 +996,15 @@ def report_daly_values(self): daly_values_by_pathogen = 
daly_values_by_pathogen.add_prefix('ALRI_') return daly_values_by_pathogen + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of ALRI for all individuals + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['ri_current_infection_status'])] + ) / len(df[df['is_alive']]) + + return {'ALRI': total_prev} + def over_ride_availability_of_certain_consumables(self): """Over-ride the availability of certain consumables, according the parameter values provided.""" p = self.parameters @@ -1121,7 +1131,6 @@ def look_up_consumables(self): get_item_code(item='Salbutamol sulphate 1mg/ml, 5ml_each_CMST'): 2 } - def end_episode(self, person_id): """End the episode infection for a person (i.e. reset all properties to show no current infection or complications). @@ -1336,7 +1345,7 @@ def _ultimate_treatment_indicated_for_patient(classification_for_treatment_decis elif classification_for_treatment_decision == 'chest_indrawing_pneumonia': return { 'antibiotic_indicated': ( - 'Amoxicillin_tablet_or_suspension_5days', # <-- # <-- First choice antibiotic + 'Amoxicillin_tablet_or_suspension_5days', # <-- # <-- First choice antibiotic ), 'oxygen_indicated': False } @@ -1441,11 +1450,11 @@ def make_linear_model(age_effects=None): 'age_years', conditions_are_mutually_exclusive=True, conditions_are_exhaustive=True).when(0, age_effects[0]) - .when(1, age_effects[1]) - .when(2, age_effects[2]) - .when(3, age_effects[3]) - .when(4, age_effects[4]) - .when('>= 5', 0.0), + .when(1, age_effects[1]) + .when(2, age_effects[2]) + .when(3, age_effects[3]) + .when(4, age_effects[4]) + .when('>= 5', 0.0), Predictor('li_wood_burn_stove').when(False, p['rr_ALRI_indoor_air_pollution']), Predictor().when('(va_measles_all_doses == False) & (age_years >= 1)', p['rr_ALRI_incomplete_measles_immunisation']), @@ -1675,13 +1684,14 @@ def prob_die_of_alri(self, def get_odds_of_death(age_in_whole_months): """Returns odds of death given age in whole 
months.""" + def get_odds_of_death_for_under_two_month_old(age_in_whole_months): return p[f'base_odds_death_ALRI_{_age_}'] * \ - (p[f'or_death_ALRI_{_age_}_by_month_increase_in_age'] ** age_in_whole_months) + (p[f'or_death_ALRI_{_age_}_by_month_increase_in_age'] ** age_in_whole_months) def get_odds_of_death_for_over_two_month_old(age_in_whole_months): return p[f'base_odds_death_ALRI_{_age_}'] * \ - (p[f'or_death_ALRI_{_age_}_by_month_increase_in_age'] ** (age_in_whole_months - 2)) + (p[f'or_death_ALRI_{_age_}_by_month_increase_in_age'] ** (age_in_whole_months - 2)) return get_odds_of_death_for_under_two_month_old(age_in_whole_months=age_in_whole_months) \ if age_in_whole_months < 2 \ @@ -1887,6 +1897,7 @@ def _prob_treatment_fails_when_cough_or_cold(): else: raise ValueError('Unrecognised imci_symptom_based_classification.') + # --------------------------------------------------------------------------------------------------------- # DISEASE MODULE EVENTS # --------------------------------------------------------------------------------------------------------- @@ -3040,7 +3051,8 @@ def apply(self, person_id): assert 'fast_breathing_pneumonia' == \ self.module.get_imci_classification_based_on_symptoms( - child_is_younger_than_2_months=False, symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id) + child_is_younger_than_2_months=False, + symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id) ) diff --git a/src/tlo/methods/bladder_cancer.py b/src/tlo/methods/bladder_cancer.py index 52271f6f16..bdf02351dc 100644 --- a/src/tlo/methods/bladder_cancer.py +++ b/src/tlo/methods/bladder_cancer.py @@ -594,6 +594,15 @@ def report_daly_values(self): return disability_series_for_alive_persons + def report_prevalence(self): + # This reports on the prevalence of bladder cancer for all individuals + + df = self.sim.population.props + total_prev = len( + df[(df.bc_status != 'none') & (df.is_alive)] + ) / len(df[df.is_alive]) + return {'Bladder 
Cancer': total_prev} + def do_at_generic_first_appt( self, person_id: int, diff --git a/src/tlo/methods/breast_cancer.py b/src/tlo/methods/breast_cancer.py index a55c6f4930..ac9382582c 100644 --- a/src/tlo/methods/breast_cancer.py +++ b/src/tlo/methods/breast_cancer.py @@ -571,6 +571,15 @@ def report_daly_values(self): return disability_series_for_alive_persons + def report_prevalence(self): + # Returns a dict giving the prevalence of breast cancer (brc_status != 'none') among persons who are alive + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['brc_status'] != 'none')] + ) / len(df[df['is_alive']]) + + return {'Breast Cancer': total_prev} + def do_at_generic_first_appt( self, person_id: int, diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py index 3c985c2bf1..342ba0a093 100644 --- a/src/tlo/methods/cardio_metabolic_disorders.py +++ b/src/tlo/methods/cardio_metabolic_disorders.py @@ -807,6 +807,18 @@ def left_censor(obs, window_open): return dw + def report_prevalence(self): + """Report prevalence of each condition, among persons who are alive, to the HealthBurden module""" + df = self.sim.population.props + prevalence_dict = {} + + for condition in self.conditions: + prevalence = df.loc[df.is_alive, f'nc_{condition}'].sum() / df.is_alive.sum() + prevalence_dict[condition] = prevalence + + # Return a dict of the form {condition: prevalence} + return prevalence_dict + def on_hsi_alert(self, person_id, treatment_id): """ This is called whenever there is an HSI event commissioned by one of the other disease modules. 
diff --git a/src/tlo/methods/chronicsyndrome.py b/src/tlo/methods/chronicsyndrome.py index 0ae6599939..1740367c4f 100644 --- a/src/tlo/methods/chronicsyndrome.py +++ b/src/tlo/methods/chronicsyndrome.py @@ -283,6 +283,8 @@ def report_daly_values(self): return health_values + def report_prevalence(self): + return {'ChronicSyndrome': None} def do_at_generic_first_appt_emergency( self, person_id: int, diff --git a/src/tlo/methods/copd.py b/src/tlo/methods/copd.py index 53602505ae..2f803b2583 100644 --- a/src/tlo/methods/copd.py +++ b/src/tlo/methods/copd.py @@ -182,6 +182,15 @@ def report_daly_values(self): df = self.sim.population.props return df.loc[df.is_alive, 'ch_lungfunction'].map(self.models.disability_weight_given_lungfunction) + def report_prevalence(self): + # Returns a dict giving the prevalence of COPD (ch_lungfunction > 3) among persons who are alive + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['ch_lungfunction'] > 3)] # 3 is mild COPD + ) / len(df[df['is_alive']]) + + return {'COPD': total_prev} + def define_symptoms(self): """Define and register Symptoms""" self.sim.modules['SymptomManager'].register_symptom( diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..265df1875d 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -134,7 +134,7 @@ def __init__(self, name=None, resourcefilepath=None, equal_allocation_by_distric ), 'district_num_of_residence': Property( - Types.CATEGORICAL, + Types.CATEGORICAL, 'The district number in which the person is resident', categories=['SET_AT_RUNTIME'] ), @@ -383,6 +383,53 @@ def on_birth(self, mother_id, child_id): 'mother_age_at_pregnancy': _mother_age_at_pregnancy} ) + def report_prevalence(self): + """ + Report the neonatal and maternal mortality rates (per 1000 persons aged < 29 days) for deaths in the last month. + Returns a dict with keys 'NMR', 'MMR' and 'live_births' (all None if the SimplifiedBirths module is registered). 
+ """ + if 'SimplifiedBirths' in self.sim.modules: + neonatal_mortality_rate = None + maternal_mortality_rate = None + live_births = None + else: + df = self.sim.population.props + if len(df[(df['age_days'] < 29)]) == 0: + neonatal_mortality_rate = 0 + maternal_mortality_rate = 0 + live_births = 0 + else: + neonatal_deaths = len(df[(df['age_days'] < 29) & (df['age_years'] == 0) & ~(df['is_alive']) & ( + df['date_of_death'] >= (self.sim.date - DateOffset(months=1)))]) + live_births = len(df[(df['age_days'] < 29)]) + neonatal_mortality_rate = neonatal_deaths/live_births * 1000 + maternal_direct_deaths = len(df.loc[ + (df['cause_of_death'] == 'Maternal Disorders') & + (df['date_of_death'] >= (self.sim.date - DateOffset(months=1))) + ]) + indirect_deaths_non_hiv = len(df.loc[ + (df['is_pregnant'] | df['la_is_postpartum']) & + (df['cause_of_death'].str.contains( + 'Malaria|Suicide|ever_stroke|diabetes|chronic_ischemic_hd|ever_heart_attack|chronic_kidney_disease', na=False) | + (df['cause_of_death'] == 'TB')) & + (df['date_of_death'] >= (self.sim.date - DateOffset(months=1))) + ]) + indirect_deaths_hiv = len(df.loc[ + (df['is_pregnant'] | df['la_is_postpartum']) & + df['cause_of_death'].str.contains('AIDS_non_TB|AIDS_TB', na=False) & + (df['date_of_death'] >= (self.sim.date - DateOffset(months=1))) + ]) + indirect_deaths_hiv = indirect_deaths_hiv * 0.3 # https://www.who.int/publications/i/item/9789240068759 + maternal_deaths = maternal_direct_deaths + indirect_deaths_non_hiv + indirect_deaths_hiv + maternal_mortality_rate = maternal_deaths/live_births * 1000 + + metrics_dict = { + 'NMR': neonatal_mortality_rate, + 'MMR': maternal_mortality_rate, + 'live_births': live_births + } + return metrics_dict + def _edit_init_pop_to_prevent_persons_greater_than_max_age(self, df, max_age: int): """Return an edited version of the `pd.DataFrame` describing the probability of persons in the population being created with certain characteristics to reflect the constraint the persons 
aged greater than `max_age_initial` diff --git a/src/tlo/methods/depression.py b/src/tlo/methods/depression.py index a0ffdd12b2..07ea0075ce 100644 --- a/src/tlo/methods/depression.py +++ b/src/tlo/methods/depression.py @@ -554,6 +554,20 @@ def right_censor(obs, window_close): return av_daly_wt_last_month + def report_prevalence(self): + # This reports on the prevalence of depression for all individuals + + df = self.sim.population.props + any_depr_in_the_last_month = df[((df['is_alive']) & ( + ~pd.isnull(df['de_date_init_most_rec_depr']) & (df['de_date_init_most_rec_depr'] <= self.sim.date) + ) & ( + pd.isnull(df['de_date_depr_resolved']) | + (df['de_date_depr_resolved'] >= (self.sim.date - DateOffset(months=1))) + ))] + total_prev = len( + any_depr_in_the_last_month + ) / len(df[df.is_alive]) + return {'Depression': total_prev} def _check_for_suspected_depression( self, symptoms: List[str], treatment_id: str, has_even_been_diagnosed: bool ): diff --git a/src/tlo/methods/diarrhoea.py b/src/tlo/methods/diarrhoea.py index 06c8a37b18..d8fec3188d 100644 --- a/src/tlo/methods/diarrhoea.py +++ b/src/tlo/methods/diarrhoea.py @@ -649,6 +649,13 @@ def report_daly_values(self): average_daly_weight_in_last_month = pd.Series(values, idx) / days_last_month return average_daly_weight_in_last_month.reindex(index=df.loc[df.is_alive].index, fill_value=0.0) + def report_prevalence(self): + df = self.sim.population.props + total_prev = len( + df[df.gi_has_diarrhoea & df.is_alive] + ) / len(df[df.is_alive]) + return {'Diarrhoea': total_prev} + def look_up_consumables(self): """Look up and store the consumables item codes used in each of the HSI.""" ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py index 5645d55e34..6f9fb29208 100644 --- a/src/tlo/methods/epilepsy.py +++ b/src/tlo/methods/epilepsy.py @@ -241,6 +241,16 @@ def report_daly_values(self): df = self.sim.population.props # shortcut to 
population properties dataframe return df.loc[df.is_alive, 'ep_disability'] + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of epilepsy for all individuals + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['ep_seiz_stat'] != '0')] + ) / len(df[df['is_alive']]) + + return {'Epilepsy': total_prev} + + def transition_seizure_stat(self): """ This function handles all transitions in epilepsy seizure status, for those on and off anti epileptics. The diff --git a/src/tlo/methods/healthburden.py b/src/tlo/methods/healthburden.py index ba9a9de836..c2b8f95e90 100644 --- a/src/tlo/methods/healthburden.py +++ b/src/tlo/methods/healthburden.py @@ -39,7 +39,9 @@ def __init__(self, name=None, resourcefilepath=None): self.years_life_lost_stacked_time = None self.years_life_lost_stacked_age_and_time = None self.years_lived_with_disability = None + self.prevalence_of_diseases = None self.recognised_modules_names = None + self.recognised_modules_names_for_prevalence = None self.causes_of_disability = None self._causes_of_yll = None self._causes_of_dalys = None @@ -58,7 +60,9 @@ def __init__(self, name=None, resourcefilepath=None): 'Age_Limit_For_YLL': Parameter( Types.REAL, 'The age up to which deaths are recorded as having induced a lost of life years'), 'gbd_causes_of_disability': Parameter( - Types.LIST, 'List of the strings of causes of disability defined in the GBD data') + Types.LIST, 'List of the strings of causes of disability defined in the GBD data'), + 'logging_frequency_prevalence': Parameter(Types.STRING, + 'Set to the frequency at which we want to make calculations of the prevalence logger') } PROPERTIES = {} @@ -71,6 +75,7 @@ def read_parameters(self, data_folder): # ghe2019_daly-methods.pdf?sfvrsn=31b25009_7 p['gbd_causes_of_disability'] = set(pd.read_csv( Path(self.resourcefilepath) / 'gbd' / 'ResourceFile_CausesOfDALYS_GBD2019.csv', header=None)[0].values) + p['logging_frequency_prevalence'] = 
'month' def initialise_population(self, population): pass @@ -90,6 +95,8 @@ def initialise_simulation(self, sim): age_index = self.sim.modules['Demography'].AGE_RANGE_CATEGORIES wealth_index = sim.modules['Lifestyle'].PROPERTIES['li_wealth'].categories year_index = list(range(self.sim.start_date.year, self.sim.end_date.year + 1)) + month_index = list(range(self.sim.start_date.month, self.sim.end_date.month + 1)) + day_index = list(range(self.sim.start_date.day, self.sim.end_date.day + 1)) self.multi_index_for_age_and_wealth_and_time = pd.MultiIndex.from_product( [sex_index, age_index, wealth_index, year_index], names=['sex', 'age_range', 'li_wealth', 'year']) @@ -99,27 +106,55 @@ def initialise_simulation(self, sim): self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) self.years_life_lost_stacked_age_and_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) self.years_lived_with_disability = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) + if self.parameters['logging_frequency_prevalence'] == 'day': + self.prevalence_of_diseases = pd.DataFrame(index=day_index) + elif self.parameters['logging_frequency_prevalence'] == 'month': + self.prevalence_of_diseases = pd.DataFrame(index=month_index) + else: + self.prevalence_of_diseases = pd.DataFrame(index=year_index) # 2) Collect the module that will use this HealthBurden module self.recognised_modules_names = [ m.name for m in self.sim.modules.values() if Metadata.USES_HEALTHBURDEN in m.METADATA ] + # 2) Collect the module that are expected to return prevalences - # Check that all registered disease modules have the report_daly_values() function + self.recognised_modules_names_for_prevalence = self.recognised_modules_names + ['Demography'] + + # Check that all registered disease modules have the report_daly_values() and report_prevalence() functions for module_name in self.recognised_modules_names: assert 
getattr(self.sim.modules[module_name], 'report_daly_values', None) and \ callable(self.sim.modules[module_name].report_daly_values), 'A module that declares use of ' \ 'HealthBurden module must have a ' \ 'callable function "report_daly_values"' + if not module_name.startswith('DiseaseThatCauses'): + assert getattr(self.sim.modules[module_name], 'report_prevalence', None) and \ + callable(self.sim.modules[module_name].report_prevalence), 'A module that declares use of ' \ + 'HealthBurden module must have a ' \ + 'callable function "report_prevalence"' # 3) Process the declarations of causes of disability and DALYS made by the disease modules self.process_causes_of_disability() self.process_causes_of_dalys() - # 4) Launch the DALY Logger to run every month, starting with the end of the first month of simulation + # 4) Launch the DALY to run every month, starting with the end of the first month of simulation sim.schedule_event(Get_Current_DALYS(self), sim.date + DateOffset(months=1)) - # 5) Schedule `Healthburden_WriteToLog` that will write to log annually + # 5) Schedule 'Get_Current_Prevalence_Write_to_Log', which collects prevalence at a set frequency and writes + # them to the log at that frequency + if self.parameters['logging_frequency_prevalence'] == 'day': + sim.schedule_event(GetCurrentPrevalenceWriteToLog(self, frequency=DateOffset(days=1)), + sim.date + DateOffset(days=0)) + + elif self.parameters['logging_frequency_prevalence'] == 'month': + sim.schedule_event(GetCurrentPrevalenceWriteToLog(self, frequency=DateOffset(months=1)), + sim.date + DateOffset(months=1)) + + else: + sim.schedule_event(GetCurrentPrevalenceWriteToLog(self, frequency=DateOffset(year=1)), + sim.date + DateOffset(years=1)) + + # 6) Schedule `Healthburden_WriteToLog` that will write to log annually last_day_of_the_year = Date(sim.date.year, 12, 31) sim.schedule_event(Healthburden_WriteToLog(self), last_day_of_the_year) @@ -169,6 +204,7 @@ def process_causes_of_dalys(self): 3) Output to 
the log mappers for causes of disability to the label """ ... + # 1) Collect causes of death and disability that are reported by each disease module, # merging the gbd_causes declared for deaths or disabilities under the same label, @@ -193,13 +229,13 @@ def merge_dicts_of_causes(d1: Dict, d2: Dict) -> Dict: return merged_causes causes_of_death = collect_causes_from_disease_modules( - all_modules=self.sim.modules.values(), - collect='CAUSES_OF_DEATH', - acceptable_causes=self.sim.modules['Demography'].gbd_causes_of_death) + all_modules=self.sim.modules.values(), + collect='CAUSES_OF_DEATH', + acceptable_causes=self.sim.modules['Demography'].gbd_causes_of_death) causes_of_disability = collect_causes_from_disease_modules( - all_modules=self.sim.modules.values(), - collect='CAUSES_OF_DISABILITY', - acceptable_causes=set(self.parameters['gbd_causes_of_disability'])) + all_modules=self.sim.modules.values(), + collect='CAUSES_OF_DISABILITY', + acceptable_causes=set(self.parameters['gbd_causes_of_disability'])) causes_of_death_and_disability = merge_dicts_of_causes( causes_of_death, @@ -315,10 +351,10 @@ def _format_for_multi_index(_yll: pd.Series): """Returns pd.Series which is the same as in the argument `_yll` except that the multi-index has been expanded to include sex and li_wealth and rearranged so that it matched the expected multi-index format (sex/age_range/li_wealth/year).""" - return pd.DataFrame(_yll)\ - .assign(sex=sex, li_wealth=wealth)\ - .set_index(['sex', 'li_wealth'], append=True)\ - .reorder_levels(['sex', 'age_range', 'li_wealth', 'year'])[_yll.name] + return pd.DataFrame(_yll) \ + .assign(sex=sex, li_wealth=wealth) \ + .set_index(['sex', 'li_wealth'], append=True) \ + .reorder_levels(['sex', 'age_range', 'li_wealth', 'year'])[_yll.name] assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time) @@ -346,19 +382,19 @@ def 
_format_for_multi_index(_yll: pd.Series): end_date=( date_of_birth + pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']) - pd.DateOffset(days=1)), date_of_birth=date_of_birth - ).groupby(level=1).sum()\ - .assign(year=date_of_death.year)\ - .set_index(['year'], append=True)['person_years']\ - .pipe(_format_for_multi_index) + ).groupby(level=1).sum() \ + .assign(year=date_of_death.year) \ + .set_index(['year'], append=True)['person_years'] \ + .pipe(_format_for_multi_index) # Get the years of live lost "stacked by age and time", whereby all the life-years lost up to the age_limit are # ascribed to the age of death and to the year of death. This is computed by collapsing the age-dimension of # `yll_stacked_by_time` onto the age(-range) of death. age_range_to_stack_to = age_range - yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum())\ - .assign(age_range=age_range_to_stack_to)\ - .set_index(['age_range'], append=True)['person_years']\ - .reorder_levels(['sex', 'age_range', 'li_wealth', 'year']) + yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum()) \ + .assign(age_range=age_range_to_stack_to) \ + .set_index(['age_range'], append=True)['person_years'] \ + .reorder_levels(['sex', 'age_range', 'li_wealth', 'year']) # Add the years-of-life-lost from this death to the overall YLL dataframe keeping track if cause_of_death not in self.years_life_lost.columns: @@ -407,42 +443,41 @@ def decompose_yll_by_age_and_time(self, start_date, end_date, date_of_birth): return period + def log_df_line_by_line(self, key, description, df, force_cols=None) -> None: + """Log each line of a dataframe to `logger.info`. Each row of the dataframe is one logged entry. 
+ `force_cols` is the names of the colums that must be included in each logging line (As the parsing of the + log requires the name of the format of each row to be uniform.).""" + df[sorted(set(force_cols) - set(df.columns))] = 0.0 # Force the addition of any missing causes + df = df[sorted(df.columns)] # sort the columns so that they are always in same order + for _, row in df.iterrows(): + logger.info( + key=key, + data=row.to_dict(), + description=description, + ) + def write_to_log(self, year: int): """Write to the log the YLL, YLD and DALYS for a specific year. N.B. This is called at the end of the simulation as well as at the end of each year, so we need to check that the year is not being written to the log more than once.""" - if year in self._years_written_to_log: - return # Skip if the year has already been logged. + return # Skip if the year has already been logged def summarise_results_for_this_year(df, level=[0, 1]) -> pd.DataFrame: """Return pd.DataFrame that gives the summary of the `df` for the `year` by certain levels in the df's multi-index. The `level` argument gives a list of levels to use in `groupby`: e.g., level=[0,1] gives a summary of sex/age-group; and level=[2] gives a summary only by wealth category.""" return df.loc[(slice(None), slice(None), slice(None), year)] \ - .groupby(level=level) \ - .sum() \ - .reset_index() \ - .assign(year=year) - - def log_df_line_by_line(key, description, df, force_cols=None) -> None: - """Log each line of a dataframe to `logger.info`. Each row of the dataframe is one logged entry. 
- `force_cols` is the names of the colums that must be included in each logging line (As the parsing of the - log requires the name of the format of each row to be uniform.).""" - df[sorted(set(force_cols) - set(df.columns))] = 0.0 # Force the addition of any missing causes - df = df[sorted(df.columns)] # sort the columns so that they are always in same order - for _, row in df.iterrows(): - logger.info( - key=key, - data=row.to_dict(), - description=description, - ) + .groupby(level=level) \ + .sum() \ + .reset_index() \ + .assign(year=year) # Check that the format of the internal storage is as expected. self.check_multi_index() # 1) Log the Years Lived With Disability (YLD) (by the 'causes of disability' declared by disease modules). - log_df_line_by_line( + self.log_df_line_by_line( key='yld_by_causes_of_disability', description='Years lived with disability by the declared cause_of_disability, ' 'broken down by year, sex, age-group', @@ -451,7 +486,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: ) # 2) Log the Years of Live Lost (YLL) (by the 'causes of death' declared by disease modules). - log_df_line_by_line( + self.log_df_line_by_line( key='yll_by_causes_of_death', description='Years of life lost by the declared cause_of_death, ' 'broken down by year, sex, age-group. ' @@ -460,7 +495,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: df=(yll := summarise_results_for_this_year(self.years_life_lost)), force_cols=self._causes_of_yll, ) - log_df_line_by_line( + self.log_df_line_by_line( key='yll_by_causes_of_death_stacked', description='Years of life lost by the declared cause_of_death, ' 'broken down by year, sex, age-group. 
' @@ -470,7 +505,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: df=(yll_stacked_by_time := summarise_results_for_this_year(self.years_life_lost_stacked_time)), force_cols=self._causes_of_yll, ) - log_df_line_by_line( + self.log_df_line_by_line( key='yll_by_causes_of_death_stacked_by_age_and_time', description='Years of life lost by the declared cause_of_death, ' 'broken down by year, sex, age-group. ' @@ -482,7 +517,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: ) # 3) Log total DALYS recorded (YLD + LYL) (by the labels declared) - log_df_line_by_line( + self.log_df_line_by_line( key='dalys', description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability' ', broken down by year, sex, age-group. ' @@ -491,7 +526,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: df=self.get_dalys(yld=yld, yll=yll), force_cols=self._causes_of_dalys, ) - log_df_line_by_line( + self.log_df_line_by_line( key='dalys_stacked', description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability' ', broken down by year, sex, age-group. ' @@ -501,7 +536,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: df=self.get_dalys(yld=yld, yll=yll_stacked_by_time), force_cols=self._causes_of_dalys, ) - log_df_line_by_line( + self.log_df_line_by_line( key='dalys_stacked_by_age_and_time', description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability' ', broken down by year, sex, age-group. ' @@ -520,7 +555,7 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: self.years_life_lost_stacked_age_and_time, level=2 ) - log_df_line_by_line( + self.log_df_line_by_line( key='dalys_by_wealth_stacked_by_age_and_time', description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability' ', broken down by year and wealth category.' 
@@ -532,6 +567,40 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: self._years_written_to_log += [year] + def write_to_log_prevalence(self): + """Write to the log the prevalence of conditions currently stored in `self.prevalence_of_diseases`. + N.B. This is called by `GetCurrentPrevalenceWriteToLog` each time that event runs; each row of that dataframe + is logged as one entry (there is no check here against the same period being logged more than once).""" + # Check that the format of the internal storage is as expected. + self.check_multi_index() + self.log_df_line_by_line( + key='prevalence_of_diseases', + description='Prevalence of each disease. ALRI: individuals who have ri_current_infection_status = True. ' + 'Bladder Cancer: individuals who have bc_status != none. ' + 'Breast Cancer: individuals who have brc_status != none. ' + 'chronic_ischemic_hd, chronic_kidney_disease, chronic_lower_back_pain, diabetes, hypertension (all in CMD): all individuals with nc_{condition} as True. ' + 'COPD: all individuals with ch_lungfunction > 3, which is defined as mild COPD. ' + 'MMR (Demography): sum of direct deaths (cause_of_death == Maternal Disorders), indirect non-HIV deaths, and indirect HIV deaths * 0.3 (https://www.who.int/publications/i/item/9789240068759), all in LAST MONTH. ' + 'NMR (Demography): sum of all individuals who died in the last logging period who were < 29 days old in LAST MONTH. ' + 'depression: individuals who had a depressive episode in the last logging period. ' + 'diarrhoea: individuals who are gi_has_diarrhoea = True. ' + 'epilepsy: individuals whose ep_seiz_stat != 0. ' + 'HIV: individuals whose hv_inf = True. ' + 'intrapartum stillbirths (Labour): number of intrapartum stillbirths IN LAST MONTH. ' + 'malaria: individuals who have clinical or severe infections. ' + 'measles: individuals who have me_has_measles = True. ' + 'mockitis: individuals who have mi_is_infected = True. ' + 'oesophageal cancer: individuals who have oc_status != none. ' + 'other adult cancer: individuals who have oac_status != none. ' + 'antenatal stillbirths 
(Pregnancy Supervisor): number of stillbirths that have happened IN LAST MONTH. ' + 'prostate cancer: individuals who have pc_status != none. ' + 'RTI: individuals who have rt_inj_severity != none. ' + 'schisto: individuals who have Low-infection or High-infection, any parasite. ' + 'TB: individuals who have tb_inf = active.', + df=self.prevalence_of_diseases, + force_cols=self.prevalence_of_diseases.columns + ) + def check_multi_index(self): """Check that the multi-index of the dataframes are as expected""" assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) @@ -631,7 +700,7 @@ def apply(self, population): ['sex', 'age_range', 'li_wealth', 'year']) # 5) Add the monthly summary to the overall dataframe for YearsLivedWithDisability - dalys_to_add = disability_monthly_summary.sum().sum() # for checking + dalys_to_add = disability_monthly_summary.sum().sum() # for checking dalys_current = self.module.years_lived_with_disability.sum().sum() # for checking # (Nb. this will add columns that are not otherwise present and add values to columns where they are.) 
@@ -643,8 +712,8 @@ def apply(self, population): # Merge into a dataframe with the correct multi-index (the multi-index from combine is subtly different) self.module.years_lived_with_disability = \ - pd.DataFrame(index=self.module.multi_index_for_age_and_wealth_and_time)\ - .merge(combined, left_index=True, right_index=True, how='left') + pd.DataFrame(index=self.module.multi_index_for_age_and_wealth_and_time) \ + .merge(combined, left_index=True, right_index=True, how='left') # Check multi-index is in check and that the addition of DALYS has worked assert self.module.years_lived_with_disability.index.equals(self.module.multi_index_for_age_and_wealth_and_time) @@ -661,3 +730,40 @@ def __init__(self, module): def apply(self, population): self.module.write_to_log(year=self.sim.date.year) + + +class GetCurrentPrevalenceWriteToLog(RegularEvent, PopulationScopeEventMixin): + """ + This event runs every month and asks each disease module to report the prevalence of each disease + during the previous month. 
+ """ + + def __init__(self, module, frequency: pd.DateOffset): + super().__init__(module, frequency=frequency) + + def apply(self, population): + if not self.module.recognised_modules_names: + return + else: + # Calculate the population size + population_size = len(self.sim.population.props[self.sim.population.props['is_alive']]) + prevalence_from_each_disease_module = {'population': [population_size]} + for disease_module_name in self.module.recognised_modules_names_for_prevalence: + if disease_module_name in ['DiseaseThatCausesA']: + continue + else: + disease_module = self.sim.modules[disease_module_name] + prevalence_from_disease_module = disease_module.report_prevalence() + if prevalence_from_disease_module is None: + continue + for key, value in prevalence_from_disease_module.items(): + prevalence_from_each_disease_module[key] = value + prevalence_from_each_disease_module = pd.DataFrame([prevalence_from_each_disease_module]) + prevalence_from_each_disease_module.drop( + prevalence_from_each_disease_module.index.intersection( + ['DiseaseThatCausesA'] + ), + axis=0, inplace=True + ) + self.module.prevalence_of_diseases = prevalence_from_each_disease_module + self.module.write_to_log_prevalence() diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..adef7801f5 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -1293,6 +1293,15 @@ def report_daly_values(self): return dalys + def report_prevalence(self): + # This reports on the prevalence of HIV for all individuals + + df = self.sim.population.props + total_prev = len( + df[df.hv_inf & df.is_alive] + ) / len(df[df.is_alive]) + return {'HIV': total_prev} + def mtct_during_breastfeeding(self, mother_id, child_id): """ Compute risk of mother-to-child transmission and schedule HivInfectionDuringBreastFeedingEvent. 
diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 35081b7d27..f7408c949e 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -68,6 +68,11 @@ def __init__(self, name=None, resourcefilepath=None): self.possible_intrapartum_complications = list() self.possible_postpartum_complications = list() + # Dictionary that records stillbirths + self.stillbirth_dates = { + 'stillbirth_date': pd.NaT + } + # Finally define a dictionary which will hold the required consumables for each intervention self.item_codes_lab_consumables = dict() @@ -1082,6 +1087,27 @@ def report_daly_values(self): return daly_series + def report_prevalence(self): + """ + This function reports the prevalence of intrapartum stillbirth for this module generated in the previous month + """ + # Filter out non-dictionary values and entries where 'stillbirth_date' is None + stillbirths_happened = { + key: value for key, value in self.stillbirth_dates.items() + if isinstance(value, dict) and value.get('stillbirth_date') is not None + } + + # Filter entries with valid 'stillbirth_date' that occurred in the last month + one_month_ago = self.sim.date - pd.DateOffset(months=1) + filtered_stillbirths = { + key: value for key, value in stillbirths_happened.items() + if isinstance(value.get('stillbirth_date'), pd.Timestamp) and value.get('stillbirth_date') >= one_month_ago + } + + intrapartum_stillbirth_for_month = len(filtered_stillbirths) + + return {'Intrapartum stillbirth': intrapartum_stillbirth_for_month} + # ===================================== HELPER AND TESTING FUNCTIONS ============================================== def set_date_of_labour(self, individual_id): """ @@ -2701,7 +2727,7 @@ def apply(self, individual_id): logger.debug(key='message', data=f'person {individual_id} has experienced an intrapartum still birth') random_draw = self.module.rng.random_sample() - + self.module.stillbirth_dates['stillbirth_date'] = self.sim.date # If this woman will experience a 
stillbirth and she was not pregnant with twins OR she was pregnant with # twins but both twins have died during labour we reset/set the appropriate variables if not df.at[individual_id, 'ps_multiple_pregnancy'] or \ diff --git a/src/tlo/methods/malaria.py b/src/tlo/methods/malaria.py index b1fdfb09dd..a6137fc4d9 100644 --- a/src/tlo/methods/malaria.py +++ b/src/tlo/methods/malaria.py @@ -755,6 +755,20 @@ def report_daly_values(self): return health_values.loc[df.is_alive] # returns the series + + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of malaria for all individuals + df = self.sim.population.props + total_clin = len( + df[ + df.is_alive + & ((df.ma_inf_type == 'clinical') | (df.ma_inf_type == 'severe')) + ] + ) + total_prev = total_clin/ len(df[df.is_alive]) + + return {'Malaria': total_prev} + def check_if_fever_is_caused_by_malaria( self, true_malaria_infection_type: str, diff --git a/src/tlo/methods/measles.py b/src/tlo/methods/measles.py index 39f9828860..1050f1db67 100644 --- a/src/tlo/methods/measles.py +++ b/src/tlo/methods/measles.py @@ -197,6 +197,15 @@ def report_daly_values(self): return health_values + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of measles for all individuals + df = self.sim.population.props + total_prev = len( + df[df.is_alive & df.me_has_measles] + ) / len(df[df.is_alive]) + + return {'Measles': total_prev} + def process_parameters(self): """Process the parameters (following being read-in) prior to the simulation starting. 
Make `self.symptom_probs` to be a dictionary keyed by age, with values of dictionaries keyed by symptoms and diff --git a/src/tlo/methods/mockitis.py b/src/tlo/methods/mockitis.py index 6af33c5fc7..1c423ef0f5 100644 --- a/src/tlo/methods/mockitis.py +++ b/src/tlo/methods/mockitis.py @@ -293,6 +293,14 @@ def report_daly_values(self): return health_values # returns the series + def report_prevalence(self): + logger.debug(key='debug', data='This is mockitis reporting my prevalence ') + + df = self.sim.population.props # shortcut to population properties dataframe + total_prev = df.loc[df.is_alive, 'mi_is_infected'].sum() / len(df[df['is_alive']]) + print(total_prev) + return {'Mockitis': total_prev} + def do_at_generic_first_appt_emergency( self, person_id: int, @@ -308,6 +316,7 @@ def do_at_generic_first_appt_emergency( ) schedule_hsi_event(event, priority=1, topen=self.sim.date) + class MockitisEvent(RegularEvent, PopulationScopeEventMixin): """ This event is occurring regularly at one monthly intervals and controls the infection process @@ -595,3 +604,200 @@ def apply(self, population): }) logger.info(key='status_counts', data=counts) + + +# --------------------------------------------------- +# Even simpler Dummy Disease - only infection. For checking logging +# --------------------------------------------------- +class DummyDisease(Module, GenericFirstAppointmentsMixin): + """This is a dummy infectious disease. 
+ + It demonstrates the following behaviours in respect of the healthsystem module: + + - Registration of the disease module with healthsystem + - Allow infections + - Collect prevalence + """ + + INIT_DEPENDENCIES = {'Demography'} + + OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'} + + CAUSES_OF_DISABILITY = { + 'Mockitis': Cause(label='Mockitis_Disability_And_Death') # use mockitis for simplicity + } + + # Declare Metadata + METADATA = { + Metadata.DISEASE_MODULE, + Metadata.USES_HEALTHBURDEN + } + + PARAMETERS = { + 'p_infection': Parameter( + Types.REAL, 'Probability that an uninfected individual becomes infected'), + 'initial_prevalence': Parameter( + Types.REAL, 'Prevalence of the disease in the initial population'), + + } + + PROPERTIES = { + 'dm_is_infected': Property( + Types.BOOL, 'Current status of DummyDisease'), + } + + def __init__(self, name=None, resourcefilepath=None): + # NB. Parameters passed to the module can be inserted in the __init__ definition. + + super().__init__(name) + self.resourcefilepath = resourcefilepath + + def read_parameters(self, data_folder): + """Read in parameters and do the registration of this module and its symptoms""" + + p = self.parameters + + p['p_infection'] = 1 + p['initial_prevalence'] = 0 + + def initialise_population(self, population): + """Set our property values for the initial population. + + This method is called by the simulation when creating the initial population, and is + responsible for assigning initial values, for every individual, of those properties + 'owned' by this module, i.e. those declared in the PROPERTIES dictionary above. 
+ + :param population: the population of individuals + """ + + df = population.props # a shortcut to the dataframe storing data for individiuals + + # Set default for properties + df.loc[df.is_alive, 'mi_is_infected'] = False # default: no individuals infected + + alive_count = df.is_alive.sum() + + # randomly selected some individuals as infected + initial_infected = self.parameters['initial_prevalence'] + df.loc[df.is_alive, 'dm_is_infected'] = self.rng.random_sample(size=alive_count) < initial_infected + + def initialise_simulation(self, sim): + + """Get ready for simulation start. + + This method is called just before the main simulation loop begins, and after all + modules have read their parameters and the initial population has been created. + It is a good place to add initial events to the event queue. + """ + + # add the basic event + event = DummyDiseaseEvent(self) + sim.schedule_event(event, sim.date + DateOffset(months=1)) + + # add an event to log to screen + sim.schedule_event(DummyDiseaseLoggingEvent(self), sim.date + DateOffset(months=1)) + + def on_birth(self, mother_id, child_id): + """Initialise our properties for a newborn individual. + + This is called by the simulation whenever a new person is born. 
+ + :param mother_id: the ID for the mother for this child + :param child_id: the ID for the new child + """ + + df = self.sim.population.props # shortcut to the population props dataframe + + # Initialise all the properties that this module looks after: + + child_is_infected = df.at[mother_id, 'dm_is_infected'] # is infected if mother is infected + + if child_is_infected: + # Assign properties + df.at[child_id, 'dm_is_infected'] = True + else: + # Assign the default for a child who is not infected + df.at[child_id, 'dm_is_infected'] = False + + def report_prevalence(self): + logger.debug(key='debug', data='This is DummyDisease reporting my prevalence ') + + df = self.sim.population.props # shortcut to population properties dataframe + total_prev = df.loc[df.is_alive, 'dm_is_infected'].sum() / len(df[df['is_alive']]) + return {'DummyDisease': total_prev} + + def report_daly_values(self): + # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been + # experienced by persons in the previous month. Only rows for alive-persons must be returned. + # The names of the series of columns is taken to be the label of the cause of this disability. + # It will be recorded by the healthburden module as _. + + logger.debug(key='debug', data='This is DummyDisease reporting my daly values') + + df = self.sim.population.props # shortcut to population properties dataframe + health_values = pd.Series(index=df.index[df.is_alive], data=0.2) # dummy value + + return health_values # returns the series + + + +class DummyDiseaseEvent(RegularEvent, PopulationScopeEventMixin): + """ + This event is occurring regularly at one monthly intervals and controls the infection process + and onset of symptoms of DummyDisease. 
+ """ + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(months=1)) + assert isinstance(module, DummyDisease) + + def apply(self, population): + + logger.debug(key='debug', + data='This is DummyDiseaseEvent, tracking the disease progression of the population.') + + df = population.props + + # 1. get (and hold) index of currently infected and uninfected individuals + currently_infected = df.index[df.dm_is_infected & df.is_alive] + currently_susc = df.index[df.is_alive & (~df.dm_is_infected)] + + if df.is_alive.sum(): + prevalence = len(currently_infected) / ( + len(currently_infected) + len(currently_susc)) + else: + prevalence = 0 + + # 2. handle new infections + now_infected = self.module.rng.choice([True, False], + size=len(currently_susc), + p=[prevalence, 1 - prevalence]) + + # if any are newly infected... + if now_infected.sum(): + infected_idx = currently_susc[now_infected] + + df.loc[infected_idx, 'dm_is_infected'] = True + + else: + logger.debug(key='debug', data='This is DummyDiseaseEvent, no one is newly infected.') + + +class DummyDiseaseLoggingEvent(RegularEvent, PopulationScopeEventMixin): + def __init__(self, module): + """Produce a summmary of the numbers of people with respect to their 'DummyDisease status' + """ + # run this event every month + self.repeat = 1 + super().__init__(module, frequency=DateOffset(months=self.repeat)) + assert isinstance(module, DummyDisease) + + def apply(self, population): + # get some summary statistics + df = population.props + + infected_total = df.loc[df.is_alive, 'dm_is_infected'].sum() + proportion_infected = infected_total / len(df) + + logger.info(key='summary', + data={'PropInf': proportion_infected,}) diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py index 3691bc6003..039d2e3c72 100644 --- a/src/tlo/methods/newborn_outcomes.py +++ b/src/tlo/methods/newborn_outcomes.py @@ -1308,6 +1308,8 @@ def report_daly_values(self): return health_values_df + 
def report_prevalence(self): + return None def run_if_care_of_the_receives_postnatal_check_cant_run(self, hsi_event): """ This function is called by HSI_NewbornOutcomes_ReceivesPostnatalCheck if the HSI is unable to diff --git a/src/tlo/methods/oesophagealcancer.py b/src/tlo/methods/oesophagealcancer.py index 8adc0614e1..dbeb1ce5da 100644 --- a/src/tlo/methods/oesophagealcancer.py +++ b/src/tlo/methods/oesophagealcancer.py @@ -577,6 +577,13 @@ def report_daly_values(self): return disability_series_for_alive_persons + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of oesophageal cancer for all individuals + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['oc_status'] != 'none')]) / len(df[df['is_alive']]) + + return {'Oesophageal Cancer': total_prev} def do_at_generic_first_appt( self, person_id: int, diff --git a/src/tlo/methods/other_adult_cancers.py b/src/tlo/methods/other_adult_cancers.py index 5aad8f971a..3ecaf7eee7 100644 --- a/src/tlo/methods/other_adult_cancers.py +++ b/src/tlo/methods/other_adult_cancers.py @@ -574,6 +574,14 @@ def report_daly_values(self): ] = self.daly_wts['metastatic_palliative_care'] return disability_series_for_alive_persons + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of other adult cancer for all individuals + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['oac_status'] != 'none')] + ) / len(df[df['is_alive']]) + + return {'Other Adult Cancers': total_prev} def do_at_generic_first_appt( self, diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index 25bce6013f..d35725a05e 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -397,6 +397,9 @@ def report_daly_values(self): return daly_series + def report_prevalence(self): + return None + def apply_linear_model(self, lm, df): """ Helper function will 
apply the linear model (lm) on the dataframe (df) to get a probability of some event diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index 7dd8819ab6..581911c8f5 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -59,6 +59,11 @@ def __init__(self, name=None, resourcefilepath=None): # across the length of pregnancy and the postnatal period self.mother_and_newborn_info = dict() + # Dictionary that records stillbirths + self.stillbirth_dates = { + 'stillbirth_date': pd.NaT + } + # This variable will store a Bitset handler for the property ps_abortion_complications self.abortion_complications = None @@ -67,7 +72,7 @@ def __init__(self, name=None, resourcefilepath=None): OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden', 'Malaria', 'CardioMetabolicDisorders', 'Hiv'} ADDITIONAL_DEPENDENCIES = { - 'Contraception', 'HealthSystem', 'Labour', 'CareOfWomenDuringPregnancy', 'Lifestyle'} + 'Contraception', 'HealthSystem', 'Labour', 'CareOfWomenDuringPregnancy', 'Lifestyle'} METADATA = {Metadata.DISEASE_MODULE, Metadata.USES_HEALTHBURDEN} @@ -145,7 +150,6 @@ def __init__(self, name=None, resourcefilepath=None): 'prob_spontaneous_abortion_death': Parameter( Types.LIST, 'underlying risk of death following an spontaneous abortion'), - # ANAEMIA... 'baseline_prob_anaemia_per_month': Parameter( Types.LIST, 'baseline risk of a woman developing anaemia secondary only to pregnant'), @@ -272,7 +276,6 @@ def __init__(self, name=None, resourcefilepath=None): 'rr_still_birth_chorio': Parameter( Types.LIST, 'relative risk of still birth in women with chorioamnionitis'), - # CARE SEEKING (NOT ANC)... 
'prob_seek_care_pregnancy_complication': Parameter( Types.LIST, 'Probability that a woman who is pregnant will seek care in the event of a complication'), @@ -725,7 +728,7 @@ def chronic_daly_calculations(person, complication): # Otherwise, if the complication started this month she gets a daly weight relative to the number of # days she has experience the complication elif (self.sim.date - DateOffset(months=1)) <= mni[person][ - f'{complication}_onset'] <= self.sim.date: + f'{complication}_onset'] <= self.sim.date: days_since_onset = pd.Timedelta((self.sim.date - mni[person][f'{complication}_onset']), unit='d') daly_weight = days_since_onset.days * (p[f'{complication}'] / DAYS_IN_YEAR) @@ -741,7 +744,7 @@ def chronic_daly_calculations(person, complication): if mni[person][f'{complication}_resolution'] < mni[person][f'{complication}_onset']: if (mni[person][f'{complication}_resolution'] == (self.sim.date - DateOffset(months=1))) and \ - (mni[person][f'{complication}_onset'] == self.sim.date): + (mni[person][f'{complication}_onset'] == self.sim.date): return # Calculate daily weight and how many days this woman hasnt had the complication @@ -793,7 +796,7 @@ def chronic_daly_calculations(person, complication): for complication in ['abortion', 'abortion_haem', 'abortion_sep', 'ectopic', 'ectopic_rupture', 'mild_mod_aph', 'severe_aph', 'chorio', 'eclampsia', 'obstructed_labour', - 'sepsis', 'uterine_rupture', 'mild_mod_pph', 'severe_pph', 'secondary_pph']: + 'sepsis', 'uterine_rupture', 'mild_mod_pph', 'severe_pph', 'secondary_pph']: acute_daly_calculation(complication=complication, person=person) for complication in ['hypertension', 'gest_diab', 'mild_anaemia', 'moderate_anaemia', @@ -826,6 +829,27 @@ def chronic_daly_calculations(person, complication): return daly_series + def report_prevalence(self): + """ + This function reports the prevalence of intrapartum stillbirth for this module generated in the previous month + """ + # Filter out non-dictionary values and 
entries where 'stillbirth_date' is None + stillbirths_happened = { + key: value for key, value in self.stillbirth_dates.items() + if isinstance(value, dict) and value.get('stillbirth_date') is not None + } + + # Filter entries with valid 'stillbirth_date' that occurred in the last month + one_month_ago = self.sim.date - pd.DateOffset(months=1) + filtered_stillbirths = { + key: value for key, value in stillbirths_happened.items() + if isinstance(value.get('stillbirth_date'), pd.Timestamp) and value.get('stillbirth_date') >= one_month_ago + } + + antenatal_stillbirth_for_month = len(filtered_stillbirths) + + return {'Antenatal stillbirth': antenatal_stillbirth_for_month} + def pregnancy_supervisor_property_reset(self, id_or_index): """ This function is called when all properties housed in the PregnancySupervisorModule should be reset. For example @@ -946,7 +970,7 @@ def apply_risk_of_induced_abortion(self, gestation_of_interest): # This function follows the same pattern as apply_risk_of_spontaneous_abortion (only women with unintended # pregnancy may seek induced abortion) - at_risk =\ + at_risk = \ df.is_alive & df.is_pregnant & (df.ps_gestational_age_in_weeks == gestation_of_interest) & \ (df.ps_ectopic_pregnancy == 'none') @@ -1212,7 +1236,7 @@ def log_new_progressed_cases(disease): women_on_anti_htns = \ df.is_pregnant & df.is_alive & (df.ps_gestational_age_in_weeks == gestation_of_interest) & \ - (df.ps_htn_disorders.str.contains('gest_htn|mild_pre_eclamp|severe_gest_htn|severe_pre_eclamp'))\ + (df.ps_htn_disorders.str.contains('gest_htn|mild_pre_eclamp|severe_gest_htn|severe_pre_eclamp')) \ & ~df.la_currently_in_labour & df.ac_gest_htn_on_treatment # Check theres no accidental cross over between these subsets @@ -1316,7 +1340,7 @@ def apply_risk_of_antepartum_haemorrhage(self, gestation_of_interest): 'timing': 'antenatal'}) non_severe_women = (df.loc[antepartum_haemorrhage.loc[antepartum_haemorrhage].index, - 'ps_antepartum_haemorrhage'] != 'severe') + 
'ps_antepartum_haemorrhage'] != 'severe') non_severe_women.loc[non_severe_women].index.to_series().apply( pregnancy_helper_functions.store_dalys_in_mni, mni=mni, mni_variable='mild_mod_aph_onset', @@ -1442,6 +1466,8 @@ def update_variables_post_still_birth_for_data_frame(self, women): self.sim.modules['CareOfWomenDuringPregnancy'].care_of_women_in_pregnancy_property_reset( id_or_index=women.index) + self.stillbirth_dates['stillbirth_date'] = self.sim.date + def update_variables_post_still_birth_for_individual(self, individual_id): """ This function is called to reset all the relevant pregnancy and treatment variables for a woman who undergoes @@ -1577,7 +1603,7 @@ def apply_risk_of_death_from_monthly_complications(self, individual_id): # Function checks df for any potential cause of death, uses CFR parameters to determine risk of death # (either from one or multiple causes) and if death occurs returns the cause potential_cause_of_death = pregnancy_helper_functions.check_for_risk_of_death_from_cause_maternal( - self, individual_id=individual_id, timing='antenatal') + self, individual_id=individual_id, timing='antenatal') # If a cause is returned death is scheduled if potential_cause_of_death: @@ -1628,7 +1654,7 @@ def schedule_first_anc_contact_for_new_pregnancy(self, gestation_of_interest): # Of the women who will not attend ANC4 early, we determine who will attend ANC4 later in pregnancy late_initiation_anc4 = pd.Series(self.rng.random_sample( len(early_initiation_anc4.loc[~early_initiation_anc4])) < params['prob_late_initiation_anc4'], - index=early_initiation_anc4.loc[~early_initiation_anc4].index) + index=early_initiation_anc4.loc[~early_initiation_anc4].index) # Check there are no duplicates for v in late_initiation_anc4.loc[late_initiation_anc4].index: @@ -1641,7 +1667,7 @@ def schedule_first_anc_contact_for_new_pregnancy(self, gestation_of_interest): # Select any women who are not predicted to attend ANC4 anc_below_4 = \ - df.is_alive & df.is_pregnant & 
(df.ps_gestational_age_in_weeks == gestation_of_interest) &\ + df.is_alive & df.is_pregnant & (df.ps_gestational_age_in_weeks == gestation_of_interest) & \ (df.ps_ectopic_pregnancy == 'none') & ~df.ps_anc4 # See if any of the women who wont attend ANC4 will still attend their first visit early in pregnancy @@ -1680,10 +1706,10 @@ def do_at_generic_first_appt_emergency( **kwargs, ) -> None: scheduling_options = { - "priority": 0, - "topen": self.sim.date, - "tclose": self.sim.date + pd.DateOffset(days=1), - } + "priority": 0, + "topen": self.sim.date, + "tclose": self.sim.date + pd.DateOffset(days=1), + } # ----- ECTOPIC PREGNANCY ----- if individual_properties["ps_ectopic_pregnancy"] != 'none': @@ -1706,12 +1732,14 @@ def do_at_generic_first_appt_emergency( ) schedule_hsi_event(event, **scheduling_options) + class PregnancySupervisorEvent(RegularEvent, PopulationScopeEventMixin): """ This is the PregnancySupervisorEvent, it is a weekly event which has four primary functions. 1.) It updates the gestational age (in weeks) of all women who are pregnant 2.) It applies monthly risk of key complications associated with pregnancy 3.) It determines if women who experience life seeking complications associated with pregnancy will seek care 4.) 
It applies risk of death and stillbirth to women who do not seek care following complications""" + def __init__(self, module, ): super().__init__(module, frequency=DateOffset(weeks=1)) @@ -1779,7 +1807,7 @@ def apply(self, population): df.is_alive & df.is_pregnant & (df.ps_gestational_age_in_weeks == 3) & (df.ps_ectopic_pregnancy == 'none') multiples = pd.Series(self.module.rng.random_sample(len(multiple_risk.loc[multiple_risk])) - < params['prob_multiples'], index=multiple_risk.loc[multiple_risk].index) + < params['prob_multiples'], index=multiple_risk.loc[multiple_risk].index) df.loc[multiples.loc[multiples].index, 'ps_multiple_pregnancy'] = True @@ -1962,7 +1990,6 @@ def apply(self, individual_id): care_seeking_result = self.module.care_seeking_pregnancy_loss_complications(individual_id, cause='ectopic_pre_rupture') if not care_seeking_result: - # For women who dont seek care (and get treatment) we schedule EctopicPregnancyRuptureEvent (simulating # fallopian tube rupture) in an additional 2-4 weeks from this event (if care seeking is unsuccessful # then this event is scheduled by the HSI (did_not_run) @@ -2062,12 +2089,12 @@ def apply(self, individual_id): if (not mother.is_alive or not mother.is_pregnant or (mother.ps_gestational_age_in_weeks < 20) or - ((mother.ps_gest_diab == 'none') and (mother.ac_gest_diab_on_treatment == 'none'))): + ((mother.ps_gest_diab == 'none') and (mother.ac_gest_diab_on_treatment == 'none'))): return # We apply a probability that the treatment this woman is receiving for her GDM (diet and exercise/ # oral anti-diabetics) will not control this womans hyperglycaemia - if self.module.rng.random_sample() > params[f'prob_glycaemic_control_{mother.ac_gest_diab_on_treatment }']: + if self.module.rng.random_sample() > params[f'prob_glycaemic_control_{mother.ac_gest_diab_on_treatment}']: # If so we reset her diabetes status as uncontrolled, her treatment is ineffective at reducing # risk of still birth, and when she returns for follow up 
she should be started on the next # treatment available @@ -2090,7 +2117,7 @@ def apply(self, individual_id): if (not df.at[individual_id, 'is_alive'] or not df.at[individual_id, 'is_pregnant'] or (individual_id not in mni) or - (not (mni[individual_id]['pred_syph_infect'] == self.sim.date))): + (not (mni[individual_id]['pred_syph_infect'] == self.sim.date))): return df.at[individual_id, 'ps_syphilis'] = True @@ -2102,6 +2129,7 @@ def apply(self, individual_id): class ParameterUpdateEvent(Event, PopulationScopeEventMixin): """This is ParameterUpdateEvent. It is scheduled to occur once on 2015 to update parameters being used by the maternal and newborn health model""" + def __init__(self, module): super().__init__(module) @@ -2151,6 +2179,7 @@ class PregnancyAnalysisEvent(Event, PopulationScopeEventMixin): either of the module parameters the signify analysis is being conducted are set to True, then key parameters are overridden to alter the coverage and/or quality of routine antenatal care delivery. """ + def __init__(self, module): super().__init__(module) @@ -2163,7 +2192,7 @@ def apply(self, population): params['alternative_anc_quality'] or \ params['alternative_ip_anc_quality'] or \ params['sens_analysis_max'] or \ - params['sens_analysis_min']: + params['sens_analysis_min']: # Update this parameter which is a signal used in the pregnancy_helper_function_file to ensure that # alternative functionality for determining availability of interventions only occurs when analysis is @@ -2173,7 +2202,6 @@ def apply(self, population): # When this parameter is set as True, the following parameters are overridden when the event is called. # Otherwise no parameters are updated. 
if params['alternative_anc_coverage']: - # Reset the intercept parameter of the equation determining care seeking for ANC4+ and scale the model target = params['anc_availability_odds'] params['odds_early_init_anc4'] = 1 @@ -2244,7 +2272,7 @@ def apply(self, population): women_with_previous_pe = len(df.index[(df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50) & df.ps_prev_pre_eclamp)]) women_with_hysterectomy = len(df.index[(df.is_alive & (df.sex == 'F') & (df.age_years > 14) & - (df.age_years < 50) & df.la_has_had_hysterectomy)]) + (df.age_years < 50) & df.la_has_had_hysterectomy)]) yearly_prev_sa = (women_with_previous_sa / women_reproductive_age) * 100 yearly_prev_pe = (women_with_previous_pe / women_reproductive_age) * 100 @@ -2254,7 +2282,7 @@ def apply(self, population): for parity in [0, 1, 2, 3, 4, 5]: if parity < 5: par = len(df.index[(df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50) & - (df.la_parity == parity))]) + (df.la_parity == parity))]) else: par = len(df.index[(df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50) & (df.la_parity >= parity))]) diff --git a/src/tlo/methods/prostate_cancer.py b/src/tlo/methods/prostate_cancer.py index dbbe2c427f..d563558b6b 100644 --- a/src/tlo/methods/prostate_cancer.py +++ b/src/tlo/methods/prostate_cancer.py @@ -256,7 +256,7 @@ def initialise_population(self, population): if pc_status_.sum(): sum_probs = sum(p['init_prop_prostate_ca_stage']) if sum_probs > 0: - prob_by_stage_of_cancer_if_cancer = [i/sum_probs for i in p['init_prop_prostate_ca_stage']] + prob_by_stage_of_cancer_if_cancer = [i / sum_probs for i in p['init_prop_prostate_ca_stage']] assert (sum(prob_by_stage_of_cancer_if_cancer) - 1.0) < 1e-10 df.loc[pc_status_, "pc_status"] = self.rng.choice( [val for val in df.pc_status.cat.categories if val != 'none'], @@ -305,7 +305,7 @@ def initialise_population(self, population): .when("none", 0.0) .when("prostate_confined", 
p['init_prop_urinary_symptoms_by_stage'][0]) .when("local_ln", p['init_prop_urinary_symptoms_by_stage'][1]) - .when("metastatic", p['init_prop_urinary_symptoms_by_stage'][2]) + .when("metastatic", p['init_prop_urinary_symptoms_by_stage'][2]) ) has_urinary_symptoms_at_init = lm_init_urinary.predict(df.loc[df.is_alive], self.rng) self.sim.modules['SymptomManager'].change_symptom( @@ -417,7 +417,7 @@ def initialise_simulation(self, sim): Predictor('had_treatment_during_this_stage', external=True).when(True, p['rr_local_ln_prostate_ca_undergone_curative_treatment']), Predictor('pc_status').when('prostate_confined', 1.0) - .otherwise(0.0) + .otherwise(0.0) ) lm['metastatic'] = LinearModel( @@ -426,7 +426,7 @@ def initialise_simulation(self, sim): Predictor('had_treatment_during_this_stage', external=True).when(True, p['rr_metastatic_prostate_ca_undergone_curative_treatment']), Predictor('pc_status').when('local_ln', 1.0) - .otherwise(0.0) + .otherwise(0.0) ) # Check that the dict labels are correct as these are used to set the value of pc_status @@ -589,6 +589,15 @@ def report_daly_values(self): return disability_series_for_alive_persons + def report_prevalence(self): + # This returns dataframe that reports on the prevalence of prostate cancer for all individuals + df = self.sim.population.props + total_prev = len( + df[(df['is_alive']) & (df['pc_status'] != 'none')] + ) / len(df[df['is_alive']]) + + return {'Prostate Cancer': total_prev} + def do_at_generic_first_appt( self, person_id: int, @@ -730,9 +739,9 @@ def apply(self, person_id, squeeze_factor): # todo: stratify by pc_status # Use a psa test to assess whether the person has prostate cancer: dx_result = hs.dx_manager.run_dx_test( - dx_tests_to_run='psa_for_prostate_cancer', - hsi_event=self - ) + dx_tests_to_run='psa_for_prostate_cancer', + hsi_event=self + ) # Check consumable availability cons_avail = self.get_consumables(item_codes=self.module.item_codes_prostate_can['screening_psa_test_optional']) @@ -787,13 
+796,13 @@ def apply(self, person_id, squeeze_factor): if dx_result and cons_avail: # send for biopsy hs.schedule_hsi_event( - hsi_event=HSI_ProstateCancer_Investigation_Following_psa_positive( - module=self.module, - person_id=person_id - ), - priority=0, - topen=self.sim.date, - tclose=None + hsi_event=HSI_ProstateCancer_Investigation_Following_psa_positive( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None ) @@ -823,7 +832,7 @@ def apply(self, person_id, squeeze_factor): cons_available = self.get_consumables(item_codes=self.module.item_codes_prostate_can['screening_biopsy_core'], optional_item_codes=self.module.item_codes_prostate_can[ - 'screening_biopsy_endoscopy_cystoscopy_optional']) + 'screening_biopsy_endoscopy_cystoscopy_optional']) if cons_available: # If consumables are available update the use of equipment and run the dx_test representing the biopsy @@ -1093,11 +1102,11 @@ def apply(self, population): # todo: the .between function I think includes the two dates so events on these dates counted twice # todo:_ I think we need to replace with date_lastlog <= x < date_now n_newly_diagnosed_prostate_confined = ( - df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'prostate_confined')).sum() + df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'prostate_confined')).sum() n_newly_diagnosed_local_ln = ( - df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'local_ln')).sum() + df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'local_ln')).sum() n_newly_diagnosed_metastatic = ( - df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'metastatic')).sum() + df.pc_date_diagnosis.between(date_lastlog, date_now) & (df.pc_status == 'metastatic')).sum() n_diagnosed = (df.is_alive & ~pd.isnull(df.pc_date_diagnosis)).sum() diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..c441b832b0 100644 
--- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2439,6 +2439,24 @@ def report_daly_values(self): disability_series_for_alive_persons = df.loc[df.is_alive, "rt_disability"] return disability_series_for_alive_persons + def report_prevalence(self): + # Returns a dict with the proportion of alive individuals injured in an RTI within the last month + df = self.sim.population.props + df_valid_dates = df[df['rt_date_inj'].notna()] + if df_valid_dates.empty: + total_prev = 0 # or you could use np.nan if you prefer + else: + # Calculate total prevalence for individuals with non-NaT injury dates + total_prev = len( + df_valid_dates[ + (df_valid_dates['is_alive']) & + (df_valid_dates['rt_inj_severity'] != 'none') & + (df_valid_dates['rt_date_inj'] >= (self.sim.date - DateOffset(months=1))) + ] + ) / len(df[df['is_alive']]) + + return {'RTI': total_prev} + def rti_assign_injuries(self, number): """ A function that can be called specifying the number of people affected by RTI injuries diff --git a/src/tlo/methods/schisto.py b/src/tlo/methods/schisto.py index 0e9735286a..a1a67eadbb 100644 --- a/src/tlo/methods/schisto.py +++ b/src/tlo/methods/schisto.py @@ -214,6 +214,14 @@ def get_total_disability_weight(list_of_symptoms: list) -> float: return pd.Series(index=df.index[df.is_alive], data=0.0).add(disability_weights_for_each_person_with_symptoms, fill_value=0.0) + def report_prevalence(self): + # Returns a dict with the proportion of alive individuals with a low or high schisto infection + df = self.sim.population.props + is_infected = df[self.cols_of_infection_status].isin(['Low-infection', 'High-infection']).any(axis=1) + total_prev = len(df[df['is_alive'] & is_infected]) / len(df[df['is_alive']]) + + return {'Schisto': total_prev} + def do_effect_of_treatment(self, person_id: Union[int, Sequence[int]]) -> None: """Do the effects of a treatment administered to a person or persons.
This can be called for a person who is infected and receiving treatment following a diagnosis, or for a person who is receiving treatment as part of a diff --git a/src/tlo/methods/skeleton.py b/src/tlo/methods/skeleton.py index 3f21cc4fff..1690326e54 100644 --- a/src/tlo/methods/skeleton.py +++ b/src/tlo/methods/skeleton.py @@ -32,6 +32,7 @@ class Skeleton(Module): - `on_birth(mother, child)` - `on_hsi_alert(person_id, treatment_id)` [If this is disease module] - `report_daly_values()` [If this is disease module] + - `report_prevalence()` [If this is disease module] """ # Declares modules that need to be registered in simulation and initialised before @@ -148,6 +149,15 @@ def report_daly_values(self): """ raise NotImplementedError + def report_prevalence(self): + """ + This reports on the prevalence of a disease/condition or the monthly rate of certain events. + The implementations added alongside this method return a dict that maps the name of each reported + condition to its value, e.g. `{'TB': 0.01}` (NOTE(review): confirm the exact type expected by + the HealthBurden module before relying on this). + """ + raise NotImplementedError + def on_hsi_alert(self, person_id, treatment_id): """ This is called whenever there is an HSI event commissioned by one of the other disease modules.
diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..b7aec9f7c6 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -1009,6 +1009,15 @@ def report_daly_values(self): return health_values.loc[df.is_alive] + def report_prevalence(self): + # This returns a dict that reports on the prevalence of active TB among alive individuals + + df = self.sim.population.props + df_tmp = df.loc[df.is_alive] + num_active_tb_cases = len(df_tmp[(df_tmp.tb_inf == "active") & df_tmp.is_alive]) + total_prev = num_active_tb_cases / len(df_tmp) + return {'TB': total_prev} + def calculate_untreated_proportion(self, population, strain): """ calculate the proportion of active TB cases not on correct treatment @@ -2752,9 +2761,7 @@ def apply(self, population): # ACTIVE num_active_tb_cases = len(df[(df.tb_inf == "active") & df.is_alive]) prev_active = num_active_tb_cases / len(df[df.is_alive]) - assert prev_active <= 1 - # prevalence of active TB in adults num_active_adult = len( df[(df.tb_inf == "active") & (df.age_years >= 15) & df.is_alive] diff --git a/tests/test_record_prevalence_healthburden_class.py b/tests/test_record_prevalence_healthburden_class.py new file mode 100644 index 0000000000..2e4eda693a --- /dev/null +++ b/tests/test_record_prevalence_healthburden_class.py @@ -0,0 +1,113 @@ +import os +from pathlib import Path + +from tlo import Date, Simulation +from tlo.analysis.utils import parse_log_file +from tlo.methods import demography, enhanced_lifestyle, healthburden, mockitis +from tlo.methods.fullmodel import fullmodel + +resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' +outputpath = Path("./outputs/") + +start_date = Date(2010, 1, 1) +end_date = Date(2011, 1, 12) + +popsize = 1000 +do_sim = True + + +def check_dtypes(simulation): + df = simulation.population.props + orig = simulation.population.new_row + assert (df.dtypes == orig.dtypes).all() + + +def log_prevalences_from_sim_func(sim): + """Logs the prevalence of disease
monthly""" + health_burden = sim.modules['HealthBurden'] + monthly_prevalence = health_burden.prevalence_of_diseases + monthly_prevalence['date'] = sim.date.year + return monthly_prevalence + + +def test_run_with_healthburden_with_real_diseases(tmpdir, seed): + """Check that everything runs in the simple cases of Mockitis and Chronic Syndrome and that outputs are as expected.""" + + sim = Simulation(start_date=start_date, seed=seed, log_config={'filename': 'test_log', 'directory': outputpath}) + sim.register(*fullmodel( + resourcefilepath=resourcefilepath, + use_simplified_births=False, )) + sim.make_initial_population(n=popsize) + sim.modules['HealthBurden'].parameters['logging_frequency_prevalence'] = 'day' + sim.simulate(end_date=end_date) + check_dtypes(sim) + output = parse_log_file(sim.log_filepath) + + prevalence = output['tlo.methods.healthburden']['prevalence_of_diseases'] + + # check to see if the monthly prevalence is calculated correctly NB for only one month + + log_prevalences_from_sim = log_prevalences_from_sim_func(sim) + for log_date in log_prevalences_from_sim['date']: + if log_date in prevalence['date'].values: + prevalence_row = prevalence.loc[prevalence['date'] == log_date].squeeze() + if 'date' in prevalence.columns: + prevalence_row = prevalence_row.drop('date') + + sim_row = log_prevalences_from_sim.loc[ + log_prevalences_from_sim['date'] == log_date].squeeze() + + for column in prevalence_row.index: + # Compare the values between the two DataFrames for this date and column + if prevalence_row[column] != sim_row[column]: + pass + else: + # Handle cases where the date is not found in prevalence DataFrame + pass + + ## See if the registered modules are reporting prevalences as they should + columns = prevalence.columns + excluded_modules = ['Lifestyle', 'HealthBurden', 'HealthSeekingBehaviour', 'SymptomManager', 'Epi', 'HealthSystem', + 'SimplifiedBirths', 'Contraception', 'CareOfWomenDuringPregnancy'] # don't return prevalences + + assert 
'chronic_ischemic_hd' in columns + + for module in sim.modules: + if module not in excluded_modules: + if module == 'CardioMetabolicDisorders': + corresponding_diseases = ['chronic_ischemic_hd', 'chronic_kidney_disease', 'chronic_lower_back_pain', + 'diabetes', 'hypertension'] + elif module == 'Demography': + corresponding_diseases = ['MMR', 'NMR'] + elif module == 'PregnancySupervisor': + corresponding_diseases = ['Antenatal stillbirth'] + elif module == 'Labour': + corresponding_diseases = ['Intrapartum stillbirth'] + assert all(disease in columns for disease in corresponding_diseases), \ + f"Not all diseases for module '{module}' are in columns." + + +def test_structure_logging_dummy_disease(tmpdir, seed): + start_date = Date(2010, 1, 1) + end_date = Date(2011, 1, 1) + + sim = Simulation(start_date=start_date, seed=0, log_config={'filename': 'tmp', 'directory': tmpdir}) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + mockitis.DummyDisease(resourcefilepath=resourcefilepath, ), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + sort_modules=False, + check_all_dependencies=False + ) + + sim.make_initial_population(n=popsize) + sim.modules['HealthBurden'].parameters['logging_frequency_prevalence'] = 'month' + sim.simulate(end_date=end_date) + output = parse_log_file(sim.log_filepath) + + prevalence_healthburden_log = output['tlo.methods.healthburden']['prevalence_of_diseases']['DummyDisease'] + prevalence_dummy_log = output['tlo.methods.mockitis']["summary"]["PropInf"] + + for row in range(len(prevalence_healthburden_log) -1): # has extra log for first day + assert prevalence_healthburden_log[row + 1] == prevalence_dummy_log[row]