# Source code for tempdataset.core.datasets.medical_history

"""
Medical history dataset generator.

Generates realistic patient medical history records.
"""

import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


class MedicalHistoryDataset(BaseDataset):
    """Medical history dataset generator for patient conditions.

    Each generated row describes one condition in a patient's history:
    the condition and (optionally) one of its listed ICD-10 codes, the
    diagnosis date, a status, and an optional treatment with its start/end
    dates, attending physician and free-text notes.

    Note:
        ``history_id`` values come from a counter that is initialised in
        ``__init__`` and never reset, so repeated ``generate()`` calls on
        the same instance keep numbering where the previous call stopped.
    """

    # Candidate free-text notes. ``None`` is a deliberate member: together
    # with the 40% gate in ``_generate_row`` roughly 36% of rows carry a note.
    _NOTES_OPTIONS = (
        'Patient responding well to treatment',
        'Medication dosage adjusted',
        'Requires regular monitoring',
        'Patient non-compliant with medication',
        'Condition well-controlled',
        'Referred to specialist',
        'Family history positive',
        'Patient education provided',
        'Lifestyle modifications recommended',
        None,
    )

    def __init__(self, rows: int = 500):
        """Create a generator that will produce ``rows`` history records.

        Args:
            rows: Number of records returned by ``generate()``.
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()
        self._init_data_lists()
        self._history_counter = 1

    def _init_data_lists(self) -> None:
        """Populate the lookup tables used while generating rows.

        ``medical_conditions`` maps a condition name to:

        * ``codes`` - ICD-10 codes to choose from,
        * ``treatments`` - typical treatments to choose from,
        * ``chronic_likelihood`` - probability that a record for this
          condition is given the status ``'Chronic'``.
        """
        # Medical conditions with ICD-10 codes and typical treatments
        self.medical_conditions = {
            'Hypertension': {
                'codes': ['I10', 'I11.9', 'I12.9'],
                'treatments': ['Lisinopril therapy', 'Lifestyle modification',
                               'DASH diet', 'Amlodipine therapy'],
                'chronic_likelihood': 0.9,
            },
            'Type 2 Diabetes Mellitus': {
                'codes': ['E11.9', 'E11.65', 'E11.40'],
                'treatments': ['Metformin therapy', 'Insulin therapy',
                               'Diet modification', 'Glucose monitoring'],
                'chronic_likelihood': 0.95,
            },
            'Hyperlipidemia': {
                'codes': ['E78.5', 'E78.0', 'E78.2'],
                'treatments': ['Statin therapy', 'Diet modification',
                               'Exercise program', 'Atorvastatin therapy'],
                'chronic_likelihood': 0.8,
            },
            'Coronary Artery Disease': {
                'codes': ['I25.10', 'I25.9', 'I25.119'],
                'treatments': ['Cardiac catheterization', 'Stent placement',
                               'Beta-blocker therapy', 'Aspirin therapy'],
                'chronic_likelihood': 0.9,
            },
            'Asthma': {
                'codes': ['J45.9', 'J45.909', 'J45.40'],
                'treatments': ['Albuterol inhaler', 'Inhaled corticosteroids',
                               'Allergy management', 'Bronchodilator therapy'],
                'chronic_likelihood': 0.85,
            },
            'Depression': {
                'codes': ['F32.9', 'F33.9', 'F32.2'],
                'treatments': ['SSRI therapy', 'Cognitive behavioral therapy',
                               'Psychotherapy', 'Sertraline therapy'],
                'chronic_likelihood': 0.7,
            },
            'Anxiety Disorder': {
                'codes': ['F41.9', 'F41.1', 'F40.9'],
                'treatments': ['Anxiolytic therapy', 'Cognitive behavioral therapy',
                               'Relaxation techniques', 'SSRI therapy'],
                'chronic_likelihood': 0.6,
            },
            'Osteoarthritis': {
                'codes': ['M19.90', 'M15.9', 'M19.011'],
                'treatments': ['NSAIDs', 'Physical therapy',
                               'Joint injection', 'Weight management'],
                'chronic_likelihood': 0.95,
            },
            'COPD': {
                'codes': ['J44.1', 'J44.0', 'J44.10'],
                'treatments': ['Bronchodilator therapy', 'Inhaled corticosteroids',
                               'Pulmonary rehabilitation', 'Oxygen therapy'],
                'chronic_likelihood': 0.95,
            },
            'Chronic Kidney Disease': {
                'codes': ['N18.9', 'N18.6', 'N18.3'],
                'treatments': ['ACE inhibitor therapy', 'Dietary restriction',
                               'Phosphate binders', 'Nephrology consultation'],
                'chronic_likelihood': 0.9,
            },
            'Atrial Fibrillation': {
                'codes': ['I48.91', 'I48.0', 'I48.1'],
                'treatments': ['Anticoagulation therapy', 'Rate control',
                               'Cardioversion', 'Ablation therapy'],
                'chronic_likelihood': 0.8,
            },
            'Migraine': {
                'codes': ['G43.909', 'G43.919', 'G43.009'],
                'treatments': ['Triptan therapy', 'Preventive medication',
                               'Lifestyle modification', 'Botox injections'],
                'chronic_likelihood': 0.7,
            },
            'Pneumonia': {
                # J18.1 (lobar pneumonia) replaces J44.0, which is a COPD
                # code already listed under 'COPD' above; keeping it here
                # made one code map to two different conditions.
                'codes': ['J18.9', 'J15.9', 'J18.1'],
                'treatments': ['Antibiotic therapy', 'Supportive care',
                               'Hospitalization', 'Respiratory therapy'],
                'chronic_likelihood': 0.1,
            },
            'Gastroesophageal Reflux Disease': {
                'codes': ['K21.9', 'K21.0', 'K20.9'],
                'treatments': ['PPI therapy', 'H2 blocker therapy',
                               'Lifestyle modification', 'Dietary changes'],
                'chronic_likelihood': 0.8,
            },
            'Sleep Apnea': {
                'codes': ['G47.33', 'G47.30', 'G47.39'],
                'treatments': ['CPAP therapy', 'Weight loss',
                               'Sleep study', 'Oral appliance'],
                'chronic_likelihood': 0.9,
            },
        }

        self.condition_statuses = ['Active', 'Resolved', 'Chronic']

    def generate(self) -> List[Dict[str, Any]]:
        """Generate ``self.rows`` medical history records.

        When ``self.seed`` is set, both the ``random`` module and the faker
        utilities are seeded before any row is produced.

        Returns:
            A list with one dictionary per record; see ``get_schema()`` for
            the keys.
        """
        if self.seed is not None:
            random.seed(self.seed)
            self.faker_utils.set_seed(self.seed)

        return [self._generate_row() for _ in range(self.rows)]

    @staticmethod
    def _now_like(reference):
        """Return the present moment in the same type as ``reference``.

        ``date`` and ``datetime`` objects cannot be compared with each
        other, so the "latest allowed" bound has to match whatever the
        faker helper returned for the diagnosis date.
        """
        if isinstance(reference, datetime):
            # Reuse the reference's tzinfo (None for naive values) so the
            # comparison is valid for naive and aware datetimes alike.
            return datetime.now(reference.tzinfo)
        return datetime.now().date()

    @staticmethod
    def _pick_status(chronic_likelihood: float) -> str:
        """Pick a record status.

        Returns ``'Chronic'`` with probability ``chronic_likelihood``;
        otherwise ``'Active'`` or ``'Resolved'`` with weights 0.3 / 0.7.
        """
        if random.random() < chronic_likelihood:
            return 'Chronic'
        return random.choices(['Active', 'Resolved'], weights=[0.3, 0.7])[0]

    def _generate_treatment_dates(self, diagnosis_date, status: str) -> tuple:
        """Build the ``(start, end)`` dates of a treatment.

        The treatment starts 0-30 days after diagnosis. Whether it has an
        end date, and how long it lasts, depends on ``status``:

        * ``'Resolved'`` - always ends, after 30-365 days;
        * ``'Chronic'`` - 30% end after 90-1095 days (switched treatments),
          the rest are ongoing;
        * anything else (``'Active'``) - 50% end after 7-180 days.

        A history record must not contain dates that have not happened yet,
        so the start is capped at the present. An end date that would fall
        in the future is capped at the present for resolved conditions
        (which need an end date) and reported as ongoing (``None``)
        otherwise.

        Args:
            diagnosis_date: Date of diagnosis as returned by the faker
                helper (a ``date`` or ``datetime``).
            status: Status already chosen for the record.

        Returns:
            ``(start, end)`` where ``end`` is ``None`` for ongoing treatment.
        """
        latest = self._now_like(diagnosis_date)

        start_delay = random.randint(0, 30)
        start = min(diagnosis_date + timedelta(days=start_delay), latest)

        # Draw order (gate first, then duration) matches the if/else form
        # so seeded runs consume the random stream identically.
        if status == 'Resolved':
            duration = random.randint(30, 365)
        elif status == 'Chronic':
            duration = random.randint(90, 1095) if random.random() < 0.3 else None
        else:  # Active
            duration = random.randint(7, 180) if random.random() < 0.5 else None

        if duration is None:
            return start, None

        end = start + timedelta(days=duration)
        if end > latest:
            end = latest if status == 'Resolved' else None
        return start, end

    def _generate_row(self) -> Dict[str, Any]:
        """Generate a single medical history record.

        Optional fields are ``None`` when absent: ``physician_id`` (present
        in about 80% of rows), ``condition_code`` (90%), the treatment
        fields (80%) and ``notes``. Dates are formatted as ``YYYY-MM-DD``.
        """
        # Basic history record info
        history_id = f"HIS-2025-{self._history_counter:06d}"
        self._history_counter += 1

        patient_id = f"PAT-2025-{random.randint(1, 999999):06d}"
        physician_id = f"PHY-{random.randint(1000, 9999)}" if random.random() < 0.8 else None

        # Select medical condition
        condition_name = random.choice(list(self.medical_conditions))
        condition_info = self.medical_conditions[condition_name]
        condition_code = random.choice(condition_info['codes']) if random.random() < 0.9 else None

        # Diagnosis date - anywhere from 10 years ago to now
        now = datetime.now()
        diagnosis_date = self.faker_utils.date_between(now - timedelta(days=3650), now)

        status = self._pick_status(condition_info['chronic_likelihood'])

        # Treatment information
        treatment_name = random.choice(condition_info['treatments']) if random.random() < 0.8 else None
        if treatment_name:
            treatment_start_date, treatment_end_date = self._generate_treatment_dates(
                diagnosis_date, status
            )
        else:
            treatment_start_date = None
            treatment_end_date = None

        # Notes (optional)
        notes = random.choice(self._NOTES_OPTIONS) if random.random() < 0.4 else None

        return {
            'history_id': history_id,
            'patient_id': patient_id,
            'condition_name': condition_name,
            'condition_code': condition_code,
            'diagnosis_date': diagnosis_date.strftime('%Y-%m-%d'),
            'status': status,
            'treatment_name': treatment_name,
            'treatment_start_date': treatment_start_date.strftime('%Y-%m-%d') if treatment_start_date else None,
            'treatment_end_date': treatment_end_date.strftime('%Y-%m-%d') if treatment_end_date else None,
            'physician_id': physician_id,
            'notes': notes,
        }

    def get_schema(self) -> Dict[str, str]:
        """Return the column name to type-name mapping of generated rows."""
        return {
            'history_id': 'string',
            'patient_id': 'string',
            'condition_name': 'string',
            'condition_code': 'string',
            'diagnosis_date': 'date',
            'status': 'string',
            'treatment_name': 'string',
            'treatment_start_date': 'date',
            'treatment_end_date': 'date',
            'physician_id': 'string',
            'notes': 'string',
        }