# Source code for tempdataset.core.datasets.app_usage

"""
App usage dataset generator.

Generates realistic mobile/web application usage data.
"""

import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


class AppUsageDataset(BaseDataset):
    """Generator of synthetic application-usage session records.

    Every row describes a single app session: identifiers, device details,
    start/end timestamps, features touched, event count, network type,
    crash count, purchase amount and country.
    """

    def __init__(self, rows: int = 500):
        """Prepare the generator.

        Args:
            rows: Row count forwarded to ``BaseDataset.__init__``;
                ``generate`` iterates ``self.rows`` times.
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()
        self._init_data_lists()
        # Sequence number embedded in each usage_id. It is only ever
        # incremented, so it keeps growing across generate() calls.
        self._usage_counter = 1

    def _init_data_lists(self) -> None:
        """Populate the fixed value pools that rows are sampled from."""
        self.app_versions = [
            '1.0.0', '1.1.0', '1.2.0', '1.2.1', '2.0.0', '2.0.1', '2.1.0',
        ]

        # Device models and OS versions are keyed by platform so a row never
        # pairs an iOS model with an Android version (or the reverse).
        self.device_models = {
            'iOS': [
                'iPhone 14 Pro', 'iPhone 14', 'iPhone 13 Pro', 'iPhone 13',
                'iPhone 12', 'iPhone SE', 'iPad Pro', 'iPad Air', 'iPad',
            ],
            'Android': [
                'Samsung Galaxy S23', 'Samsung Galaxy S22', 'Google Pixel 7',
                'OnePlus 11', 'Xiaomi 13', 'Samsung Galaxy A54', 'Huawei P50',
            ],
        }
        self.device_os_versions = {
            'iOS': ['16.0', '16.1', '16.2', '15.7', '15.6', '14.8'],
            'Android': ['13', '12', '11', '10', '9'],
        }

        # Order matters: _generate_row pairs this list positionally with
        # its selection weights.
        self.network_types = ['WiFi', '4G', '5G', '3G', 'Edge']

        self.app_features = [
            'Login', 'Profile', 'Settings', 'Search', 'Notifications',
            'Chat', 'Camera', 'Gallery', 'Maps', 'Payment', 'Share',
            'Favorites', 'History', 'Help', 'Feedback', 'Dark Mode',
        ]

        self.countries = [
            'United States', 'India', 'China', 'Brazil', 'Japan',
            'Germany', 'United Kingdom', 'France', 'Canada', 'Australia',
        ]

    def generate(self) -> List[Dict[str, Any]]:
        """Build the dataset.

        When ``self.seed`` is not ``None`` it is applied to both the
        ``random`` module and the faker utilities before any row is drawn.

        Returns:
            A list of ``self.rows`` row dictionaries.
        """
        seed = self.seed
        if seed is not None:
            random.seed(seed)
            self.faker_utils.set_seed(seed)

        records: List[Dict[str, Any]] = []
        for _ in range(self.rows):
            records.append(self._generate_row())
        return records

    def _generate_row(self) -> Dict[str, Any]:
        """Produce one session record.

        The random draws below are made in a fixed order; reordering them
        would change the output produced for a given seed.

        Returns:
            A dictionary whose keys match those of ``get_schema``.
        """
        # --- identifiers -------------------------------------------------
        sequence_number = self._usage_counter
        self._usage_counter = sequence_number + 1
        usage_id = f"APPUSE-2025-{sequence_number:06d}"
        user_id = f"USER-{random.randint(100000, 999999)}"
        app_version = random.choice(self.app_versions)

        # --- device ------------------------------------------------------
        platform = random.choice(['iOS', 'Android'])
        device_model = random.choice(self.device_models[platform])
        os_release = random.choice(self.device_os_versions[platform])
        os_label = f"{platform} {os_release}"

        # --- session -----------------------------------------------------
        session_id = f"SESS-{random.randint(100000, 999999)}"

        # Pick a day within the last week, then attach a time of day with
        # the hour drawn from 6..23 inclusive.
        # NOTE(review): if date_between can return today's date, the drawn
        # hour may place the session later than the current time - confirm
        # against faker_utils.
        window_start = datetime.now() - timedelta(days=7)
        window_end = datetime.now()
        session_day = self.faker_utils.date_between(window_start, window_end)
        hour = random.randint(6, 23)
        minute = random.randint(0, 59)
        second = random.randint(0, 59)
        time_of_day = datetime.min.time().replace(
            hour=hour, minute=minute, second=second
        )
        started_at = datetime.combine(session_day, time_of_day)

        # Session length in minutes, weighted toward short sessions.
        duration_options = [1, 5, 15, 30, 60, 120]
        duration_weights = [0.3, 0.25, 0.2, 0.15, 0.07, 0.03]
        (duration_minutes,) = random.choices(
            duration_options, weights=duration_weights
        )
        ended_at = started_at + timedelta(minutes=duration_minutes)
        screen_time_seconds = duration_minutes * 60

        # --- activity ----------------------------------------------------
        feature_count = random.randint(1, 8)
        features = random.sample(self.app_features, feature_count)
        events_triggered = random.randint(5, 100)

        # Weights line up positionally with self.network_types.
        (network_type,) = random.choices(
            self.network_types, weights=[0.6, 0.25, 0.10, 0.04, 0.01]
        )

        (crashes_occurred,) = random.choices(
            [0, 1, 2], weights=[0.95, 0.04, 0.01]
        )

        # A purchase amount is drawn only for the 5% of sessions that buy.
        purchase_probability = 0.05
        in_app_purchases = (
            round(random.uniform(0.99, 49.99), 2)
            if random.random() < purchase_probability
            else 0.0
        )

        location_country = random.choice(self.countries)

        stamp_format = '%Y-%m-%d %H:%M:%S'
        row: Dict[str, Any] = {
            'usage_id': usage_id,
            'user_id': user_id,
            'app_version': app_version,
            'device_model': device_model,
            'device_os': os_label,
            'session_id': session_id,
            'session_start_time': started_at.strftime(stamp_format),
            'session_end_time': ended_at.strftime(stamp_format),
            'features_used': ', '.join(features),
            'events_triggered': events_triggered,
            'screen_time_seconds': screen_time_seconds,
            'network_type': network_type,
            'crashes_occurred': crashes_occurred,
            'in_app_purchases': in_app_purchases,
            'location_country': location_country,
        }
        return row

    def get_schema(self) -> Dict[str, str]:
        """Describe the columns of a generated row.

        Returns:
            A mapping from column name to a type label (``'string'``,
            ``'datetime'``, ``'integer'`` or ``'float'``), listed in the
            same order as the keys produced by ``_generate_row``.
        """
        schema: Dict[str, str] = {
            'usage_id': 'string',
            'user_id': 'string',
            'app_version': 'string',
            'device_model': 'string',
            'device_os': 'string',
            'session_id': 'string',
            'session_start_time': 'datetime',
            'session_end_time': 'datetime',
            'features_used': 'string',
            'events_triggered': 'integer',
            'screen_time_seconds': 'integer',
            'network_type': 'string',
            'crashes_occurred': 'integer',
            'in_app_purchases': 'float',
            'location_country': 'string',
        }
        return schema