# Source code for tempdataset.core.datasets.performance

"""
Performance monitoring dataset generator.

Generates realistic application performance metrics.
"""

import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils



# [docs]
class PerformanceDataset(BaseDataset):
    """Performance monitoring dataset generator for application performance tracking.

    Every generated row is a flat ``dict`` describing one synthetic
    performance sample: which application/endpoint/environment/region it
    belongs to, a set of random latency, throughput, error and resource
    figures, and a few values derived from them (SLA compliance, alert
    state, performance score).  The column names and logical types are
    listed by :meth:`get_schema`.
    """

    def __init__(self, rows: int = 500):
        """Create a generator that produces ``rows`` rows per ``generate()`` call.

        Args:
            rows: Number of rows to generate; forwarded to ``BaseDataset``.
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()
        self._init_data_lists()
        # Sequence number embedded in ``metric_id``; starts at 1.
        self._metric_counter = 1

    def _init_data_lists(self) -> None:
        """Populate the lookup lists that rows are sampled from."""
        self.applications = [
            'web-frontend', 'user-service', 'order-service', 'payment-service',
            'inventory-service', 'notification-service', 'auth-service',
            'report-service', 'mobile-api', 'admin-panel', 'analytics-service'
        ]

        # Endpoints keyed by application name; every entry of
        # ``self.applications`` has a key here.
        self.endpoints = {
            'web-frontend': ['/home', '/products', '/checkout', '/profile', '/search'],
            'user-service': ['/api/users', '/api/users/{id}', '/api/users/search', '/api/users/profile'],
            'order-service': ['/api/orders', '/api/orders/{id}', '/api/orders/status', '/api/orders/history'],
            'payment-service': ['/api/payments/process', '/api/payments/{id}', '/api/payments/refund'],
            'inventory-service': ['/api/inventory', '/api/inventory/check', '/api/inventory/update'],
            'notification-service': ['/api/notifications/send', '/api/notifications/{id}'],
            'auth-service': ['/api/auth/login', '/api/auth/logout', '/api/auth/refresh', '/api/auth/validate'],
            'report-service': ['/api/reports/sales', '/api/reports/users', '/api/reports/performance'],
            'mobile-api': ['/mobile/auth', '/mobile/products', '/mobile/orders', '/mobile/profile'],
            'admin-panel': ['/admin/dashboard', '/admin/users', '/admin/orders', '/admin/settings'],
            'analytics-service': ['/api/analytics/events', '/api/analytics/reports', '/api/analytics/metrics']
        }

        self.environments = ['production', 'staging', 'development']

        self.regions = ['us-east-1', 'us-west-2', 'eu-west-1', 'ap-southeast-1']

        self.performance_metrics = [
            'response_time', 'throughput', 'error_rate', 'cpu_usage',
            'memory_usage', 'database_query_time', 'cache_hit_rate'
        ]

        # NOTE: 'SLA_BREACH' is listed here but row generation never emits it;
        # the list is kept unchanged because it is a public attribute.
        self.alert_types = ['SLA_BREACH', 'HIGH_ERROR_RATE', 'SLOW_RESPONSE', 'RESOURCE_USAGE']

    def generate(self) -> List[Dict[str, Any]]:
        """Generate ``self.rows`` performance rows.

        When ``self.seed`` is set, the ``random`` module and the faker
        utilities are re-seeded and the ``metric_id`` sequence restarts at 1,
        so repeated seeded calls on the same instance yield the same IDs and
        the same random values.  Without a seed the ID sequence keeps
        counting across calls.

        Returns:
            A list of row dictionaries, one per generated sample.
        """
        if self.seed is not None:
            random.seed(self.seed)
            self.faker_utils.set_seed(self.seed)
            # The RNGs restart here, so the ID sequence has to restart too;
            # otherwise a second seeded run differs only in its metric_ids.
            self._metric_counter = 1

        return [self._generate_row() for _ in range(self.rows)]

    def _random_timestamp(self) -> datetime:
        """Return a random timestamp inside the last seven days.

        A day is drawn through ``faker_utils.date_between`` and a random time
        of day is attached to it.  The result is clamped to
        ``[now - 7 days, now]`` because a random time on the first or last
        day of the window could otherwise fall outside it (in particular,
        in the future).
        """
        now = datetime.now()
        window_start = now - timedelta(days=7)
        day = self.faker_utils.date_between(window_start, now)
        moment = datetime.combine(
            day,
            datetime.min.time().replace(
                hour=random.randint(0, 23),
                minute=random.randint(0, 59),
                second=random.randint(0, 59)
            )
        )
        return min(max(moment, window_start), now)

    @staticmethod
    def _classify_alert(sla_breached: bool, error_rate_percent: float,
                        cpu_usage_percent: float, memory_usage_percent: float):
        """Return the alert type for a sample, or ``None`` when nothing fires.

        Conditions are checked in priority order: SLA breach, then error
        rate above 5%, then CPU or memory usage above 80%.
        """
        if sla_breached:
            return 'SLOW_RESPONSE'
        if error_rate_percent > 5.0:
            return 'HIGH_ERROR_RATE'
        if cpu_usage_percent > 80.0 or memory_usage_percent > 80.0:
            return 'RESOURCE_USAGE'
        return None

    @staticmethod
    def _performance_score(sla_breached: bool, error_rate_percent: float,
                           cpu_usage_percent: float, memory_usage_percent: float,
                           cache_hit_rate_percent: float) -> int:
        """Return a score that starts at 100 and loses points per violated threshold."""
        score = 100
        if sla_breached:
            score -= 20
        if error_rate_percent > 2.0:
            score -= 15
        if cpu_usage_percent > 70.0:
            score -= 10
        if memory_usage_percent > 70.0:
            score -= 10
        if cache_hit_rate_percent < 85.0:
            score -= 5
        # Current penalties total at most 60; the floor guards future additions.
        return max(0, score)

    def _generate_row(self) -> Dict[str, Any]:
        """Build one row of synthetic performance data.

        The order of the random draws below is significant: changing it
        changes the output produced for a given seed.

        Returns:
            A dict whose keys match :meth:`get_schema`.
        """
        # Basic metric info
        metric_id = f"PERF-2025-{self._metric_counter:08d}"
        self._metric_counter += 1

        # Timestamp - performance metrics collected frequently
        timestamp = self._random_timestamp()

        # Application and endpoint
        application = random.choice(self.applications)
        endpoint = random.choice(self.endpoints[application])
        environment = random.choices(
            self.environments,
            weights=[0.70, 0.20, 0.10]  # production, staging, development
        )[0]
        region = random.choice(self.regions)

        # Performance metrics - value ranges depend on the application type.
        # NOTE(review): the substring test also matches 'report-service' and
        # 'analytics-service', so only 'mobile-api' and 'admin-panel' reach
        # the final branch - confirm that this is intended.
        if 'service' in application:
            # Microservices typically faster
            response_time_ms = random.randint(50, 2000)
            throughput_rps = random.randint(10, 500)
        elif application == 'web-frontend':
            # Frontend can be slower due to rendering
            response_time_ms = random.randint(200, 5000)
            throughput_rps = random.randint(20, 200)
        else:
            # Admin/reporting applications
            response_time_ms = random.randint(500, 10000)
            throughput_rps = random.randint(1, 50)

        # Error rate - lower for production
        if environment == 'production':
            error_rate_percent = round(random.uniform(0.0, 5.0), 2)
        else:
            error_rate_percent = round(random.uniform(0.0, 15.0), 2)

        # System resources
        cpu_usage_percent = round(random.uniform(5.0, 90.0), 2)
        memory_usage_percent = round(random.uniform(20.0, 85.0), 2)

        # Database performance
        database_query_time_ms = random.randint(10, 1000)
        database_connections = random.randint(5, 100)

        # Cache performance
        cache_hit_rate_percent = round(random.uniform(75.0, 99.0), 2)

        # Network metrics
        network_latency_ms = random.randint(1, 200)

        # Concurrent users
        concurrent_users = random.randint(1, 1000)

        # SLA metrics
        sla_target_ms = random.choice([500, 1000, 2000, 5000])
        sla_compliance = response_time_ms <= sla_target_ms
        sla_breached = not sla_compliance

        # Availability
        uptime_percent = round(random.uniform(95.0, 100.0), 3)

        # Alert conditions: an alert is triggered exactly when some alert
        # type applies to the sample.
        alert_type = self._classify_alert(
            sla_breached, error_rate_percent,
            cpu_usage_percent, memory_usage_percent
        )
        alert_triggered = alert_type is not None

        # Performance score (0-100)
        performance_score = self._performance_score(
            sla_breached, error_rate_percent, cpu_usage_percent,
            memory_usage_percent, cache_hit_rate_percent
        )

        return {
            'metric_id': metric_id,
            'timestamp': timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            'application': application,
            'endpoint': endpoint,
            'environment': environment,
            'region': region,
            'response_time_ms': response_time_ms,
            'throughput_rps': throughput_rps,
            'error_rate_percent': error_rate_percent,
            'cpu_usage_percent': cpu_usage_percent,
            'memory_usage_percent': memory_usage_percent,
            'database_query_time_ms': database_query_time_ms,
            'database_connections': database_connections,
            'cache_hit_rate_percent': cache_hit_rate_percent,
            'network_latency_ms': network_latency_ms,
            'concurrent_users': concurrent_users,
            'sla_target_ms': sla_target_ms,
            'sla_compliance': sla_compliance,
            'uptime_percent': uptime_percent,
            'alert_triggered': alert_triggered,
            'alert_type': alert_type,
            'performance_score': performance_score
        }

    def get_schema(self) -> Dict[str, str]:
        """Return a mapping of column name to logical type name.

        The keys are the same as those of the rows built by
        ``_generate_row``.  ``alert_type`` is declared as ``'string'`` but is
        ``None`` for rows without an alert.
        """
        return {
            'metric_id': 'string', 'timestamp': 'datetime', 'application': 'string',
            'endpoint': 'string', 'environment': 'string', 'region': 'string',
            'response_time_ms': 'integer', 'throughput_rps': 'integer',
            'error_rate_percent': 'float', 'cpu_usage_percent': 'float',
            'memory_usage_percent': 'float', 'database_query_time_ms': 'integer',
            'database_connections': 'integer', 'cache_hit_rate_percent': 'float',
            'network_latency_ms': 'integer', 'concurrent_users': 'integer',
            'sla_target_ms': 'integer', 'sla_compliance': 'boolean',
            'uptime_percent': 'float', 'alert_triggered': 'boolean',
            'alert_type': 'string', 'performance_score': 'integer'
        }