Source code for tempdataset.core.datasets.performance
"""
Performance monitoring dataset generator.
Generates realistic application performance metrics.
"""
import random
from datetime import datetime, timedelta
from typing import List, Dict, Any
from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils
[docs]
class PerformanceDataset(BaseDataset):
    """Performance monitoring dataset generator for application performance tracking.

    Every generated row is one synthetic metric sample for an application
    endpoint: latency, throughput, error rate, resource usage, SLA
    compliance, alert state and a derived 0-100 performance score.
    """

    def __init__(self, rows: int = 500):
        """Create the generator.

        Args:
            rows: Forwarded to ``BaseDataset.__init__``; ``generate()``
                produces ``self.rows`` rows.
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()
        self._init_data_lists()
        # Running sequence number embedded in ``metric_id``. It is only
        # initialised here, so it keeps counting across generate() calls.
        self._metric_counter = 1

    def _init_data_lists(self) -> None:
        """Populate the lookup lists that rows are sampled from."""
        self.applications = [
            'web-frontend', 'user-service', 'order-service', 'payment-service',
            'inventory-service', 'notification-service', 'auth-service',
            'report-service', 'mobile-api', 'admin-panel', 'analytics-service'
        ]
        # Endpoints are keyed by application name; every entry of
        # ``self.applications`` has a key here.
        self.endpoints = {
            'web-frontend': ['/home', '/products', '/checkout', '/profile', '/search'],
            'user-service': ['/api/users', '/api/users/{id}', '/api/users/search', '/api/users/profile'],
            'order-service': ['/api/orders', '/api/orders/{id}', '/api/orders/status', '/api/orders/history'],
            'payment-service': ['/api/payments/process', '/api/payments/{id}', '/api/payments/refund'],
            'inventory-service': ['/api/inventory', '/api/inventory/check', '/api/inventory/update'],
            'notification-service': ['/api/notifications/send', '/api/notifications/{id}'],
            'auth-service': ['/api/auth/login', '/api/auth/logout', '/api/auth/refresh', '/api/auth/validate'],
            'report-service': ['/api/reports/sales', '/api/reports/users', '/api/reports/performance'],
            'mobile-api': ['/mobile/auth', '/mobile/products', '/mobile/orders', '/mobile/profile'],
            'admin-panel': ['/admin/dashboard', '/admin/users', '/admin/orders', '/admin/settings'],
            'analytics-service': ['/api/analytics/events', '/api/analytics/reports', '/api/analytics/metrics']
        }
        self.environments = ['production', 'staging', 'development']
        self.regions = ['us-east-1', 'us-west-2', 'eu-west-1', 'ap-southeast-1']
        # Not read by any method of this class; kept as a public attribute.
        self.performance_metrics = [
            'response_time', 'throughput', 'error_rate', 'cpu_usage',
            'memory_usage', 'database_query_time', 'cache_hit_rate'
        ]
        # Not read by row generation; note 'SLA_BREACH' is never assigned
        # to a row by _classify_alert().
        self.alert_types = ['SLA_BREACH', 'HIGH_ERROR_RATE', 'SLOW_RESPONSE', 'RESOURCE_USAGE']

    def generate(self) -> List[Dict[str, Any]]:
        """Generate ``self.rows`` performance metric rows.

        When ``self.seed`` is not None, both the ``random`` module and the
        faker utilities are seeded with it before any row is produced.

        Returns:
            A list of row dictionaries whose keys match ``get_schema()``.
        """
        if self.seed is not None:
            random.seed(self.seed)
            self.faker_utils.set_seed(self.seed)
        return [self._generate_row() for _ in range(self.rows)]

    @staticmethod
    def _classify_alert(response_time_ms, sla_target_ms, error_rate_percent,
                        cpu_usage_percent, memory_usage_percent):
        """Return the alert type for a sample, or None when nothing fires.

        Conditions are checked in priority order: SLA breach on response
        time, then error rate above 5%, then CPU or memory above 80%.
        """
        if response_time_ms > sla_target_ms:
            return 'SLOW_RESPONSE'
        if error_rate_percent > 5.0:
            return 'HIGH_ERROR_RATE'
        if cpu_usage_percent > 80.0 or memory_usage_percent > 80.0:
            return 'RESOURCE_USAGE'
        return None

    @staticmethod
    def _score_performance(response_time_ms, sla_target_ms, error_rate_percent,
                           cpu_usage_percent, memory_usage_percent,
                           cache_hit_rate_percent):
        """Return a 0-100 score: 100 minus a fixed penalty per bad signal."""
        score = 100
        if response_time_ms > sla_target_ms:
            score -= 20
        if error_rate_percent > 2.0:
            score -= 15
        if cpu_usage_percent > 70.0:
            score -= 10
        if memory_usage_percent > 70.0:
            score -= 10
        if cache_hit_rate_percent < 85.0:
            score -= 5
        # Penalties sum to at most 60, so this clamp is purely defensive.
        return max(0, score)

    def _generate_row(self) -> Dict[str, Any]:
        """Build one metric row and advance the metric-id counter.

        The order of the random draws below is significant: changing it
        would change the rows produced for a given seed.

        Returns:
            A dictionary with one entry per column listed in ``get_schema()``.
        """
        # Basic metric info
        metric_id = f"PERF-2025-{self._metric_counter:08d}"
        self._metric_counter += 1

        # Timestamp - a day from the last 7 days plus a random time of day.
        # "now" is captured once so both window bounds share one instant.
        now = datetime.now()
        day = self.faker_utils.date_between(now - timedelta(days=7), now)
        timestamp = datetime.combine(
            day,
            datetime.min.time().replace(
                hour=random.randint(0, 23),
                minute=random.randint(0, 59),
                second=random.randint(0, 59)
            )
        )

        # Application and endpoint
        application = random.choice(self.applications)
        endpoint = random.choice(self.endpoints[application])
        environment = random.choices(
            self.environments,
            weights=[0.70, 0.20, 0.10]  # production, staging, development
        )[0]
        region = random.choice(self.regions)

        # Latency/throughput tier by application name.
        # NOTE(review): this is a substring test, so 'report-service' and
        # 'analytics-service' land in the first tier; only 'mobile-api' and
        # 'admin-panel' reach the last one. Confirm that is intended.
        if 'service' in application:
            response_time_ms = random.randint(50, 2000)
            throughput_rps = random.randint(10, 500)
        elif application == 'web-frontend':
            response_time_ms = random.randint(200, 5000)
            throughput_rps = random.randint(20, 200)
        else:
            response_time_ms = random.randint(500, 10000)
            throughput_rps = random.randint(1, 50)

        # Error rate - lower ceiling for production
        error_ceiling = 5.0 if environment == 'production' else 15.0
        error_rate_percent = round(random.uniform(0.0, error_ceiling), 2)

        # System resources
        cpu_usage_percent = round(random.uniform(5.0, 90.0), 2)
        memory_usage_percent = round(random.uniform(20.0, 85.0), 2)

        # Database performance
        database_query_time_ms = random.randint(10, 1000)
        database_connections = random.randint(5, 100)

        # Cache, network and load
        cache_hit_rate_percent = round(random.uniform(75.0, 99.0), 2)
        network_latency_ms = random.randint(1, 200)
        concurrent_users = random.randint(1, 1000)

        # SLA metrics
        sla_target_ms = random.choice([500, 1000, 2000, 5000])
        sla_compliance = response_time_ms <= sla_target_ms

        # Availability
        uptime_percent = round(random.uniform(95.0, 100.0), 3)

        # An alert is triggered exactly when one of the classified
        # conditions holds, so the flag is derived from the alert type.
        alert_type = self._classify_alert(
            response_time_ms, sla_target_ms, error_rate_percent,
            cpu_usage_percent, memory_usage_percent
        )
        alert_triggered = alert_type is not None

        performance_score = self._score_performance(
            response_time_ms, sla_target_ms, error_rate_percent,
            cpu_usage_percent, memory_usage_percent, cache_hit_rate_percent
        )

        return {
            'metric_id': metric_id,
            'timestamp': timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            'application': application,
            'endpoint': endpoint,
            'environment': environment,
            'region': region,
            'response_time_ms': response_time_ms,
            'throughput_rps': throughput_rps,
            'error_rate_percent': error_rate_percent,
            'cpu_usage_percent': cpu_usage_percent,
            'memory_usage_percent': memory_usage_percent,
            'database_query_time_ms': database_query_time_ms,
            'database_connections': database_connections,
            'cache_hit_rate_percent': cache_hit_rate_percent,
            'network_latency_ms': network_latency_ms,
            'concurrent_users': concurrent_users,
            'sla_target_ms': sla_target_ms,
            'sla_compliance': sla_compliance,
            'uptime_percent': uptime_percent,
            'alert_triggered': alert_triggered,
            'alert_type': alert_type,
            'performance_score': performance_score
        }

    def get_schema(self) -> Dict[str, str]:
        """Return a mapping of column name to its type label.

        The keys are the same keys ``_generate_row()`` emits. ``timestamp``
        is labelled 'datetime' although rows carry it as a formatted
        string, and ``alert_type`` is None for rows without an alert.
        """
        return {
            'metric_id': 'string', 'timestamp': 'datetime', 'application': 'string',
            'endpoint': 'string', 'environment': 'string', 'region': 'string',
            'response_time_ms': 'integer', 'throughput_rps': 'integer',
            'error_rate_percent': 'float', 'cpu_usage_percent': 'float',
            'memory_usage_percent': 'float', 'database_query_time_ms': 'integer',
            'database_connections': 'integer', 'cache_hit_rate_percent': 'float',
            'network_latency_ms': 'integer', 'concurrent_users': 'integer',
            'sla_target_ms': 'integer', 'sla_compliance': 'boolean',
            'uptime_percent': 'float', 'alert_triggered': 'boolean',
            'alert_type': 'string', 'performance_score': 'integer'
        }