# Source code for tempdataset.core.datasets.smarthome

"""
Smart Home dataset generator.

Generates realistic smart home IoT device data with home automation events,
device status monitoring, environmental sensors, and security alerts.
"""

import random
import string
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


class SmartHomeDataset(BaseDataset):
    """
    Smart Home dataset generator that creates realistic smart home IoT data.

    Generates smart home device data including:
    - Event identification (event_id, timestamp, home_id)
    - Device information (room, device_type, device_id)
    - Device status and environmental readings
    - Motion detection and security monitoring
    - Home automation triggers and alerts
    """

    def __init__(self, rows: int = 500):
        """
        Initialize the SmartHomeDataset generator.

        Args:
            rows: Number of rows to generate (default: 500)
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()

        # Initialize data for consistent generation
        self._init_data_lists()

        # Counter for sequential IDs; it is never reset, so event IDs keep
        # increasing across repeated generate() calls on the same instance.
        self._event_counter = 1

    def _init_data_lists(self) -> None:
        """Initialize predefined data lists for realistic generation."""
        # Room types in a typical home
        self.rooms = [
            'Living Room', 'Kitchen', 'Master Bedroom', 'Bedroom 2', 'Bedroom 3',
            'Bathroom', 'Master Bathroom', 'Dining Room', 'Office', 'Garage',
            'Basement', 'Attic', 'Hallway', 'Entryway', 'Laundry Room'
        ]

        # Device types
        self.device_types = ['Thermostat', 'Camera', 'Light', 'Door Lock', 'Appliance']

        # Device status options (fallback pool for unknown device types)
        self.device_statuses = ['On', 'Off', 'Standby', 'Open', 'Closed']

        # Security alert types
        self.alert_types = [
            'Motion Detected', 'Door/Window Opened', 'Unusual Activity',
            'System Armed', 'System Disarmed', 'Low Battery', 'Device Offline'
        ]

        # Automation triggers
        self.automation_triggers = [
            'Schedule', 'Motion', 'Temperature', 'Time', 'Geofence',
            'Voice Command', 'App Control', 'Sensor', 'Security Event'
        ]

        # Specific devices by type
        self.specific_devices = {
            'Thermostat': ['Nest Thermostat', 'Ecobee Smart', 'Honeywell T9', 'Emerson Sensi'],
            'Camera': ['Ring Indoor Cam', 'Arlo Pro', 'Wyze Cam', 'Nest Cam'],
            'Light': ['Philips Hue', 'LIFX Bulb', 'Smart Switch', 'LED Strip'],
            'Door Lock': ['August Smart Lock', 'Yale Assure', 'Schlage Encode', 'Kwikset Halo'],
            'Appliance': ['Smart Fridge', 'Smart Oven', 'Robot Vacuum', 'Smart Washer',
                          'Smart Dryer', 'Smart Dishwasher']
        }

    def generate(self) -> List[Dict[str, Any]]:
        """
        Generate smart home dataset rows.

        When ``self.seed`` is not None, both the ``random`` module and the
        faker utilities are seeded before any row is produced.

        Returns:
            List of dictionaries representing smart home device events
        """
        # NOTE(review): `seed` and `rows` are presumably provided by
        # BaseDataset; they are not assigned anywhere in this class.
        if self.seed is not None:
            random.seed(self.seed)
            self.faker_utils.set_seed(self.seed)

        return [self._generate_row() for _ in range(self.rows)]

    def _generate_row(self) -> Dict[str, Any]:
        """
        Generate a single smart home device event row.

        Returns:
            Dictionary with one value per column listed in get_schema()
        """
        # Generate timestamp (within last 30 days)
        end_time = datetime.now()
        start_time = end_time - timedelta(days=30)
        date_part = self.faker_utils.date_between(start_time, end_time)

        # Convert date to datetime with random time
        timestamp = datetime.combine(date_part, datetime.min.time()) + timedelta(
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
            seconds=random.randint(0, 59)
        )

        # A random time of day on a boundary date can fall outside the window
        # (later than "now" today, or earlier than start_time on the first
        # day). Shift such values by one day so they land back inside it
        # without drawing extra random numbers.
        # NOTE(review): assumes date_between returns a date inside
        # [start_time, end_time] -- confirm against faker_utils.
        one_day = timedelta(days=1)
        if timestamp > end_time:
            timestamp -= one_day
        elif timestamp < start_time:
            timestamp += one_day

        # Generate home and room
        home_id = self._generate_home_id()
        room = random.choice(self.rooms)

        # Generate device information
        device_type = random.choice(self.device_types)
        device_id = self._generate_device_id(device_type)

        # Generate device status based on type and time
        status = self._get_device_status(device_type, timestamp)

        # Generate environmental readings
        temperature_c, humidity_percent = self._get_environmental_readings(room, timestamp)

        # Generate energy usage based on device type and status
        energy_usage_kwh = self._get_energy_usage(device_type, status)

        # Generate motion detection (higher probability in certain rooms and times)
        motion_detected = self._get_motion_detection(room, timestamp)

        # Generate door/window status
        door_window_open = self._get_door_window_status(room, device_type)

        # Generate light level
        light_level_lux = self._get_light_level(room, timestamp)

        # Generate security alert (5% chance)
        security_alert_flag = random.random() < 0.05
        alert_type = random.choice(self.alert_types) if security_alert_flag else None

        # Generate automation trigger (30% chance of automated event)
        automation_trigger = (
            random.choice(self.automation_triggers) if random.random() < 0.30 else None
        )

        return {
            'event_id': self._generate_event_id(),
            'timestamp': timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            'home_id': home_id,
            'room': room,
            'device_type': device_type,
            'device_id': device_id,
            'status': status,
            'temperature_c': round(temperature_c, 1),
            'humidity_percent': round(humidity_percent, 1),
            'energy_usage_kwh': round(energy_usage_kwh, 3),
            'motion_detected': motion_detected,
            'door_window_open': door_window_open,
            'light_level_lux': round(light_level_lux, 1),
            'security_alert_flag': security_alert_flag,
            'alert_type': alert_type,
            'automation_trigger': automation_trigger
        }

    def _generate_event_id(self) -> str:
        """
        Generate event ID in format "SMH-YYYY-NNNNNN".

        The year is the current calendar year at call time and the number is
        the instance's running counter, which is incremented on every call.

        Returns:
            Formatted event ID
        """
        year = datetime.now().year
        event_num = str(self._event_counter).zfill(6)
        self._event_counter += 1
        return f"SMH-{year}-{event_num}"

    def _generate_home_id(self) -> str:
        """
        Generate home ID in format "HOME-NNNN".

        Returns:
            Formatted home ID
        """
        home_num = random.randint(1, 9999)
        return f"HOME-{home_num:04d}"

    def _generate_device_id(self, device_type: str) -> str:
        """
        Generate device ID based on device type.

        Args:
            device_type: Type of device

        Returns:
            Formatted device ID ("DEV-" + first three letters of the type,
            upper-cased, + a zero-padded number from 1 to 999)
        """
        type_code = device_type[:3].upper()
        device_num = random.randint(1, 999)
        return f"DEV-{type_code}-{device_num:03d}"

    def _get_device_status(self, device_type: str, timestamp: datetime) -> str:
        """
        Get device status based on type and time.

        Args:
            device_type: Type of device
            timestamp: Current timestamp

        Returns:
            Device status
        """
        hour = timestamp.hour

        # Repeated entries in the choice lists act as simple weights.
        if device_type == 'Light':
            # Lights more likely to be on during evening/night
            if 18 <= hour <= 23 or 6 <= hour <= 8:
                return random.choice(['On', 'On', 'On', 'Off'])  # 75% on
            else:
                return random.choice(['On', 'Off', 'Off', 'Off'])  # 25% on
        elif device_type == 'Thermostat':
            # Thermostats are usually always on, but may be in different modes
            return random.choice(['On', 'On', 'On', 'Standby'])
        elif device_type == 'Camera':
            # Cameras usually always on for security
            return random.choice(['On', 'On', 'On', 'Standby'])
        elif device_type == 'Door Lock':
            # Doors can be open or closed
            return random.choice(['Open', 'Closed', 'Closed', 'Closed'])  # Usually closed
        elif device_type == 'Appliance':
            # Appliances vary by time of day
            if 6 <= hour <= 9 or 17 <= hour <= 21:  # Morning and evening
                return random.choice(['On', 'On', 'Off', 'Standby'])
            else:
                return random.choice(['On', 'Off', 'Off', 'Standby'])

        # Unknown device type: pick uniformly from every known status
        return random.choice(self.device_statuses)

    def _get_environmental_readings(self, room: str, timestamp: datetime) -> tuple:
        """
        Get temperature and humidity readings based on room and time.

        Args:
            room: Room name
            timestamp: Current timestamp

        Returns:
            Tuple of (temperature, humidity)
        """
        hour = timestamp.hour
        month = timestamp.month

        # Base temperature varies by room
        if room in ['Kitchen', 'Laundry Room']:
            base_temp = 24  # Warmer due to appliances
        elif room in ['Basement', 'Garage']:
            base_temp = 18  # Cooler
        elif room in ['Bathroom', 'Master Bathroom']:
            base_temp = 23  # Slightly warmer
        else:
            base_temp = 21  # Standard room temperature

        # Seasonal adjustment
        if month in [12, 1, 2]:  # Winter
            base_temp -= 2
        elif month in [6, 7, 8]:  # Summer
            base_temp += 2

        # Daily variation
        if 14 <= hour <= 16:  # Afternoon peak
            base_temp += 1
        elif 3 <= hour <= 6:  # Early morning low
            base_temp -= 1

        temperature_c = base_temp + random.uniform(-2, 2)

        # Humidity varies by room
        if room in ['Bathroom', 'Master Bathroom']:
            humidity_percent = random.uniform(50, 80)  # Higher humidity
        elif room in ['Kitchen', 'Laundry Room']:
            humidity_percent = random.uniform(45, 70)
        else:
            humidity_percent = random.uniform(35, 60)

        return temperature_c, humidity_percent

    def _get_energy_usage(self, device_type: str, status: str) -> float:
        """
        Get energy usage based on device type and status.

        Args:
            device_type: Type of device
            status: Current device status

        Returns:
            Energy usage in kWh
        """
        if status in ['Off', 'Closed']:
            return random.uniform(0, 0.001)  # Minimal standby power

        # Energy usage by device type (per hour)
        energy_ranges = {
            'Thermostat': (0.002, 0.005),  # Very low, just controls
            'Camera': (0.005, 0.015),      # Low power devices
            'Light': (0.008, 0.060),       # LED to incandescent range
            'Door Lock': (0.001, 0.003),   # Very low power
            'Appliance': (0.100, 2.000)    # Wide range depending on appliance
        }

        min_usage, max_usage = energy_ranges.get(device_type, (0.005, 0.050))

        if status == 'Standby':
            # Standby uses less power: 10-30% of the type's minimum draw
            return random.uniform(min_usage * 0.1, min_usage * 0.3)
        else:
            return random.uniform(min_usage, max_usage)

    def _get_motion_detection(self, room: str, timestamp: datetime) -> bool:
        """
        Get motion detection based on room and time.

        Args:
            room: Room name
            timestamp: Current timestamp

        Returns:
            True if motion detected
        """
        hour = timestamp.hour

        # Higher probability of motion during active hours
        if 6 <= hour <= 23:
            base_probability = 0.3
        else:
            base_probability = 0.05  # Low probability at night

        # Adjust by room type
        if room in ['Living Room', 'Kitchen']:
            base_probability *= 1.5  # More active rooms
        elif room in ['Bedroom 2', 'Bedroom 3', 'Office']:
            base_probability *= 0.7  # Less active rooms
        elif room in ['Garage', 'Basement', 'Attic']:
            base_probability *= 0.3  # Rarely used rooms

        return random.random() < base_probability

    def _get_door_window_status(self, room: str, device_type: str) -> bool:
        """
        Get door/window open status.

        Args:
            room: Room name
            device_type: Type of device

        Returns:
            True if door/window is open
        """
        if device_type == 'Door Lock':
            # Doors are usually closed
            return random.random() < 0.1

        # For other devices, simulate window sensors
        if room in ['Living Room', 'Bedroom 2', 'Bedroom 3', 'Master Bedroom']:
            # Rooms with windows
            return random.random() < 0.15
        else:
            # Rooms without windows or rarely opened
            return random.random() < 0.05

    def _get_light_level(self, room: str, timestamp: datetime) -> float:
        """
        Get light level based on room and time.

        Args:
            room: Room name
            timestamp: Current timestamp

        Returns:
            Light level in lux
        """
        hour = timestamp.hour

        # Base light level by time of day
        if 6 <= hour <= 8:  # Dawn
            base_lux = 100
        elif 9 <= hour <= 17:  # Daytime
            base_lux = 500
        elif 18 <= hour <= 20:  # Dusk
            base_lux = 200
        else:  # Night
            base_lux = 10

        # Adjust by room (some rooms have less natural light)
        if room in ['Basement', 'Garage', 'Bathroom']:
            base_lux *= 0.3  # Less natural light
        elif room in ['Living Room', 'Kitchen']:
            base_lux *= 1.2  # More windows/artificial light

        # Asymmetric jitter: -30% to +50% of the adjusted base level
        return base_lux + random.uniform(-base_lux * 0.3, base_lux * 0.5)

    def get_schema(self) -> Dict[str, str]:
        """
        Return column schema with types.

        Note that 'alert_type' and 'automation_trigger' are None in rows
        where no alert or automation trigger was generated.

        Returns:
            Dictionary mapping column names to their data types
        """
        return {
            'event_id': 'string',
            'timestamp': 'datetime',
            'home_id': 'string',
            'room': 'string',
            'device_type': 'string',
            'device_id': 'string',
            'status': 'string',
            'temperature_c': 'float',
            'humidity_percent': 'float',
            'energy_usage_kwh': 'float',
            'motion_detected': 'boolean',
            'door_window_open': 'boolean',
            'light_level_lux': 'float',
            'security_alert_flag': 'boolean',
            'alert_type': 'string',
            'automation_trigger': 'string'
        }