Source code for tempdataset.core.datasets.investments

"""
Investments dataset generator.

Generates realistic investment portfolio data with performance tracking.
"""

import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


class InvestmentsDataset(BaseDataset):
    """Investment portfolio dataset generator.

    Each generated row describes one holding: the asset, its purchase and
    current valuation, the resulting gain or loss, and some descriptive
    attributes (sector, risk level, currency, notes).

    Identifier columns (``portfolio_id``, ``customer_id``, ``broker_id``) are
    sequential: the underlying counters advance by one after every row and
    keep counting across repeated ``generate()`` calls on the same instance.
    """

    def __init__(self, rows: int = 500):
        """Set up lookup tables and the sequential ID counters.

        Args:
            rows: Number of rows ``generate()`` will produce.
        """
        super().__init__(rows)
        self.faker_utils = get_faker_utils()
        self._init_data_lists()
        self._portfolio_counter = 10001
        self._customer_counter = 1
        self._broker_counter = 1

    def _init_data_lists(self) -> None:
        """Populate the static value pools that rows are sampled from."""
        self.asset_types = [
            'Stock', 'Bond', 'ETF', 'Mutual Fund', 'REIT', 'Crypto', 'Commodity',
        ]
        self.sectors = [
            'Technology', 'Healthcare', 'Finance', 'Energy',
            'Consumer', 'Industrial', 'Utilities', 'Real Estate',
        ]
        self.risk_levels = ['Low', 'Medium', 'High', 'Very High']
        self.currencies = ['USD', 'EUR', 'GBP', 'JPY', 'CAD', 'AUD']
        # Maps each asset type to its (symbol, display name) candidates.
        self.assets_by_type = {
            'Stock': [
                ('AAPL', 'Apple Inc.'), ('MSFT', 'Microsoft'),
                ('GOOGL', 'Alphabet'), ('AMZN', 'Amazon'), ('TSLA', 'Tesla'),
            ],
            'Bond': [
                ('TLT', 'Treasury Bond ETF'), ('CORP', 'Corporate Bond'),
                ('MUNI', 'Municipal Bond'), ('GOVT', 'Government Bond'),
            ],
            'ETF': [
                ('SPY', 'S&P 500 ETF'), ('QQQ', 'NASDAQ ETF'),
                ('VTI', 'Total Market ETF'), ('IWM', 'Small Cap ETF'),
            ],
            'Mutual Fund': [
                ('FXAIX', 'Fidelity 500 Fund'),
                ('VTSAX', 'Vanguard Total Market'),
                ('VTSMX', 'Vanguard Total Stock'),
            ],
            'REIT': [
                ('VNQ', 'Vanguard REIT'), ('SCHH', 'Schwab REIT'),
                ('IYR', 'iShares REIT'),
            ],
            'Crypto': [
                ('BTC', 'Bitcoin'), ('ETH', 'Ethereum'),
                ('ADA', 'Cardano'), ('SOL', 'Solana'),
            ],
            'Commodity': [
                ('GLD', 'Gold ETF'), ('SLV', 'Silver ETF'),
                ('USO', 'Oil ETF'), ('DBA', 'Agriculture ETF'),
            ],
        }

    def generate(self) -> List[Dict[str, Any]]:
        """Generate ``self.rows`` investment records.

        When ``self.seed`` is set, both the ``random`` module and the faker
        utilities are seeded before any row is produced.

        Returns:
            A list of row dictionaries whose keys match ``get_schema()``.
        """
        # NOTE(review): ``seed`` is not assigned in this class; it is expected
        # to be provided by BaseDataset.
        if self.seed is not None:
            random.seed(self.seed)
            self.faker_utils.set_seed(self.seed)

        return [self._generate_row() for _ in range(self.rows)]

    def _generate_row(self) -> Dict[str, Any]:
        """Build one investment record and advance the ID counters."""
        # Read the clock once so every date bound in this row is consistent.
        now = datetime.now()

        asset_type = random.choice(self.asset_types)
        asset_symbol, asset_name = random.choice(self.assets_by_type[asset_type])

        # Purchase details: bought between roughly three years and 30 days ago.
        purchase_date = self.faker_utils.date_between(
            now - timedelta(days=1095),
            now - timedelta(days=30),
        )
        if asset_type in ['Stock', 'ETF']:
            quantity = random.uniform(1, 1000)
        else:
            quantity = random.uniform(0.1, 100)
        purchase_price = self._generate_price(asset_type)

        # Current performance: price moved between -30% and +50%.
        current_price = purchase_price * random.uniform(0.7, 1.5)
        market_value = quantity * current_price
        cost_basis = quantity * purchase_price
        gain_loss_amount = market_value - cost_basis
        gain_loss_percent = (gain_loss_amount / cost_basis) * 100

        # Portfolio allocation: 1-25% of the portfolio.
        allocation_percent = random.uniform(1, 25)

        # Risk and dividends
        risk_level = self._get_risk_level(asset_type)
        dividend_yield = self._get_dividend_yield(asset_type)

        sector = random.choice(self.sectors)
        # About 10% of rows use a random currency; the rest are USD.
        currency = random.choice(self.currencies) if random.random() < 0.1 else 'USD'

        # Timestamps: last update falls between the purchase and now.
        last_updated = self.faker_utils.date_between(purchase_date, now)

        # Notes: only about 20% of rows get a note, and the pool itself
        # contains None, so slightly fewer end up with text.
        if random.random() < 0.2:
            notes = random.choice([
                'Long-term hold', 'Growth investment', 'Dividend focus',
                'Speculative play', 'Portfolio diversification',
                'Value investment', 'Income generation', None,
            ])
        else:
            notes = None

        row = {
            'portfolio_id': f"PORT-{self._portfolio_counter:05d}",
            'customer_id': f"CUST-{self._customer_counter:06d}",
            'asset_type': asset_type,
            'asset_symbol': asset_symbol,
            'asset_name': asset_name,
            'quantity': round(quantity, 4),
            'purchase_date': purchase_date.strftime('%Y-%m-%d'),
            'purchase_price': round(purchase_price, 2),
            'current_price': round(current_price, 2),
            'market_value': round(market_value, 2),
            'gain_loss_amount': round(gain_loss_amount, 2),
            'gain_loss_percent': round(gain_loss_percent, 2),
            'sector': sector,
            'risk_level': risk_level,
            'allocation_percent': round(allocation_percent, 2),
            'dividend_yield': round(dividend_yield, 2),
            'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S'),
            'currency': currency,
            'broker_id': f"BRK-{self._broker_counter:03d}",
            'notes': notes,
        }

        # Advance the counters; without this every row would carry the same
        # portfolio, customer and broker identifiers.
        self._portfolio_counter += 1
        self._customer_counter += 1
        self._broker_counter += 1

        return row

    def _generate_price(self, asset_type: str) -> float:
        """Return a random price drawn from the range for ``asset_type``.

        Unknown asset types fall back to the range (10, 100).
        """
        price_ranges = {
            'Stock': (10, 500),
            'Bond': (95, 105),
            'ETF': (20, 400),
            'Mutual Fund': (10, 100),
            'REIT': (15, 150),
            'Crypto': (0.1, 50000),
            'Commodity': (10, 200),
        }
        min_price, max_price = price_ranges.get(asset_type, (10, 100))
        return random.uniform(min_price, max_price)

    def _get_risk_level(self, asset_type: str) -> str:
        """Return the risk label for ``asset_type`` ('Medium' if unknown)."""
        # The three random draws below run on every call regardless of the
        # asset type, so each row consumes the same amount of random state.
        risk_map = {
            'Stock': random.choice(['Medium', 'High']),
            'Bond': 'Low',
            'ETF': random.choice(['Low', 'Medium']),
            'Mutual Fund': random.choice(['Low', 'Medium']),
            'REIT': 'Medium',
            'Crypto': 'Very High',
            'Commodity': 'High',
        }
        return risk_map.get(asset_type, 'Medium')

    def _get_dividend_yield(self, asset_type: str) -> float:
        """Return a dividend yield in percent for ``asset_type``.

        Stocks, ETFs and REITs draw from 0-8, bonds and mutual funds from
        1-5, and every other type yields 0.0.
        """
        if asset_type in ['Stock', 'ETF', 'REIT']:
            return random.uniform(0, 8)
        if asset_type in ['Bond', 'Mutual Fund']:
            return random.uniform(1, 5)
        # Crypto and some others don't pay dividends. Return a float so the
        # value matches the 'float' type declared in get_schema().
        return 0.0

    def get_schema(self) -> Dict[str, str]:
        """Return the column-name to type-name mapping for generated rows."""
        return {
            'portfolio_id': 'string',
            'customer_id': 'string',
            'asset_type': 'string',
            'asset_symbol': 'string',
            'asset_name': 'string',
            'quantity': 'float',
            'purchase_date': 'date',
            'purchase_price': 'float',
            'current_price': 'float',
            'market_value': 'float',
            'gain_loss_amount': 'float',
            'gain_loss_percent': 'float',
            'sector': 'string',
            'risk_level': 'string',
            'allocation_percent': 'float',
            'dividend_yield': 'float',
            'last_updated': 'datetime',
            'currency': 'string',
            'broker_id': 'string',
            'notes': 'string',
        }