Source code for tempdataset.core.datasets.cryptocurrency

"""
Cryptocurrency dataset generator.

Generates realistic cryptocurrency trading data with extreme volatility modeling.
"""

import random
import string
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


[docs] class CryptocurrencyDataset(BaseDataset): """ Cryptocurrency dataset generator that creates realistic crypto trading data. Generates 20 columns of cryptocurrency data including: - Basic crypto info (symbol, name, blockchain) - OHLCV data with extreme volatility - Market metrics (market cap, supply, volume) - Blockchain statistics (hash rate, difficulty, fees) """ def __init__(self, rows: int = 500): """ Initialize the CryptocurrencyDataset generator. Args: rows: Number of rows to generate (default: 500) """ super().__init__(rows) self.faker_utils = get_faker_utils() # Initialize data for consistent generation self._init_data_lists() def _init_data_lists(self) -> None: """Initialize predefined data lists for realistic generation.""" # Major cryptocurrencies with their data self.cryptocurrencies = { 'BTC': { 'name': 'Bitcoin', 'blockchain': 'Bitcoin', 'price_range': (20000, 70000), 'market_cap_range': (400_000_000_000, 1_400_000_000_000), 'max_supply': 21_000_000, 'has_hash_rate': True }, 'ETH': { 'name': 'Ethereum', 'blockchain': 'Ethereum', 'price_range': (1200, 4800), 'market_cap_range': (150_000_000_000, 580_000_000_000), 'max_supply': None, 'has_hash_rate': False }, 'BNB': { 'name': 'BNB', 'blockchain': 'Binance Smart Chain', 'price_range': (200, 600), 'market_cap_range': (30_000_000_000, 90_000_000_000), 'max_supply': 200_000_000, 'has_hash_rate': False }, 'ADA': { 'name': 'Cardano', 'blockchain': 'Cardano', 'price_range': (0.3, 3.0), 'market_cap_range': (10_000_000_000, 100_000_000_000), 'max_supply': 45_000_000_000, 'has_hash_rate': False }, 'XRP': { 'name': 'XRP', 'blockchain': 'XRP Ledger', 'price_range': (0.3, 2.0), 'market_cap_range': (15_000_000_000, 100_000_000_000), 'max_supply': 100_000_000_000, 'has_hash_rate': False }, 'SOL': { 'name': 'Solana', 'blockchain': 'Solana', 'price_range': (8, 260), 'market_cap_range': (3_000_000_000, 120_000_000_000), 'max_supply': None, 'has_hash_rate': False }, 'DOT': { 'name': 'Polkadot', 'blockchain': 'Polkadot', 'price_range': (4, 55), 'market_cap_range': (5_000_000_000, 55_000_000_000), 'max_supply': None, 'has_hash_rate': False }, 'DOGE': { 'name': 'Dogecoin', 'blockchain': 'Dogecoin', 'price_range': (0.05, 0.7), 'market_cap_range': (7_000_000_000, 100_000_000_000), 'max_supply': None, 'has_hash_rate': True }, 'AVAX': { 'name': 'Avalanche', 'blockchain': 'Avalanche', 'price_range': (9, 146), 'market_cap_range': (3_000_000_000, 50_000_000_000), 'max_supply': 720_000_000, 'has_hash_rate': False }, 'MATIC': { 'name': 'Polygon', 'blockchain': 'Polygon', 'price_range': (0.3, 2.9), 'market_cap_range': (3_000_000_000, 25_000_000_000), 'max_supply': 10_000_000_000, 'has_hash_rate': False }, 'LINK': { 'name': 'Chainlink', 'blockchain': 'Ethereum', 'price_range': (5, 52), 'market_cap_range': (2_500_000_000, 25_000_000_000), 'max_supply': 1_000_000_000, 'has_hash_rate': False }, 'UNI': { 'name': 'Uniswap', 'blockchain': 'Ethereum', 'price_range': (3, 45), 'market_cap_range': (2_000_000_000, 35_000_000_000), 'max_supply': 1_000_000_000, 'has_hash_rate': False } } # Trading pairs self.trading_pairs = [ 'BTC/USDT', 'ETH/USDT', 'BNB/USDT', 'ADA/USDT', 'XRP/USDT', 'SOL/USDT', 'ETH/BTC', 'ADA/BTC', 'XRP/BTC', 'DOT/BTC', 'LINK/BTC', 'UNI/BTC', 'BTC/USD', 'ETH/USD', 'ADA/USD', 'SOL/USD' ] # Cryptocurrency exchanges self.exchanges = [ 'Binance', 'Coinbase Pro', 'Kraken', 'Huobi', 'KuCoin', 'Bitfinex', 'Gate.io', 'OKX', 'Bybit', 'FTX', 'Gemini', 'Bitstamp' ]
[docs] def generate(self) -> List[Dict[str, Any]]: """ Generate cryptocurrency dataset rows. Returns: List of dictionaries representing cryptocurrency data rows """ if self.seed is not None: random.seed(self.seed) self.faker_utils.set_seed(self.seed) data = [] for i in range(self.rows): row = self._generate_row() data.append(row) return data
def _generate_row(self) -> Dict[str, Any]: """Generate a single cryptocurrency data row.""" # Select random cryptocurrency symbol = random.choice(list(self.cryptocurrencies.keys())) crypto_data = self.cryptocurrencies[symbol] # Generate timestamp (within last 30 days for more recent data) end_date = datetime.now() start_date = end_date - timedelta(days=30) date_time = self.faker_utils.date_between(start_date, end_date) # Generate OHLCV data with extreme volatility typical of crypto min_price, max_price = crypto_data['price_range'] base_price = random.uniform(min_price, max_price) # Crypto has much higher volatility than stocks open_price = base_price * random.uniform(0.85, 1.15) # High and low with extreme movements high_price = open_price * random.uniform(1.00, 1.25) low_price = open_price * random.uniform(0.75, 1.00) # Close price anywhere between low and high close_price = random.uniform(low_price, high_price) # Volume - crypto has highly variable volume volume = self._generate_crypto_volume(symbol) # Market cap and supply min_market_cap, max_market_cap = crypto_data['market_cap_range'] market_cap = random.uniform(min_market_cap, max_market_cap) # Circulating supply based on market cap and price circulating_supply = market_cap / close_price # Max supply max_supply = crypto_data['max_supply'] # Change percentages (crypto has extreme volatility) change_24h = ((close_price - open_price) / open_price) * 100 change_7d = change_24h + random.uniform(-50, 50) # 7-day change can be extreme # Exchange and trading pair exchange = random.choice(self.exchanges) trading_pair = random.choice(self.trading_pairs) # Ensure trading pair matches symbol if symbol not in trading_pair: trading_pair = f"{symbol}/USDT" # Blockchain metrics transaction_count = random.randint(100_000, 2_000_000) # Hash rate and difficulty (only for PoW coins) hash_rate = None difficulty = None if crypto_data['has_hash_rate']: if symbol == 'BTC': hash_rate = random.uniform(200, 400) # EH/s difficulty = random.uniform(25_000_000_000_000, 50_000_000_000_000) else: # Other PoW coins hash_rate = random.uniform(50, 200) difficulty = random.uniform(1_000_000, 10_000_000) # Network fees network_fees = self._generate_network_fees(symbol) return { 'symbol': symbol, 'name': crypto_data['name'], 'date_time': date_time.strftime('%Y-%m-%d %H:%M:%S'), 'open_price': round(open_price, 6), 'high_price': round(high_price, 6), 'low_price': round(low_price, 6), 'close_price': round(close_price, 6), 'volume': round(volume, 2), 'market_cap': round(market_cap, 2), 'circulating_supply': round(circulating_supply, 2), 'max_supply': max_supply, '24h_change_percent': round(change_24h, 2), '7d_change_percent': round(change_7d, 2), 'exchange': exchange, 'trading_pair': trading_pair, 'blockchain': crypto_data['blockchain'], 'transaction_count': transaction_count, 'hash_rate': round(hash_rate, 2) if hash_rate else None, 'difficulty': round(difficulty, 0) if difficulty else None, 'network_fees': round(network_fees, 6) } def _generate_crypto_volume(self, symbol: str) -> float: """ Generate realistic trading volume for cryptocurrency. Args: symbol: Cryptocurrency symbol Returns: Trading volume """ volume_ranges = { 'BTC': (500_000_000, 50_000_000_000), 'ETH': (200_000_000, 20_000_000_000), 'BNB': (100_000_000, 5_000_000_000), 'ADA': (50_000_000, 2_000_000_000), 'XRP': (100_000_000, 5_000_000_000), 'SOL': (200_000_000, 8_000_000_000), 'DOT': (50_000_000, 1_000_000_000), 'DOGE': (500_000_000, 10_000_000_000), 'AVAX': (100_000_000, 2_000_000_000), 'MATIC': (50_000_000, 1_500_000_000), 'LINK': (100_000_000, 2_000_000_000), 'UNI': (50_000_000, 1_000_000_000) } min_vol, max_vol = volume_ranges.get(symbol, (10_000_000, 500_000_000)) # Add extreme volatility to volume volume = random.uniform(min_vol, max_vol) * random.uniform(0.1, 10) return volume def _generate_network_fees(self, symbol: str) -> float: """ Generate realistic network fees for cryptocurrency. Args: symbol: Cryptocurrency symbol Returns: Network fee amount """ fee_ranges = { 'BTC': (0.00001, 0.001), 'ETH': (0.001, 0.1), 'BNB': (0.0001, 0.01), 'ADA': (0.1, 2.0), 'XRP': (0.00001, 0.01), 'SOL': (0.000005, 0.01), 'DOT': (0.01, 1.0), 'DOGE': (0.01, 10.0), 'AVAX': (0.001, 0.1), 'MATIC': (0.0001, 0.1), 'LINK': (0.001, 0.1), 'UNI': (0.001, 0.1) } min_fee, max_fee = fee_ranges.get(symbol, (0.001, 0.1)) return random.uniform(min_fee, max_fee)
[docs] def get_schema(self) -> Dict[str, str]: """ Return column schema with types. Returns: Dictionary mapping column names to their data types """ return { 'symbol': 'string', 'name': 'string', 'date_time': 'datetime', 'open_price': 'float', 'high_price': 'float', 'low_price': 'float', 'close_price': 'float', 'volume': 'float', 'market_cap': 'float', 'circulating_supply': 'float', 'max_supply': 'float', '24h_change_percent': 'float', '7d_change_percent': 'float', 'exchange': 'string', 'trading_pair': 'string', 'blockchain': 'string', 'transaction_count': 'integer', 'hash_rate': 'float', 'difficulty': 'float', 'network_fees': 'float' }