Source code for tempdataset.core.datasets.payments

"""
Payments dataset generator.

Generates realistic digital payment transactions through multiple gateways.
"""

import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

from .base import BaseDataset
from ..utils.faker_utils import get_faker_utils


[docs] class PaymentsDataset(BaseDataset): """Payments dataset generator for digital payment transactions.""" def __init__(self, rows: int = 500): super().__init__(rows) self.faker_utils = get_faker_utils() self._init_data_lists() self._transaction_counter = 100001 def _init_data_lists(self) -> None: self.payment_methods = [ 'Credit Card', 'Debit Card', 'PayPal', 'Apple Pay', 'Google Pay', 'Bank Transfer', 'Cryptocurrency', 'Stripe', 'Square', 'Venmo', 'Zelle' ] self.payment_gateways = [ 'Stripe', 'PayPal', 'Square', 'Braintree', 'Authorize.Net', 'Worldpay', 'Adyen', 'Klarna', 'Razorpay', 'Mollie' ] self.currencies = ['USD', 'EUR', 'GBP', 'CAD', 'AUD', 'JPY', 'CHF', 'SEK', 'NOK', 'DKK'] self.transaction_types = [ 'Purchase', 'Refund', 'Recurring', 'Subscription', 'Transfer', 'Withdrawal', 'Deposit', 'Fee', 'Chargeback', 'Authorization' ] self.transaction_statuses = ['Pending', 'Completed', 'Failed', 'Cancelled', 'Refunded'] self.countries = [ 'United States', 'United Kingdom', 'Germany', 'France', 'Canada', 'Australia', 'Netherlands', 'Sweden', 'Norway', 'Denmark' ] self.merchants = [ 'Amazon', 'eBay', 'Shopify Store', 'WooCommerce', 'Etsy', 'Best Buy', 'Target', 'Walmart', 'Nike', 'Apple Store' ] self.failure_reasons = [ 'Insufficient funds', 'Card expired', 'Invalid CVV', 'Card declined', 'Network error', 'Fraud detected', 'Limit exceeded', 'Invalid account' ]
[docs] def generate(self) -> List[Dict[str, Any]]: if self.seed is not None: random.seed(self.seed) self.faker_utils.set_seed(self.seed) return [self._generate_row() for _ in range(self.rows)]
def _generate_row(self) -> Dict[str, Any]: # Basic transaction info transaction_id = f"TXN-{self._transaction_counter:08d}" self._transaction_counter += 1 # Dates date_part = self.faker_utils.date_between( datetime.now() - timedelta(days=90), datetime.now() ) # Add random time component created_at = datetime.combine( date_part, datetime.min.time().replace( hour=random.randint(0, 23), minute=random.randint(0, 59), second=random.randint(0, 59) ) ) # Payment details payment_method = random.choice(self.payment_methods) payment_gateway = random.choice(self.payment_gateways) transaction_type = random.choice(self.transaction_types) # Amount generation based on transaction type if transaction_type == 'Fee': amount = random.uniform(0.30, 25.00) elif transaction_type in ['Transfer', 'Withdrawal', 'Deposit']: amount = random.uniform(50, 5000) elif transaction_type == 'Subscription': amount = random.choice([9.99, 19.99, 29.99, 49.99, 99.99]) else: # Purchase, Refund, etc. amount = random.uniform(5, 2000) # Currency (affects amount slightly for non-USD) currency = random.choice(self.currencies) if currency == 'JPY': amount *= 110 # Approximate USD to JPY conversion elif currency == 'EUR': amount *= 0.85 elif currency == 'GBP': amount *= 0.75 # Status and processing status = random.choice(self.transaction_statuses) # Processing times if status == 'Completed': processing_time_seconds = random.uniform(1.2, 45.0) settled_at = created_at + timedelta(seconds=processing_time_seconds) elif status == 'Pending': processing_time_seconds = None settled_at = None else: processing_time_seconds = random.uniform(0.5, 15.0) settled_at = None # Fee calculation (gateway takes a cut) if status == 'Completed' and transaction_type not in ['Refund', 'Chargeback']: fee_percentage = random.uniform(0.025, 0.035) # 2.5% - 3.5% fixed_fee = random.uniform(0.15, 0.30) transaction_fee = amount * fee_percentage + fixed_fee else: transaction_fee = 0.00 # Customer info customer_id = f"CUST-{random.randint(10000, 99999)}" customer_email = self.faker_utils.email() # Merchant info merchant_name = random.choice(self.merchants) merchant_category = random.choice([ 'E-commerce', 'Retail', 'Software', 'Digital Services', 'Food & Beverage', 'Travel', 'Entertainment', 'Finance', 'Healthcare', 'Education' ]) # Geographic data billing_country = random.choice(self.countries) ip_address = f"{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}" # Risk assessment risk_score = random.uniform(0, 100) fraud_detected = risk_score > 85 or status == 'Failed' and random.random() < 0.3 # Failure handling failure_reason = None if status == 'Failed': failure_reason = random.choice(self.failure_reasons) # Card/account info (masked for security) if payment_method in ['Credit Card', 'Debit Card']: card_last_four = f"****-{random.randint(1000, 9999)}" card_brand = random.choice(['Visa', 'Mastercard', 'Amex', 'Discover']) account_id = None elif payment_method in ['PayPal', 'Stripe', 'Square']: card_last_four = None card_brand = None account_id = f"{payment_method.upper()}-{random.randint(100000, 999999)}" else: card_last_four = None card_brand = None account_id = f"ACC-{random.randint(1000000, 9999999)}" # Reference numbers gateway_reference = f"{payment_gateway.upper()}-{random.randint(1000000, 9999999)}" merchant_reference = f"ORD-{random.randint(100000, 999999)}" # Recurring payment info is_recurring = transaction_type in ['Recurring', 'Subscription'] recurring_frequency = None if is_recurring: recurring_frequency = random.choice(['Monthly', 'Quarterly', 'Yearly', 'Weekly']) # Metadata user_agent = random.choice([ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)', 'Mozilla/5.0 (Android 11; Mobile; rv:89.0) Gecko/89.0' ]) if random.random() < 0.8 else None return { 'transaction_id': transaction_id, 'payment_method': payment_method, 'payment_gateway': payment_gateway, 'transaction_type': transaction_type, 'amount': round(amount, 2), 'currency': currency, 'transaction_fee': round(transaction_fee, 2), 'status': status, 'created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), 'settled_at': settled_at.strftime('%Y-%m-%d %H:%M:%S') if settled_at else None, 'processing_time_seconds': round(processing_time_seconds, 2) if processing_time_seconds else None, 'customer_id': customer_id, 'customer_email': customer_email, 'merchant_name': merchant_name, 'merchant_category': merchant_category, 'card_last_four': card_last_four, 'card_brand': card_brand, 'account_id': account_id, 'billing_country': billing_country, 'ip_address': ip_address, 'gateway_reference': gateway_reference, 'merchant_reference': merchant_reference, 'risk_score': round(risk_score, 2), 'fraud_detected': fraud_detected, 'failure_reason': failure_reason, 'is_recurring': is_recurring, 'recurring_frequency': recurring_frequency, 'user_agent': user_agent[:50] + '...' if user_agent and len(user_agent) > 50 else user_agent }
[docs] def get_schema(self) -> Dict[str, str]: return { 'transaction_id': 'string', 'payment_method': 'string', 'payment_gateway': 'string', 'transaction_type': 'string', 'amount': 'float', 'currency': 'string', 'transaction_fee': 'float', 'status': 'string', 'created_at': 'datetime', 'settled_at': 'datetime', 'processing_time_seconds': 'float', 'customer_id': 'string', 'customer_email': 'string', 'merchant_name': 'string', 'merchant_category': 'string', 'card_last_four': 'string', 'card_brand': 'string', 'account_id': 'string', 'billing_country': 'string', 'ip_address': 'string', 'gateway_reference': 'string', 'merchant_reference': 'string', 'risk_score': 'float', 'fraud_detected': 'boolean', 'failure_reason': 'string', 'is_recurring': 'boolean', 'recurring_frequency': 'string', 'user_agent': 'string' }