Adilbai
/

stock-trading-rl-agent

+import yfinance as yf
+import pandas as pd
+import numpy as np
+from typing import List, Dict, Optional, Tuple
+import os
+import logging
+from datetime import datetime, timedelta
+import pickle
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+import warnings
+warnings.filterwarnings('ignore')
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+class StockDataProcessor:
+    """
+    A comprehensive class for downloading, processing, and preprocessing stock data
+    from Yahoo Finance for reinforcement learning applications.
+    """
+    def __init__(self, data_dir: str = "stock_data", cache_dir: str = "cache"):
+        self.data_dir = data_dir
+        self.cache_dir = cache_dir
+        self.scalers = {}
+        # Create directories if they don't exist
+        os.makedirs(data_dir, exist_ok=True)
+        os.makedirs(cache_dir, exist_ok=True)
+    def get_sp500_tickers(self) -> List[str]:
+        """Get S&P 500 stock tickers"""
+        try:
+            # Download S&P 500 list from Wikipedia
+            url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
+            tables = pd.read_html(url)
+            sp500_table = tables[0]
+            tickers = sp500_table['Symbol'].tolist()
+            # Clean tickers (remove dots, etc.)
+            tickers = [ticker.replace('.', '-') for ticker in tickers]
+            logger.info(f"Retrieved {len(tickers)} S&P 500 tickers")
+            return tickers
+        except Exception as e:
+            logger.error(f"Error fetching S&P 500 tickers: {e}")
+            # Fallback to a smaller list of popular stocks
+            return ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'JPM', 'JNJ', 'V']
+    def download_stock_data(self,
+                          ticker: str,
+                          period: str = "10y",
+                          interval: str = "1d") -> Optional[pd.DataFrame]:
+        """
+        Download stock data for a single ticker
+        Args:
+            ticker: Stock symbol
+            period: Time period (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max)
+            interval: Data interval (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo)
+        """
+        try:
+            stock = yf.Ticker(ticker)
+            data = stock.history(period=period, interval=interval)
+            if data.empty:
+                logger.warning(f"No data found for {ticker}")
+                return None
+            # Add ticker column
+            data['Ticker'] = ticker
+            data.reset_index(inplace=True)
+            logger.info(f"Downloaded {len(data)} records for {ticker}")
+            return data
+        except Exception as e:
+            logger.error(f"Error downloading data for {ticker}: {e}")
+            return None
+    def download_multiple_stocks(self,
+                                tickers: List[str],
+                                period: str = "10y",
+                                interval: str = "1d",
+                                max_workers: int = 10) -> pd.DataFrame:
+        """
+        Download stock data for multiple tickers using parallel processing
+        """
+        all_data = []
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all download tasks
+            future_to_ticker = {
+                executor.submit(self.download_stock_data, ticker, period, interval): ticker
+                for ticker in tickers
+            }
+            # Collect results
+            for future in as_completed(future_to_ticker):
+                ticker = future_to_ticker[future]
+                try:
+                    data = future.result()
+                    if data is not None:
+                        all_data.append(data)
+                except Exception as e:
+                    logger.error(f"Error processing {ticker}: {e}")
+                # Rate limiting
+                time.sleep(0.1)
+        if all_data:
+            combined_data = pd.concat(all_data, ignore_index=True)
+            logger.info(f"Combined data shape: {combined_data.shape}")
+            return combined_data
+        else:
+            return pd.DataFrame()
+    def calculate_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Calculate technical indicators for each stock
+        """
+        logger.info("Calculating technical indicators...")
+        result_dfs = []
+        for ticker in df['Ticker'].unique():
+            ticker_data = df[df['Ticker'] == ticker].copy()
+            ticker_data = ticker_data.sort_values('Date')
+            # Moving averages
+            ticker_data['SMA_5'] = ticker_data['Close'].rolling(window=5).mean()
+            ticker_data['SMA_10'] = ticker_data['Close'].rolling(window=10).mean()
+            ticker_data['SMA_20'] = ticker_data['Close'].rolling(window=20).mean()
+            ticker_data['SMA_50'] = ticker_data['Close'].rolling(window=50).mean()
+            # Exponential moving averages
+            ticker_data['EMA_12'] = ticker_data['Close'].ewm(span=12).mean()
+            ticker_data['EMA_26'] = ticker_data['Close'].ewm(span=26).mean()
+            # MACD
+            ticker_data['MACD'] = ticker_data['EMA_12'] - ticker_data['EMA_26']
+            ticker_data['MACD_Signal'] = ticker_data['MACD'].ewm(span=9).mean()
+            ticker_data['MACD_Histogram'] = ticker_data['MACD'] - ticker_data['MACD_Signal']
+            # RSI
+            delta = ticker_data['Close'].diff()
+            gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
+            loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
+            rs = gain / loss
+            ticker_data['RSI'] = 100 - (100 / (1 + rs))
+            # Bollinger Bands
+            ticker_data['BB_Middle'] = ticker_data['Close'].rolling(window=20).mean()
+            bb_std = ticker_data['Close'].rolling(window=20).std()
+            ticker_data['BB_Upper'] = ticker_data['BB_Middle'] + (bb_std * 2)
+            ticker_data['BB_Lower'] = ticker_data['BB_Middle'] - (bb_std * 2)
+            ticker_data['BB_Width'] = ticker_data['BB_Upper'] - ticker_data['BB_Lower']
+            ticker_data['BB_Position'] = (ticker_data['Close'] - ticker_data['BB_Lower']) / ticker_data['BB_Width']
+            # Volatility
+            ticker_data['Volatility'] = ticker_data['Close'].rolling(window=20).std()
+            # Price change features
+            ticker_data['Price_Change'] = ticker_data['Close'].pct_change()
+            ticker_data['Price_Change_5d'] = ticker_data['Close'].pct_change(periods=5)
+            ticker_data['High_Low_Ratio'] = ticker_data['High'] / ticker_data['Low']
+            ticker_data['Open_Close_Ratio'] = ticker_data['Open'] / ticker_data['Close']
+            # Volume features
+            ticker_data['Volume_SMA'] = ticker_data['Volume'].rolling(window=20).mean()
+            ticker_data['Volume_Ratio'] = ticker_data['Volume'] / ticker_data['Volume_SMA']
+            result_dfs.append(ticker_data)
+        result = pd.concat(result_dfs, ignore_index=True)
+        logger.info(f"Technical indicators calculated. New shape: {result.shape}")
+        return result
+    def create_lagged_features(self, df: pd.DataFrame, lags: List[int] = [1, 2, 3, 5, 10]) -> pd.DataFrame:
+        """
+        Create lagged features for time series analysis
+        """
+        logger.info("Creating lagged features...")
+        result_dfs = []
+        feature_columns = ['Close', 'Volume', 'Price_Change', 'RSI', 'MACD', 'Volatility']
+        for ticker in df['Ticker'].unique():
+            ticker_data = df[df['Ticker'] == ticker].copy()
+            ticker_data = ticker_data.sort_values('Date')
+            for col in feature_columns:
+                if col in ticker_data.columns:
+                    for lag in lags:
+                        ticker_data[f'{col}_lag_{lag}'] = ticker_data[col].shift(lag)
+            result_dfs.append(ticker_data)
+        result = pd.concat(result_dfs, ignore_index=True)
+        logger.info(f"Lagged features created. New shape: {result.shape}")
+        return result
+    def create_future_returns(self, df: pd.DataFrame, horizons: List[int] = [1, 5, 10, 20]) -> pd.DataFrame:
+        """
+        Create future return targets for prediction
+        """
+        logger.info("Creating future return targets...")
+        result_dfs = []
+        for ticker in df['Ticker'].unique():
+            ticker_data = df[df['Ticker'] == ticker].copy()
+            ticker_data = ticker_data.sort_values('Date')
+            for horizon in horizons:
+                ticker_data[f'Future_Return_{horizon}d'] = ticker_data['Close'].shift(-horizon) / ticker_data['Close'] - 1
+                # Create binary classification targets
+                ticker_data[f'Future_Up_{horizon}d'] = (ticker_data[f'Future_Return_{horizon}d'] > 0).astype(int)
+                # Create categorical targets (strong down, down, up, strong up)
+                returns = ticker_data[f'Future_Return_{horizon}d']
+                ticker_data[f'Future_Category_{horizon}d'] = pd.cut(
+                    returns,
+                    bins=[-np.inf, -0.02, 0, 0.02, np.inf],
+                    labels=[0, 1, 2, 3]
+                ).astype(float)
+            result_dfs.append(ticker_data)
+        result = pd.concat(result_dfs, ignore_index=True)
+        logger.info(f"Future return targets created. New shape: {result.shape}")
+        return result
+    def clean_and_normalize_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Clean and normalize the data for ML/RL
+        """
+        logger.info("Cleaning and normalizing data...")
+        # Remove rows with too many NaN values
+        df = df.dropna(thresh=len(df.columns) * 0.7)
+        # Forward fill remaining NaN values
+        numeric_columns = df.select_dtypes(include=[np.number]).columns
+        df[numeric_columns] = df[numeric_columns].fillna(method='ffill')
+        # Remove infinite values
+        df = df.replace([np.inf, -np.inf], np.nan)
+        df = df.dropna()
+        logger.info(f"Data cleaned. Final shape: {df.shape}")
+        return df
+    def create_rl_states_actions(self, df: pd.DataFrame) -> Dict:
+        """
+        Create state and action spaces suitable for reinforcement learning
+        """
+        logger.info("Creating RL state and action representations...")
+        # Define state features (technical indicators and market data)
+        state_features = [
+            'Open', 'High', 'Low', 'Close', 'Volume',
+            'SMA_5', 'SMA_10', 'SMA_20', 'SMA_50',
+            'EMA_12', 'EMA_26', 'MACD', 'MACD_Signal', 'RSI',
+            'BB_Position', 'BB_Width', 'Volatility',
+            'Price_Change', 'High_Low_Ratio', 'Volume_Ratio'
+        ]
+        # Add lagged features to state
+        lag_features = [col for col in df.columns if '_lag_' in col]
+        state_features.extend(lag_features)
+        # Filter existing features
+        state_features = [feat for feat in state_features if feat in df.columns]
+        # Normalize state features
+        scaler = StandardScaler()
+        df_scaled = df.copy()
+        df_scaled[state_features] = scaler.fit_transform(df[state_features])
+        # Define action space (0: Hold, 1: Buy, 2: Sell)
+        # You can expand this based on your RL strategy
+        # Create sequences for each stock
+        rl_data = {}
+        sequence_length = 60  # Number of days to look back
+        for ticker in df_scaled['Ticker'].unique():
+            ticker_data = df_scaled[df_scaled['Ticker'] == ticker].sort_values('Date')
+            states = []
+            rewards = []
+            dates = []
+            for i in range(sequence_length, len(ticker_data)):
+                # State: sequence of technical indicators
+                state_sequence = ticker_data.iloc[i-sequence_length:i][state_features].values
+                states.append(state_sequence)
+                # Reward: next day return (can be modified based on your RL objective)
+                if 'Future_Return_1d' in ticker_data.columns:
+                    reward = ticker_data.iloc[i]['Future_Return_1d']
+                else:
+                    current_price = ticker_data.iloc[i]['Close']
+                    if i < len(ticker_data) - 1:
+                        next_price = ticker_data.iloc[i+1]['Close']
+                        reward = (next_price - current_price) / current_price
+                    else:
+                        reward = 0
+                rewards.append(reward)
+                dates.append(ticker_data.iloc[i]['Date'])
+            rl_data[ticker] = {
+                'states': np.array(states),
+                'rewards': np.array(rewards),
+                'dates': dates,
+                'state_features': state_features
+            }
+        logger.info(f"RL data created for {len(rl_data)} stocks")
+        return rl_data, scaler
+    def save_processed_data(self, data: pd.DataFrame, rl_data: Dict, scaler, filename_prefix: str = "processed_stock_data"):
+        """
+        Save processed data to files
+        """
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        # Save CSV data
+        csv_filename = f"{self.data_dir}/{filename_prefix}_{timestamp}.csv"
+        data.to_csv(csv_filename, index=False)
+        logger.info(f"CSV data saved to {csv_filename}")
+        # Save RL data
+        rl_filename = f"{self.data_dir}/{filename_prefix}_rl_{timestamp}.pkl"
+        with open(rl_filename, 'wb') as f:
+            pickle.dump(rl_data, f)
+        logger.info(f"RL data saved to {rl_filename}")
+        # Save scaler
+        scaler_filename = f"{self.data_dir}/{filename_prefix}_scaler_{timestamp}.pkl"
+        with open(scaler_filename, 'wb') as f:
+            pickle.dump(scaler, f)
+        logger.info(f"Scaler saved to {scaler_filename}")
+        return csv_filename, rl_filename, scaler_filename
+    def process_stocks_pipeline(self,
+                               tickers: Optional[List[str]] = None,
+                               period: str = "10y",
+                               interval: str = "1d",
+                               use_sp500: bool = True) -> Tuple[pd.DataFrame, Dict, object]:
+        """
+        Complete pipeline for processing stock data
+        """
+        logger.info("Starting stock data processing pipeline...")
+        # Get tickers
+        if tickers is None:
+            if use_sp500:
+                tickers = self.get_sp500_tickers()
+            else:
+                tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']  # Default list
+        # Download data
+        logger.info(f"Downloading data for {len(tickers)} tickers...")
+        raw_data = self.download_multiple_stocks(tickers, period, interval)
+        if raw_data.empty:
+            logger.error("No data downloaded. Exiting.")
+            return None, None, None
+        # Process data
+        data_with_indicators = self.calculate_technical_indicators(raw_data)
+        data_with_lags = self.create_lagged_features(data_with_indicators)
+        data_with_targets = self.create_future_returns(data_with_lags)
+        cleaned_data = self.clean_and_normalize_data(data_with_targets)
+        # Create RL data
+        rl_data, scaler = self.create_rl_states_actions(cleaned_data)
+        # Save data
+        self.save_processed_data(cleaned_data, rl_data, scaler)
+        logger.info("Pipeline completed successfully!")
+        return cleaned_data, rl_data, scaler
+# Example usage and utility functions
+def example_usage():
+    """
+    Example of how to use the StockDataProcessor
+    """
+    # Initialize processor
+    processor = StockDataProcessor()
+    # Option 1: Process S&P 500 stocks
+    print("Processing S&P 500 stocks...")
+    data, rl_data, scaler = processor.process_stocks_pipeline(use_sp500=True, period="5y")
+    # Option 2: Process specific stocks
+    # custom_tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']
+    # data, rl_data, scaler = processor.process_stocks_pipeline(tickers=custom_tickers, period="10y")
+    if data is not None:
+        print(f"Processed data shape: {data.shape}")
+        print(f"Features: {data.columns.tolist()}")
+        print(f"RL data available for {len(rl_data)} stocks")
+        # Example: Access RL data for a specific stock
+        if 'AAPL' in rl_data:
+            aapl_states = rl_data['AAPL']['states']
+            aapl_rewards = rl_data['AAPL']['rewards']
+            print(f"AAPL: {aapl_states.shape[0]} sequences, each with {aapl_states.shape[1]} timesteps and {aapl_states.shape[2]} features")
+def load_processed_data(rl_filename: str, scaler_filename: str) -> Tuple[Dict, object]:
+    """
+    Load previously processed RL data
+    """
+    with open(rl_filename, 'rb') as f:
+        rl_data = pickle.load(f)
+    with open(scaler_filename, 'rb') as f:
+        scaler = pickle.load(f)
+    return rl_data, scaler
+if __name__ == "__main__":
+    example_usage()

enviromentcreator.py ADDED Viewed

	@@ -0,0 +1,463 @@

+import numpy as np
+import pandas as pd
+import gymnasium as gym
+from gymnasium import spaces
+from typing import Dict, Tuple, List, Optional
+import logging
+from dataclasses import dataclass
+from enum import Enum
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import deque
+import json
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class ActionType(Enum):
+    HOLD = 0
+    BUY = 1
+    SELL = 2
+@dataclass
+class TradingMetrics:
+    """Comprehensive trading metrics for evaluation"""
+    total_return: float = 0.0
+    sharpe_ratio: float = 0.0
+    max_drawdown: float = 0.0
+    win_rate: float = 0.0
+    total_trades: int = 0
+    profitable_trades: int = 0
+    average_trade_return: float = 0.0
+    volatility: float = 0.0
+    calmar_ratio: float = 0.0
+    sortino_ratio: float = 0.0
+class EnhancedStockTradingEnvironment(gym.Env):
+    """
+    Enhanced stock trading environment with comprehensive metrics and logging
+    """
+    def __init__(self,
+                 rl_data: Dict,
+                 ticker: str,
+                 initial_balance: float = 10000,
+                 transaction_cost: float = 0.001,  # 0.1% transaction cost
+                 max_position_size: float = 1.0,   # Maximum position size as fraction of portfolio
+                 lookback_window: int = 60,        # Number of days to look back
+                 reward_type: str = "return",      # "return", "sharpe", "sortino"
+                 enable_logging: bool = True):
+        super().__init__()
+        self.rl_data = rl_data
+        self.ticker = ticker
+        self.initial_balance = initial_balance
+        self.transaction_cost = transaction_cost
+        self.max_position_size = max_position_size
+        self.lookback_window = lookback_window
+        self.reward_type = reward_type
+        self.enable_logging = enable_logging
+        # Get data for the specific ticker
+        self.stock_data = rl_data[ticker]
+        self.states = self.stock_data['states']
+        self.prices = self._extract_prices()  # Extract actual prices
+        self.dates = self.stock_data['dates']
+        # Environment parameters
+        self.current_step = 0
+        self.max_steps = len(self.states) - 1
+        # Portfolio state
+        self.reset_portfolio()
+        # Trading history
+        self.trade_history = []
+        self.portfolio_history = []
+        self.action_history = []
+        self.reward_history = []
+        # Performance tracking
+        self.daily_returns = deque(maxlen=252)  # 1 year of returns for Sharpe calculation
+        self.drawdown_history = []
+        self.peak_portfolio_value = initial_balance
+        # Action space: 0 = Hold, 1 = Buy, 2 = Sell, with continuous position sizing
+        self.action_space = spaces.Box(
+            low=np.array([0, 0]),        # [action_type (0-2), position_size (0-1)]
+            high=np.array([2, 1]),
+            dtype=np.float32
+        )
+        # Observation space: market state + portfolio state + technical indicators
+        market_state_size = self.states.shape[1] * self.states.shape[2]
+        portfolio_state_size = 8  # Extended portfolio state
+        self.observation_space = spaces.Box(
+            low=-np.inf,
+            high=np.inf,
+            shape=(market_state_size + portfolio_state_size,),
+            dtype=np.float32
+        )
+        if self.enable_logging:
+            logger.info(f"Environment initialized for {ticker}")
+            logger.info(f"Data shape: {self.states.shape}")
+            logger.info(f"Price range: ${self.prices.min():.2f} - ${self.prices.max():.2f}")
+    def _extract_prices(self) -> np.ndarray:
+        """Extract actual prices from the state data"""
+        # Assuming the first feature in states is the close price
+        return self.states[:, -1, 3]  # Close price is typically at index 3
+    def reset_portfolio(self):
+        """Reset portfolio to initial state"""
+        self.balance = self.initial_balance
+        self.shares_held = 0
+        self.net_worth = self.initial_balance
+        self.max_net_worth = self.initial_balance
+        self.position_value = 0
+        self.total_transaction_costs = 0
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+        self.current_step = 0
+        self.reset_portfolio()
+        # Clear histories
+        self.trade_history.clear()
+        self.portfolio_history.clear()
+        self.action_history.clear()
+        self.reward_history.clear()
+        self.daily_returns.clear()
+        self.drawdown_history.clear()
+        self.peak_portfolio_value = self.initial_balance
+        return self._get_observation(), {}
+    def step(self, action):
+        # Parse action
+        action_type = int(np.clip(action[0], 0, 2))
+        position_size = np.clip(action[1], 0, 1)
+        # Execute action
+        reward = self._execute_action(action_type, position_size)
+        # Update portfolio metrics
+        self._update_portfolio_metrics()
+        # Store history
+        self._store_step_data(action_type, position_size, reward)
+        # Move to next step
+        self.current_step += 1
+        # Check if episode is done
+        done = self.current_step >= self.max_steps
+        truncated = False
+        # Calculate final metrics if done
+        info = {}
+        if done:
+            info = self._calculate_episode_metrics()
+        return self._get_observation(), reward, done, truncated, info
+    def _execute_action(self, action_type: int, position_size: float) -> float:
+        """Execute trading action and return reward"""
+        current_price = self.prices[self.current_step]
+        previous_net_worth = self.net_worth
+        if action_type == ActionType.BUY.value:
+            # Calculate how much to buy
+            max_affordable = self.balance / current_price
+            shares_to_buy = int(max_affordable * position_size)
+            if shares_to_buy > 0:
+                cost = shares_to_buy * current_price
+                transaction_cost = cost * self.transaction_cost
+                if self.balance >= cost + transaction_cost:
+                    self.shares_held += shares_to_buy
+                    self.balance -= (cost + transaction_cost)
+                    self.total_transaction_costs += transaction_cost
+                    self.trade_history.append({
+                        'step': self.current_step,
+                        'action': 'BUY',
+                        'shares': shares_to_buy,
+                        'price': current_price,
+                        'cost': cost,
+                        'transaction_cost': transaction_cost
+                    })
+        elif action_type == ActionType.SELL.value:
+            # Calculate how much to sell
+            shares_to_sell = int(self.shares_held * position_size)
+            if shares_to_sell > 0:
+                revenue = shares_to_sell * current_price
+                transaction_cost = revenue * self.transaction_cost
+                self.shares_held -= shares_to_sell
+                self.balance += (revenue - transaction_cost)
+                self.total_transaction_costs += transaction_cost
+                self.trade_history.append({
+                    'step': self.current_step,
+                    'action': 'SELL',
+                    'shares': shares_to_sell,
+                    'price': current_price,
+                    'revenue': revenue,
+                    'transaction_cost': transaction_cost
+                })
+        # Calculate new net worth
+        self.position_value = self.shares_held * current_price
+        self.net_worth = self.balance + self.position_value
+        # Calculate reward based on selected method
+        reward = self._calculate_reward(previous_net_worth)
+        return reward
+    def _calculate_reward(self, previous_net_worth: float) -> float:
+        """Calculate reward based on the selected reward type"""
+        if self.reward_type == "return":
+            # Simple return-based reward
+            return (self.net_worth - previous_net_worth) / previous_net_worth
+        elif self.reward_type == "sharpe":
+            # Sharpe ratio-based reward
+            if len(self.daily_returns) > 1:
+                returns = np.array(self.daily_returns)
+                if np.std(returns) > 0:
+                    sharpe = np.mean(returns) / np.std(returns) * np.sqrt(252)
+                    return sharpe / 100  # Scale down
+            return 0
+        elif self.reward_type == "sortino":
+            # Sortino ratio-based reward
+            if len(self.daily_returns) > 1:
+                returns = np.array(self.daily_returns)
+                negative_returns = returns[returns < 0]
+                if len(negative_returns) > 0 and np.std(negative_returns) > 0:
+                    sortino = np.mean(returns) / np.std(negative_returns) * np.sqrt(252)
+                    return sortino / 100  # Scale down
+            return 0
+        else:
+            return (self.net_worth - previous_net_worth) / previous_net_worth
+    def _update_portfolio_metrics(self):
+        """Update portfolio performance metrics"""
+        # Calculate daily return
+        if len(self.portfolio_history) > 0:
+            daily_return = (self.net_worth - self.portfolio_history[-1]['net_worth']) / self.portfolio_history[-1]['net_worth']
+            self.daily_returns.append(daily_return)
+        # Update peak and drawdown
+        if self.net_worth > self.peak_portfolio_value:
+            self.peak_portfolio_value = self.net_worth
+        current_drawdown = (self.peak_portfolio_value - self.net_worth) / self.peak_portfolio_value
+        self.drawdown_history.append(current_drawdown)
+    def _store_step_data(self, action_type: int, position_size: float, reward: float):
+        """Store data for analysis"""
+        self.action_history.append({
+            'step': self.current_step,
+            'action_type': action_type,
+            'position_size': position_size
+        })
+        self.portfolio_history.append({
+            'step': self.current_step,
+            'balance': self.balance,
+            'shares_held': self.shares_held,
+            'position_value': self.position_value,
+            'net_worth': self.net_worth,
+            'price': self.prices[self.current_step]
+        })
+        self.reward_history.append(reward)
+    def _calculate_episode_metrics(self) -> Dict:
+        """Calculate comprehensive episode metrics"""
+        if len(self.portfolio_history) == 0:
+            return {}
+        # Basic returns
+        total_return = (self.net_worth - self.initial_balance) / self.initial_balance
+        # Risk metrics
+        returns = np.array(self.daily_returns) if self.daily_returns else np.array([0])
+        max_drawdown = max(self.drawdown_history) if self.drawdown_history else 0
+        volatility = np.std(returns) * np.sqrt(252)
+        # Sharpe ratio
+        sharpe_ratio = np.mean(returns) / np.std(returns) * np.sqrt(252) if np.std(returns) > 0 else 0
+        # Sortino ratio
+        negative_returns = returns[returns < 0]
+        sortino_ratio = np.mean(returns) / np.std(negative_returns) * np.sqrt(252) if len(negative_returns) > 0 and np.std(negative_returns) > 0 else 0
+        # Calmar ratio
+        calmar_ratio = (np.mean(returns) * 252) / max_drawdown if max_drawdown > 0 else 0
+        # Trading metrics
+        total_trades = len(self.trade_history)
+        buy_trades = [t for t in self.trade_history if t['action'] == 'BUY']
+        sell_trades = [t for t in self.trade_history if t['action'] == 'SELL']
+        # Win rate calculation (simplified)
+        profitable_trades = len([r for r in self.reward_history if r > 0])
+        win_rate = profitable_trades / len(self.reward_history) if len(self.reward_history) > 0 else 0
+        metrics = {
+            'total_return': total_return,
+            'sharpe_ratio': sharpe_ratio,
+            'sortino_ratio': sortino_ratio,
+            'calmar_ratio': calmar_ratio,
+            'max_drawdown': max_drawdown,
+            'volatility': volatility,
+            'win_rate': win_rate,
+            'total_trades': total_trades,
+            'buy_trades': len(buy_trades),
+            'sell_trades': len(sell_trades),
+            'final_balance': self.balance,
+            'final_shares': self.shares_held,
+            'final_net_worth': self.net_worth,
+            'total_transaction_costs': self.total_transaction_costs,
+            'average_reward': np.mean(self.reward_history) if self.reward_history else 0
+        }
+        if self.enable_logging:
+            logger.info(f"Episode completed for {self.ticker}")
+            logger.info(f"Total Return: {total_return:.2%}")
+            logger.info(f"Sharpe Ratio: {sharpe_ratio:.2f}")
+            logger.info(f"Max Drawdown: {max_drawdown:.2%}")
+            logger.info(f"Win Rate: {win_rate:.2%}")
+        return metrics
+    def _get_observation(self):
+        """Get current observation"""
+        if self.current_step >= len(self.states):
+            # Return last available state if we're at the end
+            market_state = self.states[-1].flatten()
+        else:
+            market_state = self.states[self.current_step].flatten()
+        # Portfolio state (normalized)
+        current_price = self.prices[min(self.current_step, len(self.prices)-1)]
+        portfolio_state = np.array([
+            self.balance / self.initial_balance,                    # Normalized balance
+            self.shares_held * current_price / self.initial_balance, # Normalized position value
+            self.net_worth / self.initial_balance,                  # Normalized net worth
+            (self.net_worth - self.initial_balance) / self.initial_balance, # Return
+            len(self.trade_history) / 100,                          # Number of trades (normalized)
+            self.total_transaction_costs / self.initial_balance,    # Transaction costs
+            max(self.drawdown_history) if self.drawdown_history else 0, # Current max drawdown
+            np.std(self.daily_returns) if len(self.daily_returns) > 1 else 0 # Volatility
+        ])
+        return np.concatenate([market_state, portfolio_state]).astype(np.float32)
+    def render(self, mode='human'):
+        """Render environment state"""
+        current_price = self.prices[min(self.current_step, len(self.prices)-1)]
+        print(f"\n=== {self.ticker} Trading Environment ===")
+        print(f"Step: {self.current_step}/{self.max_steps}")
+        print(f"Current Price: ${current_price:.2f}")
+        print(f"Balance: ${self.balance:.2f}")
+        print(f"Shares Held: {self.shares_held}")
+        print(f"Position Value: ${self.position_value:.2f}")
+        print(f"Net Worth: ${self.net_worth:.2f}")
+        print(f"Total Return: {((self.net_worth - self.initial_balance) / self.initial_balance):.2%}")
+        print(f"Total Trades: {len(self.trade_history)}")
+        print(f"Transaction Costs: ${self.total_transaction_costs:.2f}")
+        if self.drawdown_history:
+            print(f"Max Drawdown: {max(self.drawdown_history):.2%}")
+        print("=" * 40)
+    def plot_performance(self, save_path: Optional[str] = None):
+        """Plot comprehensive performance metrics"""
+        if len(self.portfolio_history) == 0:
+            print("No data to plot")
+            return
+        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
+        fig.suptitle(f'{self.ticker} Trading Performance', fontsize=16)
+        # Portfolio value over time
+        steps = [p['step'] for p in self.portfolio_history]
+        net_worths = [p['net_worth'] for p in self.portfolio_history]
+        prices = [p['price'] for p in self.portfolio_history]
+        axes[0, 0].plot(steps, net_worths, label='Portfolio Value', linewidth=2)
+        axes[0, 0].axhline(y=self.initial_balance, color='r', linestyle='--', label='Initial Balance')
+        axes[0, 0].set_title('Portfolio Value Over Time')
+        axes[0, 0].set_xlabel('Time Steps')
+        axes[0, 0].set_ylabel('Portfolio Value ($)')
+        axes[0, 0].legend()
+        axes[0, 0].grid(True)
+        # Stock price over time
+        axes[0, 1].plot(steps, prices, label='Stock Price', color='orange', linewidth=2)
+        axes[0, 1].set_title('Stock Price Over Time')
+        axes[0, 1].set_xlabel('Time Steps')
+        axes[0, 1].set_ylabel('Price ($)')
+        axes[0, 1].legend()
+        axes[0, 1].grid(True)
+        # Drawdown
+        if self.drawdown_history:
+            axes[1, 0].fill_between(range(len(self.drawdown_history)),
+                                   self.drawdown_history, 0,
+                                   alpha=0.3, color='red')
+            axes[1, 0].plot(self.drawdown_history, color='red', linewidth=2)
+            axes[1, 0].set_title('Drawdown Over Time')
+            axes[1, 0].set_xlabel('Time Steps')
+            axes[1, 0].set_ylabel('Drawdown')
+            axes[1, 0].grid(True)
+        # Action distribution
+        actions = [a['action_type'] for a in self.action_history]
+        action_counts = [actions.count(i) for i in range(3)]
+        action_labels = ['Hold', 'Buy', 'Sell']
+        axes[1, 1].pie(action_counts, labels=action_labels, autopct='%1.1f%%')
+        axes[1, 1].set_title('Action Distribution')
+        plt.tight_layout()
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+            logger.info(f"Performance plot saved to {save_path}")
+        plt.show()
+    def get_metrics_summary(self) -> TradingMetrics:
+        """Get trading metrics as a structured object"""
+        metrics_dict = self._calculate_episode_metrics()
+        return TradingMetrics(
+            total_return=metrics_dict.get('total_return', 0),
+            sharpe_ratio=metrics_dict.get('sharpe_ratio', 0),
+            max_drawdown=metrics_dict.get('max_drawdown', 0),
+            win_rate=metrics_dict.get('win_rate', 0),
+            total_trades=metrics_dict.get('total_trades', 0),
+            profitable_trades=int(metrics_dict.get('win_rate', 0) * metrics_dict.get('total_trades', 0)),
+            average_trade_return=metrics_dict.get('average_reward', 0),
+            volatility=metrics_dict.get('volatility', 0),
+            calmar_ratio=metrics_dict.get('calmar_ratio', 0),
+            sortino_ratio=metrics_dict.get('sortino_ratio', 0)
+        )