LSTMは時系列データの長期依存関係を学習できるニューラルネットワークです。過去N日間の価格・出来高・テクニカル指標を入力として、翌日の値動きの方向(上昇/下落)を予測するモデルを作れます。この記事では、Kerasでモデルを実装し、バックテストで検証するまでの手順をまとめます。
📘 外部参考:Backtesting.py(公式ドキュメント) / Backtrader 公式
📘 外部参考:LSTM(Wikipedia) / Keras LSTM(公式)
📘 外部参考:Keras 公式ドキュメント
環境構築
# Install the Python packages for this walkthrough.
pip install tensorflow yfinance pandas numpy scikit-learn matplotlib
データ取得と特徴量エンジニアリング
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
def get_features(ticker, period='3y'):
    """Download price history and derive indicator features plus the label.

    Args:
        ticker: Symbol passed to ``yf.download`` (e.g. ``'7203.T'``).
        period: Look-back period passed to ``yf.download``.

    Returns:
        DataFrame holding the downloaded columns plus ``MA5``/``MA20``/``MA60``,
        ``RSI``, ``MACD``, ``MACD_Signal``, the Bollinger-band columns,
        ``Volume_change`` and a binary ``Target`` (1 when the next row's close
        is higher than the current close). Rows with any missing value are
        dropped, and so is the final row, whose next close is unknown.

    Raises:
        ValueError: If the download returns no rows.
    """
    df = yf.download(ticker, period=period, progress=False)
    if df.empty:
        raise ValueError(
            f"No price data returned for {ticker!r} (period={period!r})"
        )

    # Some yfinance versions return (field, ticker) MultiIndex columns even
    # for a single ticker; flatten them so df['Close'] is a Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    close = df['Close']

    # Simple moving averages.
    df['MA5'] = close.rolling(5).mean()
    df['MA20'] = close.rolling(20).mean()
    df['MA60'] = close.rolling(60).mean()

    # RSI from 14-row simple averages of gains and losses.
    delta = close.diff()
    gain = delta.clip(lower=0).rolling(14).mean()
    loss = (-delta.clip(upper=0)).rolling(14).mean()
    df['RSI'] = 100 - (100 / (1 + gain / loss))

    # MACD (12/26 EMA difference) and its 9-span signal line.
    exp12 = close.ewm(span=12).mean()
    exp26 = close.ewm(span=26).mean()
    df['MACD'] = exp12 - exp26
    df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()

    # Bollinger bands: 20-row mean +/- 2 standard deviations.
    df['BB_mid'] = close.rolling(20).mean()
    df['BB_std'] = close.rolling(20).std()
    df['BB_upper'] = df['BB_mid'] + 2 * df['BB_std']
    df['BB_lower'] = df['BB_mid'] - 2 * df['BB_std']

    # Volume change rate. A zero previous volume yields +/-inf, which dropna()
    # would keep, so turn it into NaN first.
    df['Volume_change'] = (
        df['Volume'].pct_change().replace([np.inf, -np.inf], np.nan)
    )

    # Label: does the next row close higher than this one?
    next_close = close.shift(-1)
    df['Target'] = (next_close > close).astype(int)

    # The last row has no next close. The comparison above silently labels it
    # 0, so remove it explicitly instead of keeping a made-up label.
    df = df[next_close.notna()]
    return df.dropna()
# Build the feature table for Toyota (ticker 7203.T) and show a quick preview.
df = get_features('7203.T')
print(f"データ件数: {len(df)}")
print(df.tail()[['Close', 'RSI', 'MACD', 'Target']])
LSTMモデルの構築
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
def prepare_sequences(df, feature_cols, target_col, window=20, train_ratio=None):
    """Scale the features and cut them into fixed-length windows.

    Sample ``j`` consists of the scaled rows ``j .. j + window - 1`` and is
    labelled with the target value of row ``j + window``.

    Args:
        df: Frame containing ``feature_cols`` and ``target_col``.
        feature_cols: Column names used as model inputs.
        target_col: Column name of the label.
        window: Number of consecutive rows per sample.
        train_ratio: Fraction (0 < ratio <= 1) of the samples, counted from the
            start, that will be used for training. When given, the scaler is
            fitted only on the rows those samples can see, so statistics of
            the later (test) period do not leak into training. ``None`` fits
            the scaler on every row.

    Returns:
        ``(X, y, scaler)`` where ``X`` has shape
        ``(n_samples, window, len(feature_cols))``, ``y`` has length
        ``n_samples`` and ``scaler`` is the fitted ``MinMaxScaler``.

    Raises:
        ValueError: If there are not more rows than ``window`` or if
            ``train_ratio`` is outside ``(0, 1]``.
    """
    features = df[feature_cols].values
    targets = df[target_col].values
    n_rows = len(features)
    if n_rows <= window:
        raise ValueError(
            f"Need more than {window} rows to build sequences, got {n_rows}"
        )

    scaler = MinMaxScaler()
    if train_ratio is None:
        scaler.fit(features)
    else:
        if not 0 < train_ratio <= 1:
            raise ValueError("train_ratio must be in the interval (0, 1]")
        n_train = max(int((n_rows - window) * train_ratio), 1)
        # The last training sample (index n_train - 1) ends at row
        # n_train + window - 2, so that is the last row the scaler may see.
        scaler.fit(features[:n_train + window - 1])
    # Rows outside the fitted range can scale to values outside [0, 1].
    features_scaled = scaler.transform(features)

    X = np.stack(
        [features_scaled[end - window:end] for end in range(window, n_rows)]
    )
    y = np.array(targets[window:])
    return X, y, scaler


feature_cols = ['Close', 'Volume', 'MA5', 'MA20', 'RSI', 'MACD', 'BB_upper', 'BB_lower']
TRAIN_RATIO = 0.8
X, y, scaler = prepare_sequences(
    df, feature_cols, 'Target', window=20, train_ratio=TRAIN_RATIO
)

# Chronological split: the most recent samples form the test set.
split = int(len(X) * TRAIN_RATIO)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
print(f"訓練: {X_train.shape}, テスト: {X_test.shape}")
def build_lstm_model(input_shape):
    """Build and compile the two-layer LSTM binary classifier.

    Args:
        input_shape: ``(window, n_features)`` shape of one input sample.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, using
        the Adam optimizer, binary cross-entropy loss and an accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly; passing input_shape= to the first
        # layer is deprecated in Keras 3 and emits a warning.
        tf.keras.Input(shape=input_shape),
        LSTM(64, return_sequences=True),
        BatchNormalization(),
        Dropout(0.3),
        # Second LSTM returns only its final state for the dense head.
        LSTM(32, return_sequences=False),
        BatchNormalization(),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
# One sample's shape is everything after the leading sample axis:
# (window length, number of features).
model = build_lstm_model(tuple(X_train.shape[1:]))
model.summary()
モデルの学習と評価
# Stop when the validation loss stops improving (keeping the best weights)
# and halve the learning rate after shorter plateaus.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', patience=7, factor=0.5),
]
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# Evaluate on the held-out test samples.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"テスト精度: {accuracy:.3%}")

# Turn the predicted probabilities into class labels at the 0.5 cut-off.
y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()

from sklearn.metrics import classification_report

# Fix the label set so the two target names always line up, even when the
# test labels or the predictions contain only one class. zero_division=0
# reports 0 instead of warning when a class is never predicted.
print(classification_report(
    y_test, y_pred,
    labels=[0, 1],
    target_names=['下落', '上昇'],
    zero_division=0,
))
バックテストの実装
def backtest_lstm(df, model, scaler, feature_cols, window=20,
                  initial_capital=1000000, threshold=0.55):
    """Simulate a long-only strategy driven by the model's predictions.

    For each row ``i`` from ``window`` to the second-to-last row, the scaled
    rows ``i - window .. i - 1`` are fed to the model. A probability above
    ``threshold`` buys as many whole shares as the cash allows at
    ``Close[i]`` (when flat); any other prediction sells the whole position
    at ``Close[i]`` (when holding). A summary is printed at the end.

    Args:
        df: Frame containing ``feature_cols`` and a ``Close`` column.
        model: Object whose ``predict(batch, verbose=0)`` returns one
            probability per sample.
        scaler: Fitted object whose ``transform`` scales the feature matrix.
        feature_cols: Column names fed to the model.
        window: Number of rows per input sequence.
        initial_capital: Starting cash.
        threshold: Probability above which a prediction counts as "up".

    Returns:
        ``(portfolio_values, trades)``: the mark-to-market value after each
        simulated row and the list of executed trades. Both are empty when
        ``df`` is too short to form a single step.
    """
    prices = np.asarray(df['Close']).reshape(-1)
    steps = range(window, len(prices) - 1)

    if steps:
        features_scaled = scaler.transform(df[feature_cols].values)
        # Predict every window in one batched call instead of one call per row.
        windows = np.stack([features_scaled[i - window:i] for i in steps])
        probabilities = np.asarray(model.predict(windows, verbose=0)).reshape(-1)
    else:
        probabilities = []

    capital = initial_capital
    position = 0  # number of shares held
    trades = []
    portfolio_values = []

    for i, prob in zip(steps, probabilities):
        current_price = float(prices[i])
        if prob > threshold:
            # Buy signal: only act when flat and at least one share is affordable.
            if position == 0 and capital > current_price:
                shares = int(capital // current_price)
                position = shares
                capital -= shares * current_price
                trades.append({'type': 'buy', 'price': current_price, 'shares': shares})
        elif position > 0:
            # Sell signal: liquidate the whole position.
            capital += position * current_price
            trades.append({'type': 'sell', 'price': current_price, 'shares': position})
            position = 0
        portfolio_values.append(capital + position * current_price)

    final_value = portfolio_values[-1] if portfolio_values else initial_capital
    total_return = (final_value - initial_capital) / initial_capital
    if steps:
        # Measure buy-and-hold over exactly the rows the strategy was marked on.
        start_price = prices[steps[0]]
        end_price = prices[steps[-1]]
        buy_hold = (end_price - start_price) / start_price
    else:
        buy_hold = 0.0

    print(f"初期資金: {initial_capital:,}円")
    print(f"最終資産: {final_value:,.0f}円")
    print(f"LSTM戦略リターン: {total_return:.2%}")
    print(f"バイ&ホールドリターン: {buy_hold:.2%}")
    print(f"トレード回数: {len(trades)}回")
    return portfolio_values, trades
# Backtest only on the held-out tail. The slice starts at row `split`, so the
# first simulated window is the first test sample and the reported return is
# not inflated by the period the model was trained on.
portfolio_values, trades = backtest_lstm(df.iloc[split:], model, scaler, feature_cols)
まとめ
LSTMを株価予測に活用するポイントをまとめます。過去20〜60日分の価格・出来高とテクニカル指標をシーケンスとして入力することで、時系列パターンを学習できます。翌日の方向予測(上昇/下落の2値分類)はシンプルで実用的な設計です。過学習防止にはDropout・BatchNormalization・EarlyStoppingの組み合わせが有効です。バックテストで実際の収益性を必ず検証することが重要で、単純な精度指標だけでは不十分です。また、株価データは非定常なため、定期的なモデルの再学習が必要になります。

