決算短信は上場企業が四半期ごとに発表する業績報告書です。売上高・営業利益・純利益などが記載されており、機械学習を使うと決算サプライズ(市場予想との乖離)の予測を試みることができます。この記事では、yfinanceと機械学習モデルで決算内容と株価への影響を予測する実装をまとめます。なお、本記事のコードでは市場予想のデータは使わず、純利益の前四半期比成長率が5%を超えるかどうかを「好決算」の代理指標としています。
📘 外部参考:yfinance 公式GitHub / PyPI
決算データの取得
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
def get_earnings_data(ticker):
    """Fetch quarterly income-statement metrics for ``ticker`` via yfinance.

    Reads ``quarterly_financials`` and extracts the 'Total Revenue',
    'Operating Income' and 'Net Income' rows (whichever are present), one
    row per reported quarter in ascending date order, then adds the
    quarter-over-quarter growth rate of every extracted metric.

    Args:
        ticker: Symbol understood by yfinance (e.g. '7203.T').

    Returns:
        DataFrame indexed by quarter date with the columns ``revenue``,
        ``operating_income``, ``net_income`` and ``revenue_growth``,
        ``op_income_growth``, ``net_income_growth`` (only for the rows the
        statement actually contains); rows containing any NaN are dropped.
        ``None`` when the statement is empty or has none of the tracked rows.
    """
    # (statement row label, metric column, growth column)
    tracked = (
        ('Total Revenue', 'revenue', 'revenue_growth'),
        ('Operating Income', 'operating_income', 'op_income_growth'),
        ('Net Income', 'net_income', 'net_income_growth'),
    )

    income_stmt = yf.Ticker(ticker).quarterly_financials
    if income_stmt is None or income_stmt.empty:
        print(f"{ticker}: 決算データなし")
        return None

    # Some statements lack a row (e.g. no 'Operating Income'); only work
    # with the rows that exist instead of failing with a KeyError later.
    present = [entry for entry in tracked if entry[0] in income_stmt.index]
    if not present:
        print(f"{ticker}: 決算データなし")
        return None

    # Statement columns are the quarter dates, so each selected row becomes
    # one metric column indexed by date.
    df = pd.DataFrame({
        metric: pd.to_numeric(income_stmt.loc[label], errors='coerce')
        for label, metric, _ in present
    }).sort_index()

    for _, metric, growth in present:
        # fill_method=None: do not forward-fill a missing quarter, otherwise
        # the quarter after a gap would get a multi-quarter change labelled
        # as quarter-over-quarter growth.
        df[growth] = df[metric].pct_change(fill_method=None)

    return df.dropna()
# Fetch Toyota's earnings data and show the most recent quarters
df_earnings = get_earnings_data('7203.T')
if df_earnings is not None:
    print(df_earnings.loc[:, ['revenue', 'revenue_growth', 'net_income_growth']].tail())
決算発表日前後の株価反応分析
def analyze_earnings_reaction(ticker, earnings_dates, days_before=5, days_after=10):
    """Event-study of the close-price reaction around earnings dates.

    For every date in ``earnings_dates`` the trading day nearest to it is
    located in five years of daily closes, and two returns are measured:

    * ``gap``   - close of the matched day vs. the previous close
    * ``drift`` - close ``days_after`` trading days later vs. the matched day

    Args:
        ticker: Symbol understood by yfinance.
        earnings_dates: Iterable of values accepted by ``pd.Timestamp``.
        days_before: Minimum number of trading days required before the
            matched day inside the window.
        days_after: Number of trading days over which drift is measured.

    Returns:
        DataFrame with columns ``date``, ``gap``, ``drift`` and
        ``total_return`` (one row per usable date), or ``None`` when no
        price data or no usable date is available. Summary statistics are
        printed as a side effect.
    """
    stock_data = yf.download(ticker, period='5y', progress=False)['Close']
    # Recent yfinance versions return 'Close' as a one-column DataFrame
    # (column = ticker); reduce it to a Series so the returns are scalars.
    if isinstance(stock_data, pd.DataFrame):
        if stock_data.shape[1] == 0:
            return None
        stock_data = stock_data.iloc[:, 0]
    if stock_data.empty:
        return None

    index_tz = getattr(stock_data.index, 'tz', None)
    results = []
    for raw_date in earnings_dates:
        try:
            date = pd.Timestamp(raw_date)
            if pd.isna(date):
                continue
            # Align timezone-awareness with the price index; mixing naive
            # and aware timestamps raises on comparison.
            if date.tzinfo is not None and index_tz is None:
                date = date.tz_localize(None)
            elif date.tzinfo is None and index_tz is not None:
                date = date.tz_localize(index_tz)

            # Calendar-day window, doubled so that it still holds enough
            # trading days after weekends/holidays are removed.
            start = date - pd.Timedelta(days=days_before * 2)
            end = date + pd.Timedelta(days=days_after * 2)
            window = stock_data.loc[start:end]
            if len(window) < days_before + days_after:
                continue

            # Position of the trading day nearest to the earnings date
            idx = window.index.get_indexer([date], method='nearest')[0]
            if idx < days_before or idx + days_after >= len(window):
                continue

            base_price = window.iloc[idx - 1]
            event_price = window.iloc[idx]
            # Move from the previous close to the matched day's close
            gap = (event_price - base_price) / base_price
            # Subsequent move over the next `days_after` trading days
            drift = (window.iloc[idx + days_after] - event_price) / event_price
            results.append({
                'date': date,
                'gap': gap,
                'drift': drift,
                'total_return': (1 + gap) * (1 + drift) - 1
            })
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Skip the date but report why, instead of dropping it silently.
            print(f"{ticker} {raw_date}: {e}")
            continue

    if not results:
        return None
    df = pd.DataFrame(results)
    print(f"決算発表後の平均ギャップ: {df['gap'].mean():.2%}")
    print(f"ギャップ後のドリフト: {df['drift'].mean():.2%}")
    print(f"ポジティブ決算の割合: {(df['gap'] > 0).mean():.1%}")
    return df
機械学習で決算サプライズを予測
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
def build_earnings_features(df_earnings, stock_data):
    """Build one feature row per quarter from earnings and close prices.

    Quarters are processed from the fourth row of ``df_earnings`` onward,
    because each row needs the three preceding quarters. A quarter is
    skipped when fewer than 20 prices exist on or before its date.

    Args:
        df_earnings: DataFrame indexed by quarter date, sorted ascending,
            with a ``revenue_growth`` column; ``op_income_growth`` and
            ``net_income_growth`` default to 0 when absent.
        stock_data: Close prices indexed by date, either a Series or a
            single-column DataFrame (as returned by recent yfinance).

    Returns:
        DataFrame with columns ``revenue_growth``, ``op_income_growth``,
        ``net_income_growth``, ``revenue_growth_3q_avg``,
        ``revenue_acceleration``, ``price_trend_30d`` and
        ``price_volatility``; an empty DataFrame when no quarter qualifies.
    """
    # Reduce a one-column DataFrame to a Series; otherwise every price
    # feature below would be a Series instead of a scalar.
    if isinstance(stock_data, pd.DataFrame):
        if stock_data.shape[1] == 0:
            return pd.DataFrame([])
        stock_data = stock_data.iloc[:, 0]

    features = []
    for i in range(3, len(df_earnings)):
        row = df_earnings.iloc[i]
        date = df_earnings.index[i]

        # Recent price trend: the last 30 closes on or before the quarter date
        price_window = stock_data.loc[stock_data.index <= date].tail(30)
        if len(price_window) < 20:
            continue
        first_price = price_window.iloc[0]
        price_trend = (price_window.iloc[-1] - first_price) / first_price
        # Annualised volatility of daily returns; missing closes are not
        # forward-filled, so they do not add artificial zero returns.
        price_vol = price_window.pct_change(fill_method=None).std() * np.sqrt(252)

        # Growth trend over the three preceding quarters
        prev_3q = df_earnings.iloc[i - 3:i]
        rev_accel = prev_3q['revenue_growth'].diff().mean()  # growth acceleration

        features.append({
            'revenue_growth': row.get('revenue_growth', 0),
            'op_income_growth': row.get('op_income_growth', 0),
            'net_income_growth': row.get('net_income_growth', 0),
            'revenue_growth_3q_avg': prev_3q['revenue_growth'].mean(),
            'revenue_acceleration': rev_accel,
            'price_trend_30d': price_trend,
            'price_volatility': price_vol,
        })
    return pd.DataFrame(features)
# Collect features for several tickers
tickers = ['7203.T', '6758.T', '9984.T', '4063.T']
all_features = []
for ticker in tickers:
    try:
        earnings = get_earnings_data(ticker)
        if earnings is None:
            continue
        stock = yf.download(ticker, period='5y', progress=False)['Close']
        feats = build_earnings_features(earnings, stock)
        all_features.append(feats)
    except Exception as e:
        # Best effort: one failing ticker must not stop the collection.
        print(f"{ticker}: {e}")

# Always bind df_ml so that later code checking len(df_ml) does not hit a
# NameError when no ticker produced any features.
df_ml = pd.DataFrame()
if all_features:
    df_ml = pd.concat(all_features, ignore_index=True).dropna()
    print(f"学習データ件数: {len(df_ml)}")
モデルの学習と評価
def train_earnings_model(df_features, target_col='net_income_growth', threshold=0.05):
    """Train a classifier for "target growth above ``threshold``".

    The label is 1 when ``df_features[target_col] > threshold`` and 0
    otherwise; every other column is used as a feature. The model is
    evaluated with a 4-split ``TimeSeriesSplit`` and then refitted on all
    rows.

    Args:
        df_features: Feature table without NaN; must contain ``target_col``.
        target_col: Column the binary label is derived from.
        threshold: Growth rate above which a row is labelled positive.

    Returns:
        Tuple ``(model, scaler, feature_cols)``: the classifier fitted on
        all rows, the ``StandardScaler`` fitted on all rows, and the list of
        feature column names in the order the model expects.

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when there are too
            few rows for 4 splits or the full data holds a single class.
    """
    # NOTE(review): the remaining columns include same-quarter growth
    # figures, and TimeSeriesSplit assumes rows are in chronological order;
    # confirm both against how the caller assembles df_features.
    feature_cols = [c for c in df_features.columns if c != target_col]
    X = df_features[feature_cols].values
    y = (df_features[target_col] > threshold).astype(int).values

    def make_model():
        return GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=42)

    # Time-series cross-validation
    tscv = TimeSeriesSplit(n_splits=4)
    scores = []
    for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
        # The classifier cannot be fitted on a single class; skip such a
        # fold rather than aborting the whole evaluation.
        if np.unique(y[train_idx]).size < 2:
            print(f"Fold {fold+1} スキップ: 学習データが単一クラス")
            continue
        # Fit the scaler on the training part only, so statistics of the
        # held-out rows do not leak into the fold.
        fold_scaler = StandardScaler().fit(X[train_idx])
        fold_model = make_model()
        fold_model.fit(fold_scaler.transform(X[train_idx]), y[train_idx])
        score = fold_model.score(fold_scaler.transform(X[test_idx]), y[test_idx])
        scores.append(score)
        print(f"Fold {fold+1} 精度: {score:.3%}")
    if scores:
        print(f"平均精度: {np.mean(scores):.3%}")

    # Final fit on all rows; this scaler/model pair is what callers use
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    model = make_model()
    model.fit(X_scaled, y)

    # Feature importance
    importance = pd.DataFrame({
        'feature': feature_cols,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\n特徴量重要度:")
    print(importance.to_string())
    return model, scaler, feature_cols
# Train only when more than 20 samples were collected
if df_ml.shape[0] > 20:
    (model, scaler, feature_cols) = train_earnings_model(df_ml)
Discord通知との連携
import requests
def send_discord_notification(webhook_url, message):
    """POST ``message`` as the ``content`` field of a JSON webhook payload.

    Args:
        webhook_url: URL the payload is posted to.
        message: Text sent as the payload's ``content``.

    Raises:
        requests.RequestException: On connection errors, on timeout, or
            when the server answers with an HTTP error status.
    """
    payload = {"content": message}
    # Without a timeout requests waits indefinitely for an unresponsive
    # server.
    response = requests.post(webhook_url, json=payload, timeout=10)
    # Surface 4xx/5xx answers instead of treating them as delivered.
    response.raise_for_status()
def quarterly_earnings_alert(watchlist, model, scaler, feature_cols, discord_webhook_url):
    """Score the latest quarter of each watchlist ticker and notify Discord.

    For every ticker the most recent feature row is scaled with ``scaler``
    and scored with ``model``; one alert line per scored ticker is
    collected and, if any exist, sent as a single message.

    Args:
        watchlist: Iterable of ticker symbols.
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        scaler: Fitted transformer exposing ``transform``.
        feature_cols: Feature column names in the order the model expects.
        discord_webhook_url: Webhook URL the message is posted to.

    Returns:
        The list of alert lines (empty when no ticker could be scored).
    """
    alerts = []
    for ticker in watchlist:
        try:
            earnings = get_earnings_data(ticker)
            if earnings is None:
                continue
            stock = yf.download(ticker, period='6mo', progress=False)['Close']
            # Recent yfinance versions return 'Close' as a one-column
            # DataFrame; the feature builder works on a price Series.
            if isinstance(stock, pd.DataFrame):
                stock = stock.iloc[:, 0]
            feats = build_earnings_features(earnings, stock)
            if feats.empty:
                continue
            latest = feats.iloc[-1:][feature_cols].values
            X = scaler.transform(latest)
            prob = model.predict_proba(X)[0][1]
            pred = model.predict(X)[0]
            alerts.append(f"{ticker}: 好決算確率 {prob:.1%} - {'買い検討' if pred == 1 else '様子見'}")
        except Exception as e:
            # Best effort: report the ticker and carry on with the rest.
            print(f"{ticker}: {e}")

    if alerts:
        message = "決算分析アラート\n" + "\n".join(alerts)
        try:
            send_discord_notification(discord_webhook_url, message)
        except requests.RequestException as e:
            # A delivery problem must not discard the computed alerts.
            print(f"Discord通知送信失敗: {e}")
        else:
            print("Discord通知送信完了")
    return alerts
まとめ
試してみた感想としては、yfinanceで四半期財務データを取得して特徴量化するのは意外とシンプルです。GradientBoostingで好決算かどうかを予測する分類モデルは、時系列クロスバリデーションで評価するのが現実的だと思っています。通常のランダム分割では未来のデータで学習して過去を予測することになり、精度が実態より高く見えてしまうためです。EDINETとの組み合わせでセンチメント分析にも発展できます。

