Fix data fetching on the signal page and the backtest Excel export issue

This commit is contained in:
2026-02-02 21:30:19 +08:00
parent 50f9734dcd
commit 891f0ea2b0
21 changed files with 8328 additions and 57803 deletions
+75 -97
View File
@@ -11,43 +11,43 @@ from ta.momentum import StochasticOscillator
import math
class DataEngine:
def __init__(self, symbol=None, name=None, url=None, provider=None, data_dir='data_cache'):
    """Set up cache paths, resolve the data source for ``symbol`` and sync it.

    Args:
        symbol: Instrument identifier; stripped and upper-cased. When falsy,
            ``file_path`` and ``url`` stay ``None`` and no sync is attempted.
        name: Optional display name, stored unchanged on ``self.name``.
        url: Explicit source URL. Used only when ``symbol`` is not listed in
            ``instruments.csv``.
        provider: Provider key that accompanies ``url``; defaults to
            ``'yahoo'`` when omitted.
        data_dir: Cache directory name, resolved relative to this module's
            folder and created if missing.
    """
    # 1. Basics first
    self.symbol = symbol.strip().upper() if symbol else None
    self.name = name

    # 2. The cache directory must be known before any path is derived from it.
    base_path = os.path.dirname(os.path.abspath(__file__))
    self.cache_dir = os.path.join(base_path, data_dir)
    os.makedirs(self.cache_dir, exist_ok=True)

    # 3. Now it is safe to define file_path because cache_dir exists
    self.file_path = os.path.join(self.cache_dir, f"{self.symbol}.csv") if self.symbol else None

    # 4. Load instruments and look this symbol up in the CSV
    self.master_instruments = self.load_instruments_from_csv('instruments.csv')
    config = next((i for i in self.master_instruments if i['symbol'].upper() == self.symbol), None)

    yahoo_url = f"https://query1.finance.yahoo.com/v8/finance/chart/{self.symbol}?interval=1d&range=5y"

    if config:
        self.provider = config.get('provider', 'yahoo').lower()
        # Build the URL from symbol + provider rather than the stored template URL.
        if self.provider == 'jpm':
            self.url = f"https://am.jpmorgan.com/FundsMarketingHandler/historicalData?cusip={self.symbol}&country=hk&role=per"
        elif self.provider in ('ft', 'agi'):
            # 'agi' instruments are served from the FT endpoint, matching the
            # provider templates used when the instrument list is loaded.
            self.url = f"https://markets.ft.com/data/funds/tearsheet/historical?s={self.symbol}"
        else:
            self.url = yahoo_url
    elif url:
        # Symbol not in the CSV: honor the caller-supplied source instead of
        # silently discarding the url/provider arguments.
        self.url = url
        self.provider = provider or 'yahoo'
    else:
        # Fallback for symbols not in the CSV
        self.url = yahoo_url if self.symbol else None
        self.provider = 'yahoo'

    # ensure_data() performs the cache freshness check (and fetch) for the symbol.
    if self.symbol:
        self.ensure_data()
def ensure_data(self):
"""Checks if file exists and is fresh (less than 24h old)."""
CACHE_EXPIRY = 24 * 3600 # 24 hours
@@ -67,42 +67,40 @@ class DataEngine:
# which uses the URLs from your TEMPLATES
return self.fetch_data()
def load_instruments_from_csv(self, file_path='instruments.csv'):
    """Load the instrument list from a CSV file.

    Header names are stripped and lower-cased before lookup. Each row needs a
    usable identifier (``cusip``, ``symbol`` or ``ticker``); rows without one
    are skipped. ``provider`` defaults to ``'yahoo'`` and unknown providers
    use the Yahoo URL template.

    Args:
        file_path: CSV path, joined onto this module's directory (an absolute
            path is used as given).

    Returns:
        A list of dicts with keys ``symbol``, ``url``, ``provider`` and
        ``name``. Empty when the file is missing, empty, or unreadable.
    """
    instruments = []
    # Templates use {id} as a generic placeholder
    TEMPLATES = {
        'jpm': "https://am.jpmorgan.com/FundsMarketingHandler/historicalData?cusip={id}&country=hk&role=per",
        'yahoo': "https://query1.finance.yahoo.com/v8/finance/chart/{id}?period1=0&period2=9999999999&interval=1d&events=history",
        'ft': "https://markets.ft.com/data/funds/tearsheet/historical?s={id}",
        'agi': "https://markets.ft.com/data/funds/tearsheet/historical?s={id}",
    }
    try:
        abs_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_path)
        if not os.path.exists(abs_path):
            return []
        # utf-8-sig drops a leading BOM; newline='' is what the csv module expects.
        with open(abs_path, mode='r', encoding='utf-8-sig', newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            if not reader.fieldnames:
                # Empty file: no header row, so there is nothing to load.
                return instruments
            reader.fieldnames = [n.strip().lower() for n in reader.fieldnames]
            for row in reader:
                # Use whatever identifier is available
                uid = (row.get('cusip') or row.get('symbol') or row.get('ticker') or '').strip()
                symbol = (row.get('symbol') or row.get('ticker') or row.get('cusip') or '').strip().upper()
                provider = (row.get('provider') or 'yahoo').strip().lower()
                if symbol and uid:
                    template = TEMPLATES.get(provider, TEMPLATES['yahoo'])
                    instruments.append({
                        "symbol": symbol,
                        "url": template.format(id=uid),
                        "provider": provider,
                        # A blank or missing name cell falls back to the symbol.
                        "name": (row.get('name') or '').strip() or symbol,
                    })
    except Exception as e:
        # Best-effort load: report the problem and return what was parsed so far.
        print(f"❌ Critical Load Error: {e}")
    return instruments
def _ensure_data_exists(self):
@@ -147,7 +145,12 @@ class DataEngine:
try:
self.symbol = item['symbol']
self.provider = item['provider']
self.url = item['url']
if self.provider == 'jpm':
self.url = f"https://am.jpmorgan.com/FundsMarketingHandler/historicalData?cusip={self.symbol}&country=hk&role=per"
elif self.provider == 'ft':
self.url = f"https://markets.ft.com/data/funds/tearsheet/historical?s={self.symbol}"
else:
self.url = f"https://query1.finance.yahoo.com/v8/finance/chart/{self.symbol}?interval=1d&range=5y"
# FIX 2: Use 'self.cache_dir' to match your __init__ logic
self.file_path = os.path.join(self.cache_dir, f"{self.symbol}.csv")
@@ -174,67 +177,36 @@ class DataEngine:
return report
def run_pre_sync_maintenance(self):
    """Back up cached CSV files and keep only the 5 most recent backup folders.

    Copies every ``.csv`` file in ``self.cache_dir`` into a new
    ``backups/sync_backup_<YYYYmmdd_HHMM>`` folder beside this module, then
    removes the oldest ``sync_backup_*`` folders (by modification time) until
    at most five remain. Returns ``None``.
    """
    # Imported locally so the method does not depend on module-level imports
    # of these names being present.
    import shutil
    from datetime import datetime

    max_backups = 5
    backup_prefix = "sync_backup_"

    base_dir = os.path.dirname(os.path.abspath(__file__))
    backup_root = os.path.join(base_dir, 'backups')

    # 1. Create timestamped folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    current_backup_path = os.path.join(backup_root, f"{backup_prefix}{timestamp}")
    os.makedirs(current_backup_path, exist_ok=True)

    # 2. Perform Backup
    if os.path.exists(self.cache_dir):
        files = [f for f in os.listdir(self.cache_dir) if f.endswith('.csv')]
        for filename in files:
            shutil.copy2(
                os.path.join(self.cache_dir, filename),
                os.path.join(current_backup_path, filename),
            )
        print(f"✅ Backed up {len(files)} files to {current_backup_path}")

    # 3. Cleanup: Keep only last 5 backup folders. Only folders created by this
    # method are candidates, so unrelated directories under backups/ are never
    # deleted.
    all_backups = sorted(
        (
            os.path.join(backup_root, d)
            for d in os.listdir(backup_root)
            if d.startswith(backup_prefix) and os.path.isdir(os.path.join(backup_root, d))
        ),
        key=os.path.getmtime,
    )
    for oldest in all_backups[:max(0, len(all_backups) - max_backups)]:
        shutil.rmtree(oldest)
        print(f"🧹 Storage Cleanup: Removed old backup {os.path.basename(oldest)}")
def _parse_jpm(self, json_data):
if isinstance(json_data, dict) and "historicalNAVList" in json_data:
df = pd.DataFrame(json_data["historicalNAVList"])
@@ -591,14 +563,20 @@ class StrategyEngine:
history.append({
"date": actual_date_str,
"price": round(price, 2),
# --- DISPLAY STRINGS (For the Web UI) ---
"dca_display": f"${round(dca_invested, 2):,.2f} ({dca_new_shares:+.4f})",
"va_display": f"${round(actual_inv, 2):,.2f} ({va_new_shares:+.4f})",
# --- RAW DATA (Your existing variables kept consistent) ---
"dca_value": round(dca_shares * price, 2),
"dca_invested": round(dca_invested, 2),
"dca_shares_trans": round(dca_new_shares, 4),
"dca_shares_total": round(dca_shares, 4),
"va_value": round(va_shares * price, 2),
"va_invested": round(va_invested, 2),
"va_diff": round(actual_inv, 2),
"va_shares_trans": round(va_new_shares, 4),
"va_diff": round(actual_inv, 2), # This matches your ($) in the image
"va_shares_trans": round(va_new_shares, 4), # This matches your (Δ Shares)
"va_shares_total": round(va_shares, 4),
"va_target_value": round(va_target_value, 2)
})