Refactor: Separate data sync from UI rendering, change column width

2026-01-27 07:12:09 +08:00
parent 61b32bbdd7
commit 9e7f474d5e
5 changed files with 1691 additions and 144 deletions
@@ -1,48 +1,179 @@
 import pandas as pd
 import requests
 import os
-from datetime import datetime
+import shutil
+from datetime import datetime, time
 from ta.trend import EMAIndicator
 from ta.momentum import StochasticOscillator

 class DataEngine:
-    def __init__(self, symbol, url, provider):
+    def __init__(self, symbol=None, url=None, provider=None, data_dir='data_cache'):
        self.symbol = symbol
        self.url = url
        self.provider = provider
        
-        # 1. Get the folder where engine.py lives
+        # Use your robust path logic
        base_path = os.path.dirname(os.path.abspath(__file__))
-        
-        # 2. Define the cache directory path
-        self.cache_dir = os.path.join(base_path, "data_cache")
-        
-        # 3. Create the folder if it doesn't exist (safety-first)
+        self.cache_dir = os.path.join(base_path, data_dir) # Use data_dir variable
        os.makedirs(self.cache_dir, exist_ok=True)
        
-        # 4. Set the full path for this specific instrument's CSV
-        self.file_path = os.path.join(self.cache_dir, f"{self.symbol}.csv")
+        # 4. Only set file_path if we actually have a symbol
+        if self.symbol:
+            self.file_path = os.path.join(self.cache_dir, f"{self.symbol}.csv")
+        else:
+            self.file_path = None
    
-    def global_sync(self):
-        """The 'One-Click' background loop."""
-        # 1. Get the latest list of instruments from your CSV
-        all_instruments = self.load_instruments_from_csv() 
+    def load_instruments_from_csv(self, file_path):
+        import csv
+        instruments = []
        
-        for item in all_instruments:
-            # 2. Update the 'Current' target for the engine
-            self.symbol = item['symbol']
-            self.cusip = item['cusip']
-            self.provider = item['provider']
+        # Updated templates for maximum historical reach
+        TEMPLATES = {
+            'jpm': "https://am.jpmorgan.com/FundsMarketingHandler/historicalData?cusip={cusip}&country=hk&role=per",
+            # period1=0 fetches from the earliest available date; interval=1d is daily
+            'yahoo': "https://query1.finance.yahoo.com/v8/finance/chart/{cusip}?period1=0&period2=9999999999&interval=1d&events=history",
+            # FT remains 30-day window; Smart Append logic in fetch_data handles the history
+            'agi': "https://markets.ft.com/data/funds/tearsheet/historical?s={cusip}"
+        }
+
+        try:
+            abs_path = os.path.join(os.path.dirname(__file__), file_path)
            
-            # 3. Regenerate the URL and File Path for THIS specific instrument
-            self.url = self.generate_url() 
-            self.file_path = os.path.join(self.data_dir, f"{self.symbol}.csv")
-            
-            # 4. Run the robust fetch/merge logic we built
-            print(f"Syncing {self.symbol}...")
-            self.fetch_data() 
-            
-        print("Global Sync Complete.")
+            if not os.path.exists(abs_path):
+                print(f"Error: {file_path} not found.")
+                return []
+
+            with open(abs_path, mode='r', encoding='utf-8-sig') as csvfile:
+                reader = csv.DictReader(csvfile)
+                reader.fieldnames = [name.strip().lower() for name in reader.fieldnames]
+                
+                for row in reader:
+                    symbol = row.get('symbol', '').strip()
+                    cusip = row.get('cusip', '').strip()
+                    provider = row.get('provider', 'jpm').strip().lower()
+                    
+                    if symbol and cusip:
+                        template = TEMPLATES.get(provider, TEMPLATES['jpm'])
+                        url = template.format(cusip=cusip)
+                        
+                        instruments.append({
+                            "symbol": symbol, 
+                            "url": url, 
+                            "provider": provider,
+                            "cusip": cusip # Added this so sync_all can use it if needed
+                        })
+                    
+        except Exception as e:
+            print(f"CSV Loading Error: {e}")        
+        return instruments
+    # URL_CONFIG = load_instruments_from_csv('instruments.csv')
+        
+    def global_sync(self):
+        """Back up the cache, sync every instrument, and return a summary report.
+
+        Returns a dict with "total", "updated" and "failed" counts plus a
+        "details" list holding one status line per instrument.
+        """
+        # The module-level name 'time' is datetime.time (see the file imports),
+        # which has no sleep(); import the real time module locally instead.
+        import time
+
+        self.run_pre_sync_maintenance()
+        instruments = self.load_instruments_from_csv('instruments.csv')
+
+        report = {
+            "total": len(instruments),
+            "updated": 0,
+            "failed": 0,
+            "details": []
+        }
+
+        for item in instruments:
+            try:
+                self.symbol = item['symbol']
+                self.provider = item['provider']
+                self.url = item['url']
+                self.file_path = os.path.join(self.cache_dir, f"{self.symbol}.csv")
+
+                print(f"Updating {self.symbol}...")
+                # None or an empty frame is counted as a failed instrument below.
+                result_df = self.fetch_data()
+                time.sleep(1)  # pause one second before the next instrument
+
+                if result_df is not None and not result_df.empty:
+                    report["updated"] += 1
+                    last_price = result_df['close'].iloc[-1]
+                    report["details"].append(f"✅ {self.symbol}: Updated (Price: {last_price})")
+                else:
+                    report["failed"] += 1
+                    report["details"].append(f"❌ {self.symbol}: No new data found")
+            except Exception as e:
+                report["failed"] += 1
+                report["details"].append(f"⚠️ {self.symbol}: Error ({str(e)})")
+
+        return report
+    
+    def run_pre_sync_maintenance(self):
+        """Back up every cached CSV and print a per-fund data health summary.
+
+        Copies each *.csv in self.cache_dir into a new timestamped folder under
+        <module dir>/backups and records row count and date range for each file.
+
+        Files that cannot be copied or parsed are reported and skipped.
+
+        Returns:
+            pandas.DataFrame with columns Fund, Rows, Start, End; empty when
+            the cache directory is missing or holds no readable CSV files.
+        """
+        # Minute resolution: two runs within the same minute share one folder.
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M")
+        print(f"\n--- Pre-Sync Health Check ({timestamp}) ---")
+
+        # Bail out before creating anything so a missing cache does not leave
+        # an empty backup folder behind.
+        if not os.path.exists(self.cache_dir):
+            print(f"⚠️ Cache directory not found at {self.cache_dir}")
+            return pd.DataFrame()
+
+        base_dir = os.path.dirname(os.path.abspath(__file__))
+        backup_dir = os.path.join(base_dir, 'backups')
+        current_backup_path = os.path.join(backup_dir, f"sync_backup_{timestamp}")
+        os.makedirs(current_backup_path, exist_ok=True)
+
+        stats = []
+        for filename in os.listdir(self.cache_dir):
+            if not filename.endswith(".csv"):
+                continue
+            src = os.path.join(self.cache_dir, filename)
+            dst = os.path.join(current_backup_path, filename)
+
+            try:
+                shutil.copy2(src, dst)
+
+                # Re-read the source to report its size and date range.
+                df = pd.read_csv(src)
+                has_date = 'date' in df.columns
+                stats.append({
+                    # splitext drops only the trailing extension; the previous
+                    # str.replace removed every ".csv" occurrence in the name.
+                    "Fund": os.path.splitext(filename)[0],
+                    "Rows": len(df),
+                    "Start": df['date'].min() if has_date else "N/A",
+                    "End": df['date'].max() if has_date else "N/A"
+                })
+                print(f"📦 Backed up: {filename} ({len(df)} rows)")
+            except Exception as e:
+                # Best-effort: report the failure and move on to the next file.
+                print(f"⚠️ Could not backup {filename}: {e}")
+
+        if not stats:
+            print("📭 No CSV files found to backup.")
+            return pd.DataFrame()
+
+        stats_df = pd.DataFrame(stats)
+        print("\n" + stats_df.to_string(index=False))
+        print(f"\n✅ All backups saved to: {current_backup_path}")
+        return stats_df

    def _parse_jpm(self, json_data):
        if isinstance(json_data, dict) and "historicalNAVList" in json_data:
@@ -165,6 +296,24 @@ class DataEngine:
            print(f"Network error for {self.symbol}: {e}")
            return local_df
    
+    def get_local_metrics(self):
+        """Return metrics computed from the local CSV cache only (no network).
+
+        Returns a "needs_sync" dict if there is no cache file, None on read errors.
+        """
+        # file_path is None when no symbol is set; os.path.exists(None) raises.
+        if not self.file_path or not os.path.exists(self.file_path):
+            return {"error": "Missing Local Data", "status": "needs_sync"}
+        try:
+            df = pd.read_csv(self.file_path)
+            df.columns = [c.lower().strip() for c in df.columns]
+            df['date'] = pd.to_datetime(df['date'], errors='coerce')
+            df = df.dropna(subset=['date', 'close']).sort_values('date')
+            return self.calculate_table_metrics(df)
+        except Exception as e:
+            print(f"Error reading local data for {self.symbol}: {e}")
+            return None
+    
        
    def calculate_table_metrics(self, df):
        if df is None or df.empty or len(df) < 2: