import pandas as pd
from model import Fm
from sklearn.model_selection import train_test_split
import numpy as np
# Train the project's `Fm` model on daily stock data and report the
# out-of-sample Information Coefficient (IC) of its return predictions.

# Load the price/indicator table.
# NOTE(review): rows are assumed to be in chronological order (oldest first);
# pct_change() and the chronological split below both rely on that -- confirm.
data_path = '/Users/luweitao/Downloads/data.csv'
stock_data = pd.read_csv(data_path)

# Candidate predictor columns, all observable at the close of day t.
feature_columns = [
    'Open', 'High', 'Low', 'Volume', 'Close',
    'Price_Momentum', 'Volume_Factor', 'RSI', '30d_Moving_Average',
]

# Label: the NEXT day's return, Close[t+1] / Close[t] - 1.
# Using the same-day return would leak the target, because same-day
# Open/High/Low/Close are among the features and essentially determine it.
stock_data['Return'] = stock_data['Close'].pct_change().shift(-1)

# Drop rows that cannot be used: the last row has no next-day return, and
# indicator columns may be NaN during their warm-up window. Only the columns
# actually fed to the model are checked, so NaNs elsewhere do not cost rows.
stock_data = stock_data.dropna(subset=feature_columns + ['Return'])

# Extract the feature matrix and the label vector.
features = stock_data[feature_columns]
labels = stock_data['Return']

# Chronological 80% / 20% split. shuffle=False keeps the most recent 20% of
# rows as the test set, so the model is never trained on data that comes
# after the period it is evaluated on.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2, shuffle=False
)

model = Fm()
# Fit the model on the training data.
model.fit(features_train, labels_train, iter=20)

# Predict on the held-out test data.
predictions = model.predict(features_test)

# Information Coefficient: Pearson correlation between predicted and realised
# returns on the test set. Both inputs are flattened to 1-D so that an (n, 1)
# prediction array is not misread by corrcoef as n separate variables.
ic = np.corrcoef(np.ravel(predictions), labels_test.to_numpy())[0, 1]

print("Information Coefficient (IC):", ic)