"""Train a random-forest regressor on alloy results and plot its predictions.

Reads ``alloy_results.csv``, derives extra features from the ``Comp1``,
``Comp2``, ``Comp3`` and ``Temperature`` columns, fits a
``RandomForestRegressor`` to predict ``Strength``, prints the test-set
metrics and feature importances, and saves two PNG plots.
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# NOTE(review): seaborn is not referenced anywhere in this file; the import is
# kept only so that nothing relying on it being loaded changes behavior.
import seaborn as sns


# Data loading and preprocessing
def load_and_preprocess_data(file_path):
    """Load the CSV and build the feature matrix and the target.

    Args:
        file_path: Location of the CSV, passed unchanged to
            ``pandas.read_csv``. The file must contain the columns
            ``Comp1``, ``Comp2``, ``Comp3``, ``Temperature`` and ``Strength``.

    Returns:
        A ``(X, y)`` tuple. ``X`` is a DataFrame holding the four input
        columns plus the derived ``Temperature_squared``,
        ``Temperature_log``, ``Comp1_Comp2``, ``Comp2_Comp3`` and
        ``Comp1_Comp3`` columns; ``y`` is the ``Strength`` column.

    Raises:
        KeyError: Raised by pandas when a required column is missing.
        ValueError: If any ``Temperature`` value is zero or negative, because
            its logarithm is not a finite real number.
    """
    data = pd.read_csv(file_path)
    # Copy so the derived columns below never write into ``data``.
    X = data[['Comp1', 'Comp2', 'Comp3', 'Temperature']].copy()
    y = data['Strength']

    # np.log would turn 0 into -inf and negatives into NaN; fail here with a
    # clear message instead of letting those values reach the estimator.
    # Missing (NaN) temperatures compare False and pass through unchanged.
    non_positive = X['Temperature'] <= 0
    if non_positive.any():
        raise ValueError(
            "'Temperature' must be strictly positive to compute its "
            f"logarithm; found {int(non_positive.sum())} non-positive value(s)"
        )

    # Additional engineered features
    X['Temperature_squared'] = X['Temperature'] ** 2
    X['Temperature_log'] = np.log(X['Temperature'])
    X['Comp1_Comp2'] = X['Comp1'] * X['Comp2']
    X['Comp2_Comp3'] = X['Comp2'] * X['Comp3']
    X['Comp1_Comp3'] = X['Comp1'] * X['Comp3']

    return X, y


# Model training and evaluation
def train_and_evaluate_model(X, y, *, test_size=0.2, n_estimators=100,
                             random_state=42):
    """Fit a random forest on a train split and score it on the test split.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.
        test_size: Forwarded to ``train_test_split``.
        n_estimators: Number of trees in the forest.
        random_state: Seed used for both the split and the forest.

    Returns:
        A tuple ``(model, X_test, y_test, y_pred, mse, r2)``: the fitted
        model, the held-out features and targets, the predictions for the
        held-out features, and the mean squared error and R2 score of those
        predictions.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = RandomForestRegressor(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return model, X_test, y_test, y_pred, mse, r2


# Print the feature importances
def print_feature_importance(model, feature_names):
    """Print one ``name: importance`` line per feature.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Iterable of names, in the same order as the
            importances; pairing stops at the shorter of the two.
    """
    importances = model.feature_importances_
    for name, importance in zip(feature_names, importances):
        print(f'{name}: {importance}')


# Predicted-vs-actual scatter plot
def plot_predictions(y_test, y_pred, output_path='prediction_plot.png'):
    """Save a scatter plot of predicted against actual values.

    A dashed red diagonal spanning the range of ``y_test`` marks where
    prediction equals the actual value.

    Args:
        y_test: Actual target values; must provide ``min()`` and ``max()``.
        y_pred: Predicted values, aligned with ``y_test``.
        output_path: Where the figure is written.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.scatter(y_test, y_pred, alpha=0.5)
        lowest, highest = y_test.min(), y_test.max()
        plt.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
        plt.xlabel('Actual Strength')
        plt.ylabel('Predicted Strength')
        plt.title('Predicted vs Actual Alloy Strength')
        plt.tight_layout()
        plt.savefig(output_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)


# Bar plot of the feature importances
def plot_feature_importance(model, feature_names,
                            output_path='feature_importance_plot.png'):
    """Save a bar chart of feature importances, largest first.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Names in the same order as the importances.
        output_path: Where the figure is written.
    """
    importances = model.feature_importances_
    # argsort is ascending; reverse it to get descending importance.
    order = np.argsort(importances)[::-1]
    # Materialize so any iterable of names can be indexed by position.
    names = list(feature_names)
    positions = range(len(importances))

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.title("Feature Importances")
        plt.bar(positions, importances[order])
        plt.xticks(positions, [names[i] for i in order], rotation=90)
        plt.tight_layout()
        plt.savefig(output_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)


# Entry point
def main():
    """Run the pipeline: load, train, report metrics, and save both plots."""
    # Data loading and preprocessing
    X, y = load_and_preprocess_data('alloy_results.csv')

    # Model training and evaluation
    model, X_test, y_test, y_pred, mse, r2 = train_and_evaluate_model(X, y)

    # Report the results
    print(f'Mean Squared Error: {mse}')
    print(f'R2 Score: {r2}')

    # Print the feature importances
    print_feature_importance(model, X.columns)

    # Generate the plots
    plot_predictions(y_test, y_pred)
    plot_feature_importance(model, X.columns)

    print('Prediction plot saved as prediction_plot.png')
    print('Feature importance plot saved as feature_importance_plot.png')


if __name__ == "__main__":
    main()