{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"1Fw3GjFHrS_S_0umDDOEqO0kTRJ9tavQX","authorship_tag":"ABX9TyPdJ0Elmyst3XcwdnXfsiOn"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# **Studi Kasus Heart Disease 5 Fitur Part 2**"],"metadata":{"id":"s8B30LBor6qp"}},{"cell_type":"markdown","source":["Implementasi dengan Menggunakan Model `Bagging Classifier`, `Random Forest` dan `Stacking Classifier`"],"metadata":{"id":"9CEPTpErkvJw"}},{"cell_type":"markdown","source":["## Membaca data"],"metadata":{"id":"EAF-vi3ksEd5"}},{"cell_type":"code","execution_count":38,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"9xrzLAOwrx-Q","executionInfo":{"status":"ok","timestamp":1669557916548,"user_tz":-420,"elapsed":423,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"95298aa9-3240-40cf-832e-382ad2e12e97"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" age sex BP cholestrol heart disease\n","0 70 1 130 322 1\n","1 67 0 115 564 0\n","2 57 1 124 261 1\n","3 64 1 128 263 0\n","4 74 0 120 269 0"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
agesexBPcholestrolheart disease
07011303221
16701155640
25711242611
36411282630
47401202690
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":38}],"source":["from scipy.io import arff\n","import pandas as pd\n","from sklearn.preprocessing import MinMaxScaler\n","import joblib\n","\n","data = pd.read_csv('https://raw.githubusercontent.com/soumya-mishra/Heart-Disease_DT/main/heart_v2.csv')\n","df = data\n","df.head()"]},{"cell_type":"markdown","source":["### Memisahkan Label"],"metadata":{"id":"o1L0ks0tsVYE"}},{"cell_type":"code","source":["y = df['heart disease'].values\n","y[0:5]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3wWRpqJsXta","executionInfo":{"status":"ok","timestamp":1669557917198,"user_tz":-420,"elapsed":8,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"b34739d9-8e68-49dd-fb25-f3c9ce245b67"},"execution_count":39,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([1, 0, 1, 0, 0])"]},"metadata":{},"execution_count":39}]},{"cell_type":"code","source":["X = df.drop(columns=['heart disease'])\n","X"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"id":"Bn2sNGbYsuaJ","executionInfo":{"status":"ok","timestamp":1669557917198,"user_tz":-420,"elapsed":7,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"359c53c1-62a7-4d2d-88a9-adff5f7c8c6d"},"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" age sex BP cholestrol\n","0 70 1 130 322\n","1 67 0 115 564\n","2 57 1 124 261\n","3 64 1 128 263\n","4 74 0 120 269\n",".. ... ... ... ...\n","265 52 1 172 199\n","266 44 1 120 263\n","267 56 0 140 294\n","268 57 1 140 192\n","269 67 1 160 286\n","\n","[270 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
agesexBPcholestrol
0701130322
1670115564
2571124261
3641128263
4740120269
...............
265521172199
266441120263
267560140294
268571140192
269671160286
\n","

270 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":40}]},{"cell_type":"markdown","source":["## Preprocessing Data (`Min-Max`)"],"metadata":{"id":"vVrv-KN8lRZd"}},{"cell_type":"code","source":["scaler = MinMaxScaler()\n","scaled = scaler.fit_transform(X)\n","features_names = X.columns.copy()\n","scaled_features = pd.DataFrame(scaled, columns=features_names)\n","scaled_features.head(10)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"id":"CJG72qsluDIF","executionInfo":{"status":"ok","timestamp":1669557917199,"user_tz":-420,"elapsed":7,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"e311f141-8af0-4ab0-cf4e-c8fca61077d9"},"execution_count":41,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" age sex BP cholestrol\n","0 0.854167 1.0 0.339623 0.447489\n","1 0.791667 0.0 0.198113 1.000000\n","2 0.583333 1.0 0.283019 0.308219\n","3 0.729167 1.0 0.320755 0.312785\n","4 0.937500 0.0 0.245283 0.326484\n","5 0.750000 1.0 0.245283 0.116438\n","6 0.562500 1.0 0.339623 0.296804\n","7 0.625000 1.0 0.150943 0.257991\n","8 0.645833 1.0 0.433962 0.381279\n","9 0.708333 0.0 0.528302 0.641553"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
agesexBPcholestrol
00.8541671.00.3396230.447489
10.7916670.00.1981131.000000
20.5833331.00.2830190.308219
30.7291671.00.3207550.312785
40.9375000.00.2452830.326484
50.7500001.00.2452830.116438
60.5625001.00.3396230.296804
70.6250001.00.1509430.257991
80.6458331.00.4339620.381279
90.7083330.00.5283020.641553
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":41}]},{"cell_type":"markdown","source":["### Split Data"],"metadata":{"id":"kGWWCZQ0vDU4"}},{"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","\n","# NOTE(review): scaled_features was produced by a MinMaxScaler fitted on every row before this split,\n","# so the test rows influence the scaling of the training features.\n","X_train, X_test, y_train, y_test=train_test_split(scaled_features, y, test_size=0.2, random_state=1)\n"],"metadata":{"id":"SlLAZXahvAYm","executionInfo":{"status":"ok","timestamp":1669557917199,"user_tz":-420,"elapsed":7,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}}},"execution_count":42,"outputs":[]},{"cell_type":"markdown","source":["## Eksekusi Pada Model `Bagging Clasifier`"],"metadata":{"id":"Gf1redlcuafX"}},{"cell_type":"markdown","source":["### Bagging Clasifier Dengan SVC"],"metadata":{"id":"OxWXISa4pXc0"}},{"cell_type":"markdown","source":["Mencari akurasi tertinggi dengan N_estimators dari 2 sampai 100"],"metadata":{"id":"YAiGEWnupcN2"}},{"cell_type":"code","source":["# Model and metric imports\n","from sklearn.naive_bayes import GaussianNB\n","from sklearn.svm import SVC\n","from sklearn.ensemble import BaggingClassifier\n","from sklearn.datasets import make_classification\n","from sklearn.metrics import accuracy_score\n","# The sweep below trains on the heart-disease split (X_train / X_test / y_train / y_test).\n","# No synthetic make_classification data is generated here, so X and y keep pointing at the real dataset.\n","# NOTE(review): scikit-learn 1.2 renamed base_estimator to estimator and 1.4 removed the old name;\n","# switch the keyword if this notebook is run on a newer release.\n","# Bagging classifier with an SVC base estimator, one model per candidate ensemble size\n","n_estimator = range(2,101)\n","akurasi_bags_1 = []\n","for n in n_estimator:\n"," # fit a bagging ensemble of n SVC estimators on the training split\n"," clf = BaggingClassifier(base_estimator=SVC(),\n"," n_estimators=n, random_state=40).fit(X_train, y_train)\n"," # predict the held-out test split\n"," y_pred = clf.predict(X_test)\n"," # record the test accuracy obtained with this n\n"," akurasi_bags_1.append(accuracy_score(y_test,y_pred))"],"metadata":{"id":"FmuXMRYGumNd","executionInfo":{"status":"ok","timestamp":1669557932212,"user_tz":-420,"elapsed":15019,"user":{"displayName":"Caca 
Erha","userId":"13359221303846732984"}}},"execution_count":43,"outputs":[]},{"cell_type":"markdown","source":["Visualisasi Akurasi Bagging dengan SVC"],"metadata":{"id":"NQz5QQcYrDmC"}},{"cell_type":"code","source":["import matplotlib.pyplot as plt\n","plt.plot(n_estimator,akurasi_bags_1)\n","plt.xlabel('Value of N')\n","plt.ylabel('Testing Accuracy')\n","plt.show()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":0},"id":"L6HE-TdBrKhz","executionInfo":{"status":"ok","timestamp":1669557932213,"user_tz":-420,"elapsed":19,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"a761ff0d-639c-4bed-f1cf-f354286d5097"},"execution_count":44,"outputs":[{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":["Mencari n_estimator dengan Akurasi Tertinggi"],"metadata":{"id":"6mb8XVulreQY"}},{"cell_type":"code","source":["# n_estimator starts at 2, so position i in akurasi_bags_1 belongs to n_estimator[i], not i + 1.\n","# Look the ensemble size up instead of adding a constant to the list index.\n","best_idx_svc = akurasi_bags_1.index(max(akurasi_bags_1))\n","n_estimator[best_idx_svc], akurasi_bags_1[best_idx_svc]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"63wpBduBrDJv","executionInfo":{"status":"ok","timestamp":1669557932213,"user_tz":-420,"elapsed":17,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"e2fb4835-1302-45a1-8d91-ff46234d9a51"},"execution_count":45,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(6, 0.7592592592592593)"]},"metadata":{},"execution_count":45}]},{"cell_type":"markdown","source":["### Bagging Clasifier Dengan GaussianNB"],"metadata":{"id":"LH0PLsNhuHik"}},{"cell_type":"markdown","source":["Mencari akurasi tertinggi dengan N_estimators dari 2 sampai 100"],"metadata":{"id":"YtwZWjBZubXW"}},{"cell_type":"code","source":["akurasi_bags_2= []\n","for n in n_estimator:\n"," # inisialisasi model\n"," clf2 = BaggingClassifier(base_estimator=GaussianNB(),\n"," n_estimators=n, random_state=40).fit(X_train, y_train)\n"," # predict x_test\n"," y_pred2 = clf2.predict(X_test)\n"," # akurasi count\n"," akurasi_bags_2.append(accuracy_score(y_test,y_pred2))"],"metadata":{"id":"qgfbA1GOuJqc","executionInfo":{"status":"ok","timestamp":1669557939863,"user_tz":-420,"elapsed":7664,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}}},"execution_count":46,"outputs":[]},{"cell_type":"code","source":["import joblib\n","# Refit the model that is persisted with the ensemble size that scored best in the sweep above.\n","# The size is read from n_estimator by position because the sweep starts at n = 2.\n","best_n_gnb = n_estimator[akurasi_bags_2.index(max(akurasi_bags_2))]\n","clf2 = BaggingClassifier(base_estimator=GaussianNB(),\n"," n_estimators=best_n_gnb, random_state=40).fit(X_train, y_train)\n","filenameBCG = 
'/content/drive/MyDrive/datamining/tugas/model/bagginggaussian.pkl'\n","joblib.dump(clf2,filenameBCG)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WfqxMOgPYj8g","executionInfo":{"status":"ok","timestamp":1669557939864,"user_tz":-420,"elapsed":19,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"ba8a28a2-c64e-44d6-a50e-ab7bbf7790c9"},"execution_count":47,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['/content/drive/MyDrive/datamining/tugas/model/bagginggaussian.pkl']"]},"metadata":{},"execution_count":47}]},{"cell_type":"markdown","source":["Visualisasi Hasil Akurasi "],"metadata":{"id":"u9Vlgo_eub2j"}},{"cell_type":"code","source":["plt.plot(n_estimator,akurasi_bags_2)\n","plt.xlabel('Value of N')\n","plt.ylabel('Testing Accuracy')\n","plt.show()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":0},"id":"nmqKUXhVuXYy","executionInfo":{"status":"ok","timestamp":1669557939864,"user_tz":-420,"elapsed":17,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"c24e66b9-1c69-4bd7-9d92-2d58dacf8e0d"},"execution_count":48,"outputs":[{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":["Mencari Akurasi Tertinggi"],"metadata":{"id":"MzaBZqEpudqq"}},{"cell_type":"code","source":["# n_estimator starts at 2, so position i in akurasi_bags_2 belongs to n_estimator[i], not i + 1.\n","# Look the ensemble size up instead of adding a constant to the list index.\n","best_idx_gnb = akurasi_bags_2.index(max(akurasi_bags_2))\n","n_estimator[best_idx_gnb], akurasi_bags_2[best_idx_gnb]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PYj_oNYkugfq","executionInfo":{"status":"ok","timestamp":1669557939865,"user_tz":-420,"elapsed":17,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"04a22fbc-f0ec-46d2-df64-9336f5a53e62"},"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(8, 0.7777777777777778)"]},"metadata":{},"execution_count":49}]},{"cell_type":"markdown","source":["## Eksekusi Pada Model `Random Forest`"],"metadata":{"id":"sxR5a0x-wUqZ"}},{"cell_type":"markdown","source":["Mencari akurasi tertinggi dengan N_estimators dari 2 sampai 100"],"metadata":{"id":"Xo10W7C7xwp7"}},{"cell_type":"code","source":["from sklearn.ensemble import RandomForestClassifier\n","akurasirf= []\n","for n in n_estimator:\n"," # inisialisasi model\n"," rf = RandomForestClassifier(\n"," n_estimators=n,max_depth=2, random_state=40).fit(X_train, y_train)\n"," # predict x_test\n"," y_predrf = rf.predict(X_test)\n"," # akurasi count\n"," akurasirf.append(accuracy_score(y_test,y_predrf))"],"metadata":{"id":"0DXRtRnkxxFN","executionInfo":{"status":"ok","timestamp":1669557947698,"user_tz":-420,"elapsed":7848,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}}},"execution_count":50,"outputs":[]},{"cell_type":"code","source":["# Refit the forest that is persisted with the ensemble size that scored best in the sweep above.\n","# The size is read from n_estimator by position because the sweep starts at n = 2.\n","best_n_rf = n_estimator[akurasirf.index(max(akurasirf))]\n","rf = RandomForestClassifier(\n"," n_estimators=best_n_rf,max_depth=2, random_state=40).fit(X_train, y_train)\n","filenameRF = '/content/drive/MyDrive/datamining/tugas/model/randomforest.pkl'\n","joblib.dump(rf,filenameRF)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xgbKAWqtZLXN","executionInfo":{"status":"ok","timestamp":1669557947701,"user_tz":-420,"elapsed":22,"user":{"displayName":"Caca 
Erha","userId":"13359221303846732984"}},"outputId":"48de2222-f17d-4ea6-c248-527c526ec43e"},"execution_count":51,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['/content/drive/MyDrive/datamining/tugas/model/randomforest.pkl']"]},"metadata":{},"execution_count":51}]},{"cell_type":"markdown","source":["Visualisasi Hasil Akurasi "],"metadata":{"id":"cvvBwCB1yOG7"}},{"cell_type":"code","source":["plt.plot(n_estimator,akurasirf)\n","plt.xlabel('Value of N')\n","plt.ylabel('Testing Accuracy')\n","plt.show()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":0},"id":"sJBi98r0ySH6","executionInfo":{"status":"ok","timestamp":1669557947702,"user_tz":-420,"elapsed":21,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"d48686e6-0175-4864-d4f4-ada6bd3edc76"},"execution_count":52,"outputs":[{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":["Mencari Akurasi Tertinggi dari N"],"metadata":{"id":"QPwR_ERIyR3a"}},{"cell_type":"code","source":["# n_estimator starts at 2, so position i in akurasirf belongs to n_estimator[i], not i + 1.\n","# Look the ensemble size up instead of adding a constant to the list index.\n","best_idx_rf = akurasirf.index(max(akurasirf))\n","n_estimator[best_idx_rf], akurasirf[best_idx_rf]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"M6-f_7ZsyYyN","executionInfo":{"status":"ok","timestamp":1669557947703,"user_tz":-420,"elapsed":21,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"0c9ac547-4ced-460d-c2a0-e00b3683e5c2"},"execution_count":53,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(15, 0.7222222222222222)"]},"metadata":{},"execution_count":53}]},{"cell_type":"markdown","source":["## Eksekusi Pada Model `Stacking clasifier`"],"metadata":{"id":"j-G1UqBkyzje"}},{"cell_type":"code","source":["from sklearn.ensemble import StackingClassifier\n","from sklearn.tree import DecisionTreeClassifier\n","\n","# Base learners: random forest, SVC, Gaussian naive Bayes and the two bagging ensembles\n","# NOTE(review): the n_estimators values below (38, 14, 9) are hard-coded and do not match the best n\n","# found by the sweeps in this notebook; confirm where they come from before relying on them.\n","estimators = [\n"," ('rf', RandomForestClassifier(n_estimators=38, max_depth=2, random_state=40)),\n"," ('svc', SVC()),\n"," ('gnb', GaussianNB()),\n"," ('bagsvc', BaggingClassifier(base_estimator=SVC(),\n"," n_estimators=14, random_state=40)),\n"," ('baggnb' ,BaggingClassifier(base_estimator=GaussianNB(),\n"," n_estimators=9, random_state=40))\n","]\n","\n","# Stack the base learners under an SVC meta-classifier and fit on the training split\n","sc = StackingClassifier(\n"," estimators=estimators, final_estimator=SVC()).fit(X_train, y_train)\n","\n","# Accuracy of the stacked model on the held-out test split\n","y_predsc = sc.predict(X_test)\n","akurasi = accuracy_score(y_test,y_predsc)"],"metadata":{"id":"xNsAZ2Ag1qT-","executionInfo":{"status":"ok","timestamp":1669557948289,"user_tz":-420,"elapsed":604,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}}},"execution_count":54,"outputs":[]},{"cell_type":"markdown","source":["### Hasil Akurasi Dan Score dari Stacking 
Clasifier"],"metadata":{"id":"QAkUH2ex38_T"}},{"cell_type":"code","source":["print(f'Akurasi Untuk Stacking Clasifier = {akurasi}')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nuq9t-_d4DP5","executionInfo":{"status":"ok","timestamp":1669557948289,"user_tz":-420,"elapsed":4,"user":{"displayName":"Caca Erha","userId":"13359221303846732984"}},"outputId":"030f2372-8398-44cb-a05a-44fd8223383e"},"execution_count":55,"outputs":[{"output_type":"stream","name":"stdout","text":["Akurasi Untuk Stacking Clasifier = 0.6666666666666666\n"]}]}]}