From e59da7a323db43538488556b0ac58dc8eb30f348 Mon Sep 17 00:00:00 2001 From: Modassir Afzal Date: Thu, 13 Oct 2022 02:20:52 +0530 Subject: [PATCH 1/7] Fixes: #6551 --- machine_learning/xgboostclassifier.py | 55 +++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 machine_learning/xgboostclassifier.py diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py new file mode 100644 index 000000000000..cd92799bf1eb --- /dev/null +++ b/machine_learning/xgboostclassifier.py @@ -0,0 +1,55 @@ +from xgboost import XGBClassifier +#https://xgboost.readthedocs.io/en/stable/ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + + + +training = pd.read_csv('../input/titanic/train.csv') +test = pd.read_csv('../input/titanic/test.csv') + +# Commented out IPython magic to ensure Python compatibility. +training['train_test'] = 1 +test['train_test'] = 0 +test['Survived'] = np.NaN +all_data = pd.concat([training,test]) +# %matplotlib inline +all_data.columns + +all_data.describe() + +all_data['cabin_mul']=all_data.Cabin.apply(lambda cabin_class: 0 if pd.isna(cabin_class) else len(x.split(' '))) +all_data['cabin_adv'] = all_data.Cabin.apply(lambda cabin_number: str(cabin_number)[0]) +all_data['name_title']= all_data.Name.apply(lambda name: name.split(',')[1].split('.')[0].strip()) +all_data.Age=all_data.Age.fillna(training.Age.median()) +all_data.Fare=all_data.Fare.fillna(training.Fare.median()) +all_data.dropna(subset=['Embarked'],inplace=True) +all_data['norm_fare']=np.log(all_data.Fare+1) +all_data.Pclass=all_data.Pclass.astype(str) +all_data['Age']=all_data['Age'].apply(np.int64) +all_dummies = pd.get_dummies(all_data[['Pclass','Sex','Age','SibSp','Parch','norm_fare', + 'Embarked','cabin_adv','cabin_mul', + 'name_title','train_test']]) + +from sklearn.preprocessing import StandardScaler +scale = StandardScaler() +all_dummies_scaled = all_dummies.copy() 
+all_dummies_scaled[['Age','SibSp','Parch','norm_fare']]= scale.fit_transform(all_dummies_scaled[['Age','SibSp','Parch','norm_fare']]) +all_dummies_scaled.head() + +x_train_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 1].drop(['train_test'], axis =1) +x_test_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 0].drop(['train_test'], axis =1) + +y_train = all_data[all_data.train_test==1].Survived + +from xgboost import XGBClassifier + +xgb = XGBClassifier() +xgb.fit(X_train_scaled,y_train) + +y_hat_base_vc = xgb.predict(x_test_scaled).astype(int) +basic_submission = {'PassengerId': test.PassengerId, 'Survived': y_hat_base_vc} +base_submission = pd.DataFrame(data=basic_submission) +base_submission.to_csv('xgb_submission.csv', index=False) \ No newline at end of file From d0fffd463a78a96b38943ad7dd2c030bf8d28814 Mon Sep 17 00:00:00 2001 From: Modassir Afzal Date: Thu, 13 Oct 2022 02:23:57 +0530 Subject: [PATCH 2/7] Fixes: #6551 --- machine_learning/xgboostregressor.py | 138 +++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 machine_learning/xgboostregressor.py diff --git a/machine_learning/xgboostregressor.py b/machine_learning/xgboostregressor.py new file mode 100644 index 000000000000..ad2110d6efff --- /dev/null +++ b/machine_learning/xgboostregressor.py @@ -0,0 +1,138 @@ +from xgboost import XGBRegressor +#https://xgboost.readthedocs.io/en/stable/ +import numpy as np +import pandas as pd + +import seaborn as sns +import matplotlib.pyplot as plt + + +import os +for dirname, _, filenames in os.walk('/kaggle/input'): + for filename in filenames: + print(os.path.join(dirname, filename)) + +train_ames=pd.read_csv('/kaggle/input/ames-housing-dataset/AmesHousing.csv') +test=pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/test.csv') +train=pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv') + +train_ames.columns = train_ames.columns.str.replace(' ', '') 
+train_ames=train_ames.rename(columns={"YearRemod/Add": "YearRemodAdd"}) + +data=pd.concat([train_ames,train,test], axis=0, sort=False) +print("Size of the Housing Dataset",len(data)) +useless = ['Id','PID','Order','SalePrice'] +data = data.drop(useless, axis = 1) +duplicate = data[data.duplicated(keep='last')].index +len(duplicate) + +duplicate=duplicate[0:390] +train_ames = train_ames.drop(duplicate, axis = 0) + +training=pd.concat([train_ames,train], axis=0, sort=False) +useless = ['Id','PID','Order'] +training = training.drop(useless, axis = 1) + +# Separating Target and Features + +target = training['SalePrice'] +test_id = test['Id'] +test = test.drop(['Id'],axis = 1) +training2 = training.drop(['SalePrice'], axis = 1) + + +# Concatenating train & test set + +train_test = pd.concat([training2,test], axis=0, sort=False) + +# Filling Categorical NaN (That we know how to fill due to the description file ) + +train_test['Functional'] = train_test['Functional'].fillna('Typ') +train_test['Electrical'] = train_test['Electrical'].fillna("SBrkr") +train_test['KitchenQual'] = train_test['KitchenQual'].fillna("TA") +train_test['Exterior1st'] = train_test['Exterior1st'].fillna(train_test['Exterior1st'].mode()[0]) +train_test['Exterior2nd'] = train_test['Exterior2nd'].fillna(train_test['Exterior2nd'].mode()[0]) +train_test['SaleType'] = train_test['SaleType'].fillna(train_test['SaleType'].mode()[0]) +train_test["PoolQC"] = train_test["PoolQC"].fillna("None") +train_test["Alley"] = train_test["Alley"].fillna("None") +train_test['FireplaceQu'] = train_test['FireplaceQu'].fillna("None") +train_test['Fence'] = train_test['Fence'].fillna("None") +train_test['MiscFeature'] = train_test['MiscFeature'].fillna("None") +for col in ('GarageArea', 'GarageCars'): + train_test[col] = train_test[col].fillna(0) + +for col in ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']: + train_test[col] = train_test[col].fillna('None') + +for col in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 
'BsmtFinType1', 'BsmtFinType2'): + train_test[col] = train_test[col].fillna('None') + +for col in ('BsmtFinSF1', 'BsmtFinSF2', 'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea','BsmtUnfSF', 'TotalBsmtSF'): + train_test[col] = train_test[col].fillna(0) + +train_test['LotFrontage'] = train_test['LotFrontage'].fillna(train['LotFrontage'].median()) + + # Checking the features with NaN remained out + +for col in train_test: + if train_test[col].isna().sum() > 0: + print(train_test[col][1]) + +# Converting non-numeric predictors stored as numbers into string + +train_test['MSSubClass'] = train_test['MSSubClass'].apply(str) +train_test['YrSold'] = train_test['YrSold'].apply(str) +train_test['MoSold'] = train_test['MoSold'].apply(str) +train_test['OverallQual'] = train_test['OverallQual'].apply(str) +train_test['OverallCond'] = train_test['OverallCond'].apply(str) +train_test["SqFtPerRoom"] = train_test["GrLivArea"] / (train_test["TotRmsAbvGrd"] + + train_test["FullBath"] + + train_test["HalfBath"] + + train_test["KitchenAbvGr"]) + +train_test['Total_Home_Quality'] = train_test['OverallQual'] + train_test['OverallCond'] + +train_test['Total_Bathrooms'] = (train_test['FullBath'] + (0.5 * train_test['HalfBath']) + + train_test['BsmtFullBath'] + (0.5 * train_test['BsmtHalfBath'])) + +train_test["HighQualSF"] = train_test["1stFlrSF"] + train_test["2ndFlrSF"] +train_test['renovated']=train_test['YearRemodAdd']+train_test['YearBuilt'] + +# Removing the useless variables + +useless = ['GarageYrBlt','YearRemodAdd'] +train_test = train_test.drop(useless, axis = 1) +# Creating dummy variables from categorical features + +from scipy.stats import skew + +train_test_dummy = pd.get_dummies(train_test) + +numeric_features = train_test_dummy.dtypes[train_test_dummy.dtypes != object].index +skewed_features = train_test_dummy[numeric_features].apply(lambda skewed: skew(skewed)).sort_values(ascending=False) +high_skew = skewed_features[skewed_features > 0.5] +skew_index = high_skew.index + +# 
Normalize skewed features using log_transformation + +for i in skew_index: + train_test_dummy[i] = np.log1p(train_test_dummy[i] ) + +target_log = np.log1p(target) + +from xgboost import XGBRegressor + +# Train-Test separation + +x_train = train_test_dummy[0:4000] +x_test = train_test_dummy[4000:] + +xgb = XGBRegressor() +xgb.fit(x_train,target_log) + +test_pred = xgb.predict(x_test) +submission = pd.DataFrame(test_id, columns = ['Id']) +test_pred = np.expm1(test_pred) +submission['SalePrice'] = test_pred +submission.head() +submission.to_csv("xgb.csv", index = False, header = True) \ No newline at end of file From bfd165f4f431e072220b0e9d198dc29ebb86bbe6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Oct 2022 20:54:33 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/xgboostclassifier.py | 86 +++++++++++++++++---------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py index cd92799bf1eb..0d3ac3899459 100644 --- a/machine_learning/xgboostclassifier.py +++ b/machine_learning/xgboostclassifier.py @@ -1,55 +1,79 @@ -from xgboost import XGBClassifier -#https://xgboost.readthedocs.io/en/stable/ -import numpy as np -import pandas as pd -import seaborn as sns import matplotlib.pyplot as plt +# https://xgboost.readthedocs.io/en/stable/ +import numpy as np +import pandas as pd +import seaborn as sns +from xgboost import XGBClassifier - -training = pd.read_csv('../input/titanic/train.csv') -test = pd.read_csv('../input/titanic/test.csv') +training = pd.read_csv("../input/titanic/train.csv") +test = pd.read_csv("../input/titanic/test.csv") # Commented out IPython magic to ensure Python compatibility. 
-training['train_test'] = 1 -test['train_test'] = 0 -test['Survived'] = np.NaN -all_data = pd.concat([training,test]) +training["train_test"] = 1 +test["train_test"] = 0 +test["Survived"] = np.NaN +all_data = pd.concat([training, test]) # %matplotlib inline all_data.columns all_data.describe() -all_data['cabin_mul']=all_data.Cabin.apply(lambda cabin_class: 0 if pd.isna(cabin_class) else len(x.split(' '))) -all_data['cabin_adv'] = all_data.Cabin.apply(lambda cabin_number: str(cabin_number)[0]) -all_data['name_title']= all_data.Name.apply(lambda name: name.split(',')[1].split('.')[0].strip()) -all_data.Age=all_data.Age.fillna(training.Age.median()) -all_data.Fare=all_data.Fare.fillna(training.Fare.median()) -all_data.dropna(subset=['Embarked'],inplace=True) -all_data['norm_fare']=np.log(all_data.Fare+1) -all_data.Pclass=all_data.Pclass.astype(str) -all_data['Age']=all_data['Age'].apply(np.int64) -all_dummies = pd.get_dummies(all_data[['Pclass','Sex','Age','SibSp','Parch','norm_fare', - 'Embarked','cabin_adv','cabin_mul', - 'name_title','train_test']]) +all_data["cabin_mul"] = all_data.Cabin.apply( + lambda cabin_class: 0 if pd.isna(cabin_class) else len(cabin_class.split(" ")) +) +all_data["cabin_adv"] = all_data.Cabin.apply(lambda cabin_number: str(cabin_number)[0]) +all_data["name_title"] = all_data.Name.apply( + lambda name: name.split(",")[1].split(".")[0].strip() +) +all_data.Age = all_data.Age.fillna(training.Age.median()) +all_data.Fare = all_data.Fare.fillna(training.Fare.median()) +all_data.dropna(subset=["Embarked"], inplace=True) +all_data["norm_fare"] = np.log(all_data.Fare + 1) +all_data.Pclass = all_data.Pclass.astype(str) +all_data["Age"] = all_data["Age"].apply(np.int64) +all_dummies = pd.get_dummies( + all_data[ + [ + "Pclass", + "Sex", + "Age", + "SibSp", + "Parch", + "norm_fare", + "Embarked", + "cabin_adv", + "cabin_mul", + "name_title", + "train_test", + ] + ] +) from sklearn.preprocessing import StandardScaler + scale = StandardScaler() all_dummies_scaled 
= all_dummies.copy() -all_dummies_scaled[['Age','SibSp','Parch','norm_fare']]= scale.fit_transform(all_dummies_scaled[['Age','SibSp','Parch','norm_fare']]) +all_dummies_scaled[["Age", "SibSp", "Parch", "norm_fare"]] = scale.fit_transform( + all_dummies_scaled[["Age", "SibSp", "Parch", "norm_fare"]] +) all_dummies_scaled.head() -x_train_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 1].drop(['train_test'], axis =1) -x_test_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 0].drop(['train_test'], axis =1) +x_train_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 1].drop( + ["train_test"], axis=1 +) +x_test_scaled = all_dummies_scaled[all_dummies_scaled.train_test == 0].drop( + ["train_test"], axis=1 +) -y_train = all_data[all_data.train_test==1].Survived +y_train = all_data[all_data.train_test == 1].Survived from xgboost import XGBClassifier xgb = XGBClassifier() -xgb.fit(X_train_scaled,y_train) +xgb.fit(x_train_scaled, y_train) y_hat_base_vc = xgb.predict(x_test_scaled).astype(int) -basic_submission = {'PassengerId': test.PassengerId, 'Survived': y_hat_base_vc} +basic_submission = {"PassengerId": test.PassengerId, "Survived": y_hat_base_vc} base_submission = pd.DataFrame(data=basic_submission) -base_submission.to_csv('xgb_submission.csv', index=False) \ No newline at end of file +base_submission.to_csv("xgb_submission.csv", index=False) From 4e0a7d9c4eec613556b263a10b4db6308e784f68 Mon Sep 17 00:00:00 2001 From: Modassir Afzal <60973906+Moddy2024@users.noreply.github.com> Date: Thu, 13 Oct 2022 02:25:37 +0530 Subject: [PATCH 4/7] Delete xgboostregressor.py --- machine_learning/xgboostregressor.py | 138 --------------------------- 1 file changed, 138 deletions(-) delete mode 100644 machine_learning/xgboostregressor.py diff --git a/machine_learning/xgboostregressor.py b/machine_learning/xgboostregressor.py deleted file mode 100644 index ad2110d6efff..000000000000 --- a/machine_learning/xgboostregressor.py +++ /dev/null @@ -1,138 
+0,0 @@ -from xgboost import XGBRegressor -#https://xgboost.readthedocs.io/en/stable/ -import numpy as np -import pandas as pd - -import seaborn as sns -import matplotlib.pyplot as plt - - -import os -for dirname, _, filenames in os.walk('/kaggle/input'): - for filename in filenames: - print(os.path.join(dirname, filename)) - -train_ames=pd.read_csv('/kaggle/input/ames-housing-dataset/AmesHousing.csv') -test=pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/test.csv') -train=pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv') - -train_ames.columns = train_ames.columns.str.replace(' ', '') -train_ames=train_ames.rename(columns={"YearRemod/Add": "YearRemodAdd"}) - -data=pd.concat([train_ames,train,test], axis=0, sort=False) -print("Size of the Housing Dataset",len(data)) -useless = ['Id','PID','Order','SalePrice'] -data = data.drop(useless, axis = 1) -duplicate = data[data.duplicated(keep='last')].index -len(duplicate) - -duplicate=duplicate[0:390] -train_ames = train_ames.drop(duplicate, axis = 0) - -training=pd.concat([train_ames,train], axis=0, sort=False) -useless = ['Id','PID','Order'] -training = training.drop(useless, axis = 1) - -# Separating Target and Features - -target = training['SalePrice'] -test_id = test['Id'] -test = test.drop(['Id'],axis = 1) -training2 = training.drop(['SalePrice'], axis = 1) - - -# Concatenating train & test set - -train_test = pd.concat([training2,test], axis=0, sort=False) - -# Filling Categorical NaN (That we know how to fill due to the description file ) - -train_test['Functional'] = train_test['Functional'].fillna('Typ') -train_test['Electrical'] = train_test['Electrical'].fillna("SBrkr") -train_test['KitchenQual'] = train_test['KitchenQual'].fillna("TA") -train_test['Exterior1st'] = train_test['Exterior1st'].fillna(train_test['Exterior1st'].mode()[0]) -train_test['Exterior2nd'] = train_test['Exterior2nd'].fillna(train_test['Exterior2nd'].mode()[0]) -train_test['SaleType'] = 
train_test['SaleType'].fillna(train_test['SaleType'].mode()[0]) -train_test["PoolQC"] = train_test["PoolQC"].fillna("None") -train_test["Alley"] = train_test["Alley"].fillna("None") -train_test['FireplaceQu'] = train_test['FireplaceQu'].fillna("None") -train_test['Fence'] = train_test['Fence'].fillna("None") -train_test['MiscFeature'] = train_test['MiscFeature'].fillna("None") -for col in ('GarageArea', 'GarageCars'): - train_test[col] = train_test[col].fillna(0) - -for col in ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']: - train_test[col] = train_test[col].fillna('None') - -for col in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2'): - train_test[col] = train_test[col].fillna('None') - -for col in ('BsmtFinSF1', 'BsmtFinSF2', 'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea','BsmtUnfSF', 'TotalBsmtSF'): - train_test[col] = train_test[col].fillna(0) - -train_test['LotFrontage'] = train_test['LotFrontage'].fillna(train['LotFrontage'].median()) - - # Checking the features with NaN remained out - -for col in train_test: - if train_test[col].isna().sum() > 0: - print(train_test[col][1]) - -# Converting non-numeric predictors stored as numbers into string - -train_test['MSSubClass'] = train_test['MSSubClass'].apply(str) -train_test['YrSold'] = train_test['YrSold'].apply(str) -train_test['MoSold'] = train_test['MoSold'].apply(str) -train_test['OverallQual'] = train_test['OverallQual'].apply(str) -train_test['OverallCond'] = train_test['OverallCond'].apply(str) -train_test["SqFtPerRoom"] = train_test["GrLivArea"] / (train_test["TotRmsAbvGrd"] + - train_test["FullBath"] + - train_test["HalfBath"] + - train_test["KitchenAbvGr"]) - -train_test['Total_Home_Quality'] = train_test['OverallQual'] + train_test['OverallCond'] - -train_test['Total_Bathrooms'] = (train_test['FullBath'] + (0.5 * train_test['HalfBath']) + - train_test['BsmtFullBath'] + (0.5 * train_test['BsmtHalfBath'])) - -train_test["HighQualSF"] = train_test["1stFlrSF"] + 
train_test["2ndFlrSF"] -train_test['renovated']=train_test['YearRemodAdd']+train_test['YearBuilt'] - -# Removing the useless variables - -useless = ['GarageYrBlt','YearRemodAdd'] -train_test = train_test.drop(useless, axis = 1) -# Creating dummy variables from categorical features - -from scipy.stats import skew - -train_test_dummy = pd.get_dummies(train_test) - -numeric_features = train_test_dummy.dtypes[train_test_dummy.dtypes != object].index -skewed_features = train_test_dummy[numeric_features].apply(lambda skewed: skew(skewed)).sort_values(ascending=False) -high_skew = skewed_features[skewed_features > 0.5] -skew_index = high_skew.index - -# Normalize skewed features using log_transformation - -for i in skew_index: - train_test_dummy[i] = np.log1p(train_test_dummy[i] ) - -target_log = np.log1p(target) - -from xgboost import XGBRegressor - -# Train-Test separation - -x_train = train_test_dummy[0:4000] -x_test = train_test_dummy[4000:] - -xgb = XGBRegressor() -xgb.fit(x_train,target_log) - -test_pred = xgb.predict(x_test) -submission = pd.DataFrame(test_id, columns = ['Id']) -test_pred = np.expm1(test_pred) -submission['SalePrice'] = test_pred -submission.head() -submission.to_csv("xgb.csv", index = False, header = True) \ No newline at end of file From 1f06b24b8571830f1992f8c78576addd648889bb Mon Sep 17 00:00:00 2001 From: Modassir Afzal <60973906+Moddy2024@users.noreply.github.com> Date: Thu, 13 Oct 2022 02:42:52 +0530 Subject: [PATCH 5/7] Update xgboostclassifier.py --- machine_learning/xgboostclassifier.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py index 0d3ac3899459..6511469b88c8 100644 --- a/machine_learning/xgboostclassifier.py +++ b/machine_learning/xgboostclassifier.py @@ -1,11 +1,20 @@ import matplotlib.pyplot as plt -# https://xgboost.readthedocs.io/en/stable/ +""" +The Url for the algorithm +https://xgboost.readthedocs.io/en/stable/ +""" 
import numpy as np import pandas as pd import seaborn as sns from xgboost import XGBClassifier + """ + You have to download the dataset from kaggle in order to run this + https://www.kaggle.com/competitions/titanic/data + This is the link from where you can get the data. + """ + training = pd.read_csv("../input/titanic/train.csv") test = pd.read_csv("../input/titanic/test.csv") From c68a16d76ba450a7dc934bc00ee6e26ca3e42722 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Oct 2022 21:13:56 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/xgboostclassifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py index 6511469b88c8..8c8519891593 100644 --- a/machine_learning/xgboostclassifier.py +++ b/machine_learning/xgboostclassifier.py @@ -10,7 +10,7 @@ from xgboost import XGBClassifier """ - You have to download the dataset from kaggle in order to run this + You have to download the dataset from kaggle in order to run this https://www.kaggle.com/competitions/titanic/data This is the link from where you can get the data. 
""" From 19c9d25a0958475b0d032be70a11acfe5e4565f1 Mon Sep 17 00:00:00 2001 From: Modassir Afzal <60973906+Moddy2024@users.noreply.github.com> Date: Thu, 13 Oct 2022 02:45:38 +0530 Subject: [PATCH 7/7] Update xgboostclassifier.py --- machine_learning/xgboostclassifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py index 8c8519891593..d268fab73ea8 100644 --- a/machine_learning/xgboostclassifier.py +++ b/machine_learning/xgboostclassifier.py @@ -9,11 +9,11 @@ import seaborn as sns from xgboost import XGBClassifier - """ +""" You have to download the dataset from kaggle in order to run this https://www.kaggle.com/competitions/titanic/data This is the link from where you can get the data. - """ +""" training = pd.read_csv("../input/titanic/train.csv") test = pd.read_csv("../input/titanic/test.csv")