12
12
"""
13
13
14
14
import numpy as np
15
- from sklearn .datasets import load_boston
15
+ from sklearn .datasets import fetch_california_housing
16
16
from sklearn .model_selection import train_test_split
17
17
from sklearn .metrics import mean_squared_error
18
18
from catboost import CatBoostRegressor
19
19
20
20
21
21
def data_handling() -> tuple:
    """
    Load the California Housing dataset and split it into features and targets.

    The California Housing dataset is used as the regression example; it is
    the replacement for the deprecated Boston dataset.

    Returns:
        tuple: A tuple of (features, target), where both are numpy arrays.

    Example:
    >>> features, target = data_handling()
    >>> isinstance(features, np.ndarray)
    True
    >>> isinstance(target, np.ndarray)
    True
    >>> features.shape
    (20640, 8)
    >>> target.shape
    (20640,)
    """
    # The loader returns a container object; only the feature matrix and the
    # target vector are needed by the callers in this module.
    housing = fetch_california_housing()
    features = housing.data
    target = housing.target
    return features, target
42
43
43
44
44
45
def catboost_regressor (features : np .ndarray , target : np .ndarray ) -> CatBoostRegressor :
45
46
"""
46
47
Trains a CatBoostRegressor using the provided features and target values.
47
-
48
+
48
49
Args:
49
50
features (np.ndarray): The input features for the regression model.
50
51
target (np.ndarray): The target values for the regression model.
51
-
52
+
52
53
Returns:
53
54
CatBoostRegressor: A trained CatBoost regressor model.
54
55
@@ -66,10 +67,14 @@ def catboost_regressor(features: np.ndarray, target: np.ndarray) -> CatBoostRegr
66
67
def main () -> None :
67
68
"""
68
69
Main function to run the CatBoost Regressor example.
69
-
70
+
70
71
It loads the data, splits it into training and testing sets,
71
72
trains the regressor on the training data, and evaluates its performance
72
73
on the test data.
74
+
75
+ Example:
76
+ >>> main()
77
+ Mean Squared Error on Test Set:
73
78
"""
74
79
# Load and split the dataset
75
80
features , target = data_handling ()
@@ -90,6 +95,5 @@ def main() -> None:
90
95
91
96
if __name__ == "__main__":
    # Verify the docstring examples first (verbose report), then run the
    # end-to-end example.
    from doctest import testmod

    testmod(verbose=True)
    main()
0 commit comments