File tree 1 file changed +30
-0
lines changed
1 file changed +30
-0
lines changed Original file line number Diff line number Diff line change
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+
5
+ """
6
+ Detect and handle outliers in a DataFrame.
7
+
8
+ Parameters:
9
+ data: DataFrame
10
+ method: str, default 'z-score'. The method used for outlier detection. Options: 'z-score' or 'IQR' (Interquartile Range).
11
+ threshold: float, default 3. Only used by the 'z-score' method: a row is removed when any of its values has an absolute z-score at or above this threshold.
12
+
13
+ Returns:
14
+ DataFrame: DataFrame with every row that contains an outlier removed.
15
+ """
16
def handle_outliers(data, method='z-score', threshold=3, iqr_multiplier=1.5):
    """Return ``data`` without the rows that contain an outlier in any column.

    Parameters:
        data: DataFrame. The arithmetic below is applied to every column,
            so all columns are expected to be numeric.
        method: str, default 'z-score'. The detection rule to apply:
            'z-score' or 'IQR' (interquartile range).
        threshold: float, default 3. Used by 'z-score' only. A value is an
            outlier when its absolute z-score is at or above this number.
        iqr_multiplier: float, default 1.5. Used by 'IQR' only. A value is
            an outlier when it lies more than ``iqr_multiplier * IQR``
            below the first quartile or above the third quartile.

    Returns:
        DataFrame: a new DataFrame holding only the rows in which no column
        was flagged as an outlier.

    Raises:
        ValueError: if ``method`` is neither 'z-score' nor 'IQR'.
    """
    if method == 'z-score':
        # A constant column has zero spread; dividing by it would give
        # NaN/inf z-scores. Mapping 0 -> NaN keeps such a column from
        # flagging (and thereby discarding) every row of the frame.
        spread = data.std().replace(0, np.nan)
        z_scores = ((data - data.mean()) / spread).abs()
        # Comparisons against NaN are False, so missing values and
        # zero-spread columns are never treated as outliers.
        is_outlier = z_scores >= threshold
    elif method == 'IQR':
        q1 = data.quantile(0.25)
        q3 = data.quantile(0.75)
        fence = iqr_multiplier * (q3 - q1)
        is_outlier = (data < (q1 - fence)) | (data > (q3 + fence))
    else:
        raise ValueError("Invalid method. Use z-score or IQR")

    # Keep a row only when none of its columns was flagged.
    return data[~is_outlier.any(axis=1)]
You can’t perform that action at this time.
0 commit comments