Skip to content

Commit b6aa180

Browse files
authored
Merge pull request #1 from Chisholm6192/outlier-fixing
CLN: Enforce deprecation of using alias for builtin/NumPy funcs (pandas-dev#57444)
2 parents 47cd690 + 3b25ed4 commit b6aa180

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

scripts/handle_outliers.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pandas as pd
2+
import numpy as np
3+
4+
5+
"""
6+
Detect and handle outliers in a DataFrame.
7+
8+
Parameters:
9+
data: DataFrame
10+
method: str, default 'z-score'. The method used for outlier detection. Options: 'z-score' or 'IQR' (Interquartile Range).
11+
threshold: float, default 3. The z-score cutoff: values whose absolute z-score is at or above this threshold are considered outliers. Not used by the 'IQR' method, which applies a fixed 1.5 * IQR fence.
12+
13+
Returns:
14+
DataFrame: DataFrame with the rows that contain outliers removed.
15+
"""
16+
def handle_outliers(data, method='z-score', threshold=3):
    """
    Drop the rows of a DataFrame that contain an outlier in any numeric column.

    Parameters:
    data: DataFrame. Only its numeric columns are inspected; all other
        columns are carried through unchanged.
    method: str, default 'z-score'. The method used for outlier detection.
        Options: 'z-score' or 'IQR' (Interquartile Range).
    threshold: float, default 3. Cutoff for the 'z-score' method: a value is
        an outlier when its absolute z-score is at or above this threshold.
        Ignored by the 'IQR' method, which uses a fixed 1.5 * IQR fence.

    Returns:
    DataFrame: the rows of `data` in which no numeric column holds an outlier.

    Raises:
    ValueError: if `method` is neither 'z-score' nor 'IQR'.
    """
    if method not in ('z-score', 'IQR'):
        raise ValueError("Invalid method. Use z-score or IQR")

    # Statistics are only meaningful for numeric columns; restricting the
    # detection to them lets frames with text/categorical columns through.
    numeric = data.select_dtypes(include='number')

    if method == 'z-score':
        z_scores = np.abs((numeric - numeric.mean()) / numeric.std())
        # Flag outliers explicitly instead of testing "all z < threshold":
        # a NaN z-score (constant column with std == 0, or a missing value)
        # compares False here and therefore no longer discards the row.
        is_outlier = z_scores >= threshold
    else:
        q1 = numeric.quantile(0.25)
        q3 = numeric.quantile(0.75)
        iqr = q3 - q1
        is_outlier = (numeric < (q1 - 1.5 * iqr)) | (numeric > (q3 + 1.5 * iqr))

    # A row is removed as soon as one of its numeric values is an outlier.
    return data[~is_outlier.any(axis=1)]

0 commit comments

Comments
 (0)