diff --git a/pandas/io/percentile_scaling.py b/pandas/io/percentile_scaling.py new file mode 100644 index 0000000000000..25ca1a748cfa6 --- /dev/null +++ b/pandas/io/percentile_scaling.py @@ -0,0 +1,11 @@ +import numpy as np + +def percentile_scaling(data): + data = np.array(data) + min_val = np.min(data) + max_val = np.max(data) + if max_val == min_val: + raise ValueError("Cannot scale data with identical values.") + + scaled = 100 * (data - min_val) / (max_val - min_val) + return scaled.tolist() diff --git a/pandas/tests/io/test_percentile_scaling.py b/pandas/tests/io/test_percentile_scaling.py new file mode 100644 index 0000000000000..131868fe3d481 --- /dev/null +++ b/pandas/tests/io/test_percentile_scaling.py @@ -0,0 +1,21 @@ +import unittest +from pandas.io.percentile_scaling import percentile_scaling + +class TestPercentileScaling(unittest.TestCase): + def test_scaling(self): + data = [10, 20, 30, 40, 50] + expected = [0.0, 25.0, 50.0, 75.0, 100.0] + result = percentile_scaling(data) + for r, e in zip(result, expected): + self.assertAlmostEqual(r, e) + + def test_identical_values(self): + with self.assertRaises(ValueError): + percentile_scaling([5, 5, 5]) + + def test_empty(self): + with self.assertRaises(ValueError): + percentile_scaling([]) + +if __name__ == "__main__": + unittest.main()