|
3 | 3 |
|
4 | 4 | import numpy as np
|
5 | 5 | from numpy.random import RandomState
|
| 6 | +from numpy import nan |
| 7 | +import datetime |
6 | 8 |
|
7 |
| -from pandas.core.api import Series, Categorical, CategoricalIndex |
| 9 | +from pandas import Series, Categorical, CategoricalIndex, Index |
8 | 10 | import pandas as pd
|
9 | 11 |
|
10 | 12 | from pandas import compat
|
| 13 | +import pandas.algos as _algos |
| 14 | +from pandas.compat import lrange |
11 | 15 | import pandas.core.algorithms as algos
|
12 | 16 | import pandas.util.testing as tm
|
13 | 17 | import pandas.hashtable as hashtable
|
14 | 18 | from pandas.compat.numpy import np_array_datetime64_compat
|
| 19 | +from pandas.util.testing import assert_almost_equal |
15 | 20 |
|
16 | 21 |
|
17 | 22 | class TestMatch(tm.TestCase):
|
@@ -705,6 +710,315 @@ def test_unique_label_indices():
|
705 | 710 | tm.assert_numpy_array_equal(left, right)
|
706 | 711 |
|
707 | 712 |
|
def test_rank():
    """Compare ``_algos.rank_1d_float64`` with ``scipy.stats.rankdata``.

    Non-finite entries are replaced by ``+inf`` before being handed to
    scipy, and their expected rank is then overwritten with NaN.
    """
    tm._skip_if_no_scipy()
    from scipy.stats import rankdata

    def _check(arr):
        non_finite = ~np.isfinite(arr)

        # Work on a private copy so the caller's array is never modified.
        values = arr.copy()
        result = _algos.rank_1d_float64(values)

        values[non_finite] = np.inf
        expected = rankdata(values)
        expected[non_finite] = nan

        assert_almost_equal(result, expected)

    cases = (
        [nan, nan, 5., 5., 5., nan, 1, 2, 3, nan],
        [4., nan, 5., 5., 5., nan, 1, 2, 4., nan],
    )
    for case in cases:
        _check(np.array(case))
| 728 | + |
| 729 | + |
def test_pad_backfill_object_segfault():
    """Run the object pad/backfill routines with one empty operand.

    Per the test name this guards against a segfault; the assertions
    below only pin the returned indexers.
    """
    empty = np.array([], dtype='O')
    single = np.array([datetime.datetime(2010, 12, 31)], dtype='O')

    unmatched = np.array([-1], dtype=np.int64)
    no_entries = np.array([], dtype=np.int64)

    # Same call order as before: pad (both ways), then backfill (both ways).
    for fill in (_algos.pad_object, _algos.backfill_object):
        assert np.array_equal(fill(empty, single), unmatched)
        assert np.array_equal(fill(single, empty), no_entries)
| 750 | + |
| 751 | + |
def test_arrmap():
    """``arrmap_object`` with a boolean predicate yields a bool array."""
    wanted = ['foo', 'bar']

    def is_wanted(item):
        return item in wanted

    values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O')
    mapped = _algos.arrmap_object(values, is_wanted)
    assert mapped.dtype == np.bool_
| 756 | + |
| 757 | + |
class TestTseriesUtil(tm.TestCase):
    """Checks for the int64 pad/backfill routines in ``_algos``."""

    _multiprocess_can_split_ = True

    # The next five tests are empty placeholders; they are kept so the
    # collected test names stay the same.
    def test_combineFunc(self):
        pass

    def test_reindex(self):
        pass

    def test_isnull(self):
        pass

    def test_groupby(self):
        pass

    def test_groupby_withnull(self):
        pass

    def test_backfill(self):
        source = Index([1, 5, 10])
        target = Index(lrange(12))
        expected = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1],
                            dtype=np.int64)

        indexer = _algos.backfill_int64(source.values, target.values)
        self.assert_numpy_array_equal(indexer, expected)

        # corner case: every target value is larger than every source value
        source = Index([1, 4])
        target = Index(lrange(5, 10))
        expected = np.array([-1] * 5, dtype=np.int64)

        indexer = _algos.backfill_int64(source.values, target.values)
        self.assert_numpy_array_equal(indexer, expected)

    def test_pad(self):
        source = Index([1, 5, 10])
        target = Index(lrange(12))
        expected = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2],
                            dtype=np.int64)

        indexer = _algos.pad_int64(source.values, target.values)
        self.assert_numpy_array_equal(indexer, expected)

        # corner case: every target value is smaller than every source value
        source = Index([5, 10])
        target = Index(lrange(5))
        expected = np.array([-1] * 5, dtype=np.int64)

        indexer = _algos.pad_int64(source.values, target.values)
        self.assert_numpy_array_equal(indexer, expected)
| 810 | + |
| 811 | + |
def test_left_join_indexer_unique():
    """Pin the indexer from ``left_join_indexer_unique_int64``."""
    unique_vals = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    dup_vals = np.array([2, 2, 3, 4, 4], dtype=np.int64)

    # Note the argument order: the array with duplicates is passed first.
    indexer = _algos.left_join_indexer_unique_int64(dup_vals, unique_vals)

    assert np.array_equal(indexer, np.array([1, 1, 2, 3, 3], dtype=np.int64))
| 819 | + |
| 820 | + |
def test_left_outer_join_bug():
    """Unsorted ``left_outer_join`` of 100 labels against ``[3, 1]``."""
    left = np.array([0, 1, 0, 1, 1, 2, 3, 1, 0, 2, 1, 2, 0, 1, 1, 2, 3, 2, 3,
                     2, 1, 1, 3, 0, 3, 2, 3, 0, 0, 2, 3, 2, 0, 3, 1, 3, 0, 1,
                     3, 0, 0, 1, 0, 3, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2, 0,
                     3, 1, 2, 0, 0, 3, 1, 3, 2, 2, 0, 1, 3, 0, 2, 3, 2, 3, 3,
                     2, 3, 3, 1, 3, 2, 0, 0, 3, 1, 1, 1, 0, 2, 3, 3, 1, 2, 0,
                     3, 1, 2, 0, 2], dtype=np.int64)
    right = np.array([3, 1], dtype=np.int64)

    lidx, ridx = _algos.left_outer_join(left, right, 4, sort=False)

    # Expected position in `right` for each label 0..3: label 1 sits at
    # position 1, label 3 at position 0, labels 0 and 2 are absent (-1).
    right_position = np.array([-1., 1., -1., 0.])
    exp_ridx = right_position[left]
    exp_lidx = np.arange(len(left))

    assert np.array_equal(lidx, exp_lidx)
    assert np.array_equal(ridx, exp_ridx)
| 841 | + |
| 842 | + |
def test_inner_join_indexer():
    """Pin the values and both indexers of ``inner_join_indexer_int64``."""
    left = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    right = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    joined, left_idx, right_idx = _algos.inner_join_indexer_int64(left, right)

    assert_almost_equal(joined, np.array([3, 5], dtype=np.int64))
    assert_almost_equal(left_idx, np.array([2, 4], dtype=np.int64))
    assert_almost_equal(right_idx, np.array([1, 2], dtype=np.int64))

    # single-element inputs holding the same value
    left = np.array([5], dtype=np.int64)
    right = np.array([5], dtype=np.int64)

    joined, left_idx, right_idx = _algos.inner_join_indexer_int64(left, right)

    first_position = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(joined, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(left_idx, first_position)
    tm.assert_numpy_array_equal(right_idx, first_position)
| 864 | + |
| 865 | + |
def test_outer_join_indexer():
    """Pin the values and both indexers of ``outer_join_indexer_int64``."""
    left = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    right = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    joined, left_idx, right_idx = _algos.outer_join_indexer_int64(left, right)

    assert_almost_equal(
        joined, np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64))
    assert_almost_equal(
        left_idx, np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.int64))
    assert_almost_equal(
        right_idx, np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.int64))

    # single-element inputs holding the same value
    left = np.array([5], dtype=np.int64)
    right = np.array([5], dtype=np.int64)

    joined, left_idx, right_idx = _algos.outer_join_indexer_int64(left, right)

    first_position = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(joined, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(left_idx, first_position)
    tm.assert_numpy_array_equal(right_idx, first_position)
| 887 | + |
| 888 | + |
def test_left_join_indexer():
    """Pin the values and both indexers of ``left_join_indexer_int64``."""
    left = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    right = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    joined, left_idx, right_idx = _algos.left_join_indexer_int64(left, right)

    # The joined values are expected to equal the left input.
    assert_almost_equal(joined, left)
    assert_almost_equal(left_idx, np.array([0, 1, 2, 3, 4], dtype=np.int64))
    assert_almost_equal(right_idx,
                        np.array([-1, -1, 1, -1, 2], dtype=np.int64))

    # single-element inputs holding the same value
    left = np.array([5], dtype=np.int64)
    right = np.array([5], dtype=np.int64)

    joined, left_idx, right_idx = _algos.left_join_indexer_int64(left, right)

    first_position = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(joined, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(left_idx, first_position)
    tm.assert_numpy_array_equal(right_idx, first_position)
| 909 | + |
| 910 | + |
def test_left_join_indexer2():
    """``left_join_indexer_int64`` where the second input has a duplicate."""
    with_dups = Index([1, 1, 2, 5])
    uniques = Index([1, 2, 5, 7, 9])

    joined, left_idx, right_idx = _algos.left_join_indexer_int64(
        uniques.values, with_dups.values)

    assert_almost_equal(joined,
                        np.array([1, 1, 2, 5, 7, 9], dtype=np.int64))
    assert_almost_equal(left_idx,
                        np.array([0, 0, 1, 2, 3, 4], dtype=np.int64))
    assert_almost_equal(right_idx,
                        np.array([0, 1, 2, 3, -1, -1], dtype=np.int64))
| 925 | + |
| 926 | + |
def test_outer_join_indexer2():
    """``outer_join_indexer_int64`` where the second input has a duplicate."""
    with_dups = Index([1, 1, 2, 5])
    uniques = Index([1, 2, 5, 7, 9])

    joined, left_idx, right_idx = _algos.outer_join_indexer_int64(
        uniques.values, with_dups.values)

    assert_almost_equal(joined,
                        np.array([1, 1, 2, 5, 7, 9], dtype=np.int64))
    assert_almost_equal(left_idx,
                        np.array([0, 0, 1, 2, 3, 4], dtype=np.int64))
    assert_almost_equal(right_idx,
                        np.array([0, 1, 2, 3, -1, -1], dtype=np.int64))
| 941 | + |
| 942 | + |
def test_inner_join_indexer2():
    """``inner_join_indexer_int64`` where the second input has a duplicate."""
    with_dups = Index([1, 1, 2, 5])
    uniques = Index([1, 2, 5, 7, 9])

    joined, left_idx, right_idx = _algos.inner_join_indexer_int64(
        uniques.values, with_dups.values)

    assert_almost_equal(joined, np.array([1, 1, 2, 5], dtype=np.int64))
    assert_almost_equal(left_idx, np.array([0, 0, 1, 2], dtype=np.int64))
    assert_almost_equal(right_idx, np.array([0, 1, 2, 3], dtype=np.int64))
| 957 | + |
| 958 | + |
def test_is_lexsorted():
    """``is_lexsorted`` must reject a pair of descending label arrays.

    The two arrays hold the same 124 values as the original literals:
    the first is 31 copies each of 3, 2, 1, 0 (in that order) and the
    second is the run 30, 29, ..., 0 repeated four times.
    """
    # The dtype is pinned to int64 because NumPy's default integer is
    # platform dependent (32-bit on some platforms).
    # NOTE(review): the other integer routines exercised in this file are
    # all fed int64 data; is_lexsorted presumably expects the same --
    # confirm against the Cython source.
    failure = [
        np.repeat([3, 2, 1, 0], 31).astype(np.int64),
        np.tile(np.arange(30, -1, -1), 4).astype(np.int64),
    ]

    assert (not _algos.is_lexsorted(failure))
| 987 | + |
| 988 | +# def test_get_group_index(): |
| 989 | +# a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) |
| 990 | +# b = np.array([1, 0, 3, 2, 0, 2, 3, 0], dtype=np.int64) |
| 991 | +# expected = np.array([1, 4, 11, 2, 8, 6, 3, 0], dtype=np.int64) |
| 992 | + |
| 993 | +# result = lib.get_group_index([a, b], (3, 4)) |
| 994 | + |
| 995 | +# assert(np.array_equal(result, expected)) |
| 996 | + |
| 997 | + |
def test_groupsort_indexer():
    """Check ``groupsort_indexer`` against NumPy's stable sorts."""
    primary = np.random.randint(0, 1000, 100).astype(np.int64)
    secondary = np.random.randint(0, 1000, 100).astype(np.int64)

    # A stable sort is needed for the comparison, hence mergesort.
    indexer = _algos.groupsort_indexer(primary, 1000)[0]
    assert np.array_equal(indexer, np.argsort(primary, kind='mergesort'))

    # Combine both keys into one label and compare with lexsort.
    combined = primary * 1000 + secondary
    indexer = _algos.groupsort_indexer(combined, 1000000)[0]
    assert np.array_equal(indexer, np.lexsort((secondary, primary)))
| 1013 | + |
| 1014 | + |
def test_ensure_platform_int():
    """``ensure_platform_int`` returns the very same object it was given.

    The identity check below can only hold when the input already has the
    platform integer dtype, so the dtype is requested explicitly.
    """
    # np.arange(100) would use NumPy's default integer type, which is not
    # np.intp on every platform / NumPy version; relying on it makes the
    # identity assertion platform dependent.
    arr = np.arange(100, dtype=np.intp)

    result = _algos.ensure_platform_int(arr)
    assert (result is arr)
| 1020 | + |
| 1021 | + |
708 | 1022 | if __name__ == '__main__':
|
709 | 1023 | import nose
|
710 | 1024 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
|
|
0 commit comments