\n",
+ " \n",
+ " \n",
+ " \n",
+ " 18\n",
+ " \n",
+ " | \n",
+ " 6\n",
+ " \n",
+ " | \n",
+ " 5.8\n",
+ " \n",
+ " | \n",
+ " -2.8\n",
+ " \n",
+ " | \n",
+ " -4.2\n",
+ " \n",
+ " | \n",
+ " 7.1\n",
+ " \n",
+ " | \n",
+ " -3.3\n",
+ " \n",
+ " | \n",
+ " -1.2\n",
+ " \n",
+ " | \n",
+ " 7.9\n",
+ " \n",
+ " | \n",
+ " -4.9\n",
+ " \n",
+ " | \n",
+ " 1.4\n",
+ " \n",
+ " | \n",
+ " -0.63\n",
+ " \n",
+ " | \n",
+ " 0.35\n",
+ " \n",
+ " | \n",
+ " 7.5\n",
+ " \n",
+ " | \n",
+ " 0.87\n",
+ " \n",
+ " | \n",
+ " -1.5\n",
+ " \n",
+ " | \n",
+ " -2.1\n",
+ " \n",
+ " | \n",
+ " -4.2\n",
+ " \n",
+ " | \n",
+ " -2.5\n",
+ " \n",
+ " | \n",
+ " -2.5\n",
+ " \n",
+ " | \n",
+ " -2.9\n",
+ " \n",
+ " | \n",
+ " 1.9\n",
+ " \n",
+ " | \n",
+ " -9.7\n",
+ " \n",
+ " | \n",
+ " 3.4\n",
+ " \n",
+ " | \n",
+ " 7.1\n",
+ " \n",
+ " | \n",
+ " 4.4\n",
+ " \n",
+ " |
\n",
" \n",
" \n",
" \n",
- " \n",
- " \n",
- " 0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.96\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.62\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.92\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.08\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.25\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.42\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.29\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.58\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.75\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.56\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.66\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.02\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.09\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.86\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.81\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.79\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.26\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.44\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.65\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.76\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.37\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.73\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.24\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.02\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.86\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.91\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.61\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.62\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.86\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.08\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.12\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.6\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.67\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.91\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.81\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.28\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 4\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.48\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.86\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.7\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.02\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.81\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.72\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.72\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.08\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.18\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.08\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.88\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.09\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 5\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.65\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.94\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.94\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.24\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.09\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.54\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.51\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.14\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.77\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 6\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.74\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.76\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.57\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.54\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.62\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -8.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.81\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 7\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.69\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.5\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.73\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.18\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.04\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.95\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.67\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.26\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.39\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.66\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.92\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.65\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.61\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.4\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.6\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.48\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.04\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.6\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.51\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.4\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.08\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 9\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.38\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.54\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.96\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.26\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.37\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.61\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -8.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.41\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.14\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.16\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 10\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.9\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.98\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.79\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.48\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.77\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.7\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.61\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.72\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.54\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.02\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.81\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.86\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.47\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.38\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.9\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.02\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.04\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.6\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.49\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.96\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.47\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.88\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.92\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.55\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -8.15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.42\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.24\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.9\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.36\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.83\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 12\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.72\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.75\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.57\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -8.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.42\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.6\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.45\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.76\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.25\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.56\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.79\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -7.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.0\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.67\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -8.71\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.47\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.71\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 14\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.88\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.57\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.72\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.61\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.29\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -6.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.5\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -9.36\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.36\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.53\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.98\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.26\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.91\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.68\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.33\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.01\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.22\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.74\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.36\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -11.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.59\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.69\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.91\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.08\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.34\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.3\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.04\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.47\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.28\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.79\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.8\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.56\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -13.13\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.07\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.94\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 17\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.57\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.76\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.75\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.58\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.78\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.29\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.56\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.76\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.05\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.27\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.31\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.95\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.06\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.84\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -12.57\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.56\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.36\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.7\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 18\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.99\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 5.82\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.15\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.12\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.32\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.93\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.85\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.44\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.63\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.35\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.47\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.09\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.55\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.46\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.89\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.9\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -9.74\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.07\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.39\n",
- " \n",
- " \n",
- " |
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 4.03\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.23\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.11\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.19\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -4.1\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.52\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.53\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.21\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -0.24\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 0.01\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.16\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 6.43\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.97\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.66\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -5.2\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -3.25\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -2.87\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -1.65\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 1.64\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " -10.66\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 2.83\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 7.48\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " 3.94\n",
- " \n",
+ " | \n",
+ " 19\n",
+ " \n",
+ " | \n",
+ " 4\n",
+ " \n",
+ " | \n",
+ " 6.2\n",
+ " \n",
+ " | \n",
+ " -4.1\n",
+ " \n",
+ " | \n",
+ " -4.1\n",
+ " \n",
+ " | \n",
+ " 7.2\n",
+ " \n",
+ " | \n",
+ " -4.1\n",
+ " \n",
+ " | \n",
+ " -1.5\n",
+ " \n",
+ " | \n",
+ " 6.5\n",
+ " \n",
+ " | \n",
+ " -5.2\n",
+ " \n",
+ " | \n",
+ " -0.24\n",
+ " \n",
+ " | \n",
+ " 0.0072\n",
+ " \n",
+ " | \n",
+ " 1.2\n",
+ " \n",
+ " | \n",
+ " 6.4\n",
+ " \n",
+ " | \n",
+ " -2\n",
+ " \n",
+ " | \n",
+ " -2.6\n",
+ " \n",
+ " | \n",
+ " -1.7\n",
+ " \n",
+ " | \n",
+ " -5.2\n",
+ " \n",
+ " | \n",
+ " -3.3\n",
+ " \n",
+ " | \n",
+ " -2.9\n",
+ " \n",
+ " | \n",
+ " -1.7\n",
+ " \n",
+ " | \n",
+ " 1.6\n",
+ " \n",
+ " | \n",
+ " -11\n",
+ " \n",
+ " | \n",
+ " 2.8\n",
+ " \n",
+ " | \n",
+ " 7.5\n",
+ " \n",
+ " | \n",
+ " 3.9\n",
" \n",
" |
\n",
" \n",
@@ -20991,10 +18738,10 @@
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 31,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 6fa58bf620005..7494f8ae88307 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -53,6 +53,10 @@ advanced indexing.
but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be
a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `)
+.. warning::
+
+   Indexing on an integer-based Index with floats has been clarified in 0.18.0; for a summary of the changes, see :ref:`here `.
+
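As a hedged illustration of the kind of lookup the new warning points at (behavior per the 0.18.0 float-indexing cleanup linked above; the series is made up):

.. code-block:: python

    import pandas as pd

    s = pd.Series(range(3))  # integer-based Int64Index
    s[1.0]  # a float equivalent to a label is deprecated but treated like s[1]
    s[1.5]  # a float with no equivalent label raises TypeError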
See the :ref:`MultiIndex / Advanced Indexing ` for ``MultiIndex`` and more advanced indexing documentation.
See the :ref:`cookbook` for some advanced strategies
@@ -1625,6 +1629,7 @@ This is the correct access method
This *can* work at times, but is not guaranteed, and so should be avoided
.. ipython:: python
+ :okwarning:
dfc = dfc.copy()
dfc['A'][0] = 111
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 3836180af520f..f0e6955f38612 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -225,7 +225,7 @@ Recommended Dependencies
* `numexpr `__: for accelerating certain numerical operations.
``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups.
- If installed, must be Version 2.1 or higher. Version 2.4.6 or higher on Windows is highly recommended.
+  If installed, must be version 2.1 or higher (excluding the buggy 2.4.4 release). Version 2.4.6 or higher is highly recommended; a quick version check is sketched below.
* `bottleneck `__: for accelerating certain types of ``nan``
evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups.
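Following the numexpr note above, a minimal sketch for confirming that an installed numexpr is not the excluded 2.4.4 release (``numexpr.__version__`` and ``LooseVersion`` are standard; the printed message is made up):

.. code-block:: python

    import numexpr
    from distutils.version import LooseVersion

    # pandas 0.18.0 refuses to use numexpr 2.4.4 as a computation back-end
    if LooseVersion(numexpr.__version__) == LooseVersion('2.4.4'):
        print('buggy numexpr 2.4.4 detected; upgrade to >= 2.4.6')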
@@ -253,7 +253,6 @@ Optional Dependencies
- `SQLite `__: for SQLite, this is included in Python's standard library by default.
* `matplotlib `__: for plotting
-* `statsmodels `__: Needed for parts of :mod:`pandas.stats`
* `openpyxl `__, `xlrd/xlwt `__: Needed for Excel I/O
* `XlsxWriter `__: Alternative Excel writer
* `Jinja2 `__: Template engine for conditional HTML formatting.
@@ -267,9 +266,11 @@ Optional Dependencies
`__, or `xclip
`__: necessary to use
:func:`~pandas.io.clipboard.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.
-* Google's `python-gflags `__
- and `google-api-python-client `__: Needed for :mod:`~pandas.io.gbq`
-* `httplib2 `__: Needed for :mod:`~pandas.io.gbq`
+* Google's `python-gflags `__,
+  `oauth2client `__,
+  `httplib2 `__
+  and `google-api-python-client `__:
+  Needed for :mod:`~pandas.io.gbq`
* One of the following combinations of libraries is needed to use the
top-level :func:`~pandas.io.html.read_html` function:
diff --git a/doc/source/io.rst b/doc/source/io.rst
index acb8bc1eceda5..06863d14f298d 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4168,6 +4168,12 @@ DataFrame with a shape and data types derived from the source table.
Additionally, DataFrames can be inserted into new BigQuery tables or appended
to existing tables.
+You will need to install some additional dependencies:
+
+- Google's `python-gflags `__
+- `httplib2 `__
+- `google-api-python-client `__
+
.. warning::
To use this module, you will need a valid BigQuery account. Refer to the
@@ -4191,6 +4197,9 @@ The key functions are:
Authentication
''''''''''''''
+
+.. versionadded:: 0.18.0
+
Authentication to the Google ``BigQuery`` service is via ``OAuth 2.0``.
It is possible to authenticate with either user account credentials or service account credentials.
@@ -4206,6 +4215,8 @@ is particularly useful when working on remote servers (eg. jupyter iPython noteb
Additional information on service accounts can be found
`here `__.
+You will need to install an additional dependency: `oauth2client `__.
+
.. note::
The `'private_key'` parameter can be set to either the file path of the service account key
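As a hedged sketch of the service-account flow this note describes (the query, project id, and key path below are hypothetical):

.. code-block:: python

    import pandas as pd

    # hypothetical query, project id, and key file
    df = pd.read_gbq('SELECT name FROM my_dataset.my_table',
                     project_id='my-project-id',
                     private_key='/path/to/service_account_key.json')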
@@ -4552,7 +4563,7 @@ SAS Formats
.. versionadded:: 0.17.0
The top-level function :func:`read_sas` can read (but not write) SAS
-`xport` (.XPT) and `SAS7BDAT` (.sas7bdat) format files (v0.18.0).
+`xport` (.XPT) and `SAS7BDAT` (.sas7bdat) format files (``SAS7BDAT`` support was added in *v0.18.0*).
SAS files only contain two value types: ASCII text and floating point
values (usually 8 bytes but sometimes truncated). For xport files,
diff --git a/doc/source/options.rst b/doc/source/options.rst
index be1543f20a461..98187d7be762e 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -107,6 +107,7 @@ All options also have a default value, and you can use ``reset_option`` to do ju
It's also possible to reset multiple options at once (using a regex):
.. ipython:: python
+ :okwarning:
pd.reset_option("^display")
@@ -266,8 +267,10 @@ Options are 'right', and 'left'.
-List of Options
----------------
+.. _options.available:
+
+Available Options
+-----------------
========================== ============ ==================================
Option Default Function
@@ -437,6 +440,7 @@ For instance:
.. ipython:: python
:suppress:
+ :okwarning:
pd.reset_option('^display\.')
@@ -499,5 +503,3 @@ Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these chara
pd.set_option('display.unicode.east_asian_width', False)
pd.set_option('display.unicode.ambiguous_as_wide', False)
-
-
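Since both options hunks above exercise regex-based resets, a compact illustration of the pattern (both calls are existing pandas API):

.. code-block:: python

    import pandas as pd

    pd.set_option('display.max_rows', 999)
    pd.reset_option('^display')  # every matching display.* option reverts to its default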
diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst
index 74cdc5a526585..71d3bbed223e5 100644
--- a/doc/source/r_interface.rst
+++ b/doc/source/r_interface.rst
@@ -136,6 +136,7 @@ DataFrames into the equivalent R object (that is, **data.frame**):
.. ipython:: python
+ import pandas.rpy.common as com
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C':[7,8,9]},
index=["one", "two", "three"])
r_dataframe = com.convert_to_r_dataframe(df)
@@ -154,6 +155,7 @@ R matrices bear no information on the data type):
.. ipython:: python
+ import pandas.rpy.common as com
r_matrix = com.convert_to_r_matrix(df)
print(type(r_matrix))
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 04d74270ec938..859a01890d68f 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -12,7 +12,7 @@
import matplotlib.pyplot as plt
plt.close('all')
- options.display.max_rows=15
+ pd.options.display.max_rows=15
import pandas.util.testing as tm
*************
@@ -40,7 +40,7 @@ analysis / manipulation tool available in any language.
pandas 0.18.0
-------------
-**Release date:** (January ??, 2016)
+**Release date:** (March 13, 2016)
This is a major release from 0.17.1 and includes a small number of API changes, several new features,
enhancements, and performance improvements along with a large number of bug fixes. We recommend that all
@@ -70,6 +70,107 @@ of all enhancements and bugs that have been fixed in 0.17.1.
Thanks
~~~~~~
+- ARF
+- Alex Alekseyev
+- Andrew McPherson
+- Andrew Rosenfeld
+- Anthonios Partheniou
+- Anton I. Sipos
+- Ben
+- Ben North
+- Bran Yang
+- Chris
+- Chris Carroux
+- Christopher C. Aycock
+- Christopher Scanlin
+- Cody
+- Da Wang
+- Daniel Grady
+- Dorozhko Anton
+- Dr-Irv
+- Erik M. Bray
+- Evan Wright
+- Francis T. O'Donovan
+- Frank Cleary
+- Gianluca Rossi
+- Graham Jeffries
+- Guillaume Horel
+- Henry Hammond
+- Isaac Schwabacher
+- Jean-Mathieu Deschenes
+- Jeff Reback
+- Joe Jevnik
+- John Freeman
+- John Fremlin
+- Jonas Hoersch
+- Joris Van den Bossche
+- Joris Vankerschaver
+- Justin Lecher
+- Justin Lin
+- Ka Wo Chen
+- Keming Zhang
+- Kerby Shedden
+- Kyle
+- Marco Farrugia
+- MasonGallo
+- MattRijk
+- Matthew Lurie
+- Maximilian Roos
+- Mayank Asthana
+- Mortada Mehyar
+- Moussa Taifi
+- Navreet Gill
+- Nicolas Bonnotte
+- Paul Reiners
+- Philip Gura
+- Pietro Battiston
+- RahulHP
+- Randy Carnevale
+- Rinoc Johnson
+- Rishipuri
+- Sangmin Park
+- Scott E Lasley
+- Sereger13
+- Shannon Wang
+- Skipper Seabold
+- Thierry Moisan
+- Thomas A Caswell
+- Toby Dylan Hocking
+- Tom Augspurger
+- Travis
+- Trent Hauck
+- Tux1
+- Varun
+- Wes McKinney
+- Will Thompson
+- Yoav Ram
+- Yoong Kang Lim
+- Yoshiki Vázquez Baeza
+- Young Joong Kim
+- Younggun Kim
+- Yuval Langer
+- alex argunov
+- behzad nouri
+- boombard
+- brian-pantano
+- chromy
+- daniel
+- dgram0
+- gfyoung
+- hack-c
+- hcontrast
+- jfoo
+- kaustuv deolal
+- llllllllll
+- ranarag
+- rockg
+- scls19fr
+- seales
+- sinhrks
+- srib
+- surveymedia.ca
+- tworec
+
pandas 0.17.1
-------------
diff --git a/doc/source/text.rst b/doc/source/text.rst
index 53567ea25aeac..655df5c5e566c 100644
--- a/doc/source/text.rst
+++ b/doc/source/text.rst
@@ -9,7 +9,7 @@
randn = np.random.randn
np.set_printoptions(precision=4, suppress=True)
from pandas.compat import lrange
- options.display.max_rows=15
+ pd.options.display.max_rows=15
======================
Working with Text Data
@@ -256,7 +256,7 @@ It raises ``ValueError`` if ``expand=False``.
.. code-block:: python
>>> s.index.str.extract("(?P[a-zA-Z])([0-9]+)", expand=False)
- ValueError: This pattern contains no groups to capture.
+ ValueError: only one regex group is supported with Index
The table below summarizes the behavior of ``extract(expand=False)``
(input subject in first column, number of groups in regex in
@@ -375,53 +375,54 @@ Method Summary
.. csv-table::
:header: "Method", "Description"
:widths: 20, 80
-
- :meth:`~Series.str.cat`,Concatenate strings
- :meth:`~Series.str.split`,Split strings on delimiter
- :meth:`~Series.str.rsplit`,Split strings on delimiter working from the end of the string
- :meth:`~Series.str.get`,Index into each element (retrieve i-th element)
- :meth:`~Series.str.join`,Join strings in each element of the Series with passed separator
- :meth:`~Series.str.get_dummies`,Split strings on delimiter, returning DataFrame of dummy variables
- :meth:`~Series.str.contains`,Return boolean array if each string contains pattern/regex
- :meth:`~Series.str.replace`,Replace occurrences of pattern/regex with some other string
- :meth:`~Series.str.repeat`,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
- :meth:`~Series.str.pad`,"Add whitespace to left, right, or both sides of strings"
- :meth:`~Series.str.center`,Equivalent to ``str.center``
- :meth:`~Series.str.ljust`,Equivalent to ``str.ljust``
- :meth:`~Series.str.rjust`,Equivalent to ``str.rjust``
- :meth:`~Series.str.zfill`,Equivalent to ``str.zfill``
- :meth:`~Series.str.wrap`,Split long strings into lines with length less than a given width
- :meth:`~Series.str.slice`,Slice each string in the Series
- :meth:`~Series.str.slice_replace`,Replace slice in each string with passed value
- :meth:`~Series.str.count`,Count occurrences of pattern
- :meth:`~Series.str.startswith`,Equivalent to ``str.startswith(pat)`` for each element
- :meth:`~Series.str.endswith`,Equivalent to ``str.endswith(pat)`` for each element
- :meth:`~Series.str.findall`,Compute list of all occurrences of pattern/regex for each string
- :meth:`~Series.str.match`,"Call ``re.match`` on each element, returning matched groups as list"
- :meth:`~Series.str.extract`,"Call ``re.search`` on each element, returning DataFrame with one row for each element and one column for each regex capture group"
- :meth:`~Series.str.extractall`,"Call ``re.findall`` on each element, returning DataFrame with one row for each match and one column for each regex capture group"
- :meth:`~Series.str.len`,Compute string lengths
- :meth:`~Series.str.strip`,Equivalent to ``str.strip``
- :meth:`~Series.str.rstrip`,Equivalent to ``str.rstrip``
- :meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
- :meth:`~Series.str.partition`,Equivalent to ``str.partition``
- :meth:`~Series.str.rpartition`,Equivalent to ``str.rpartition``
- :meth:`~Series.str.lower`,Equivalent to ``str.lower``
- :meth:`~Series.str.upper`,Equivalent to ``str.upper``
- :meth:`~Series.str.find`,Equivalent to ``str.find``
- :meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
- :meth:`~Series.str.index`,Equivalent to ``str.index``
- :meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
- :meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
- :meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
- :meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``
- :meth:`~Series.str.translate`,Equivalent to ``str.translate``
- :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
- :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
- :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
- :meth:`~Series.str.isspace`,Equivalent to ``str.isspace``
- :meth:`~Series.str.islower`,Equivalent to ``str.islower``
- :meth:`~Series.str.isupper`,Equivalent to ``str.isupper``
- :meth:`~Series.str.istitle`,Equivalent to ``str.istitle``
- :meth:`~Series.str.isnumeric`,Equivalent to ``str.isnumeric``
- :meth:`~Series.str.isdecimal`,Equivalent to ``str.isdecimal``
+ :delim: ;
+
+ :meth:`~Series.str.cat`;Concatenate strings
+ :meth:`~Series.str.split`;Split strings on delimiter
+ :meth:`~Series.str.rsplit`;Split strings on delimiter working from the end of the string
+ :meth:`~Series.str.get`;Index into each element (retrieve i-th element)
+ :meth:`~Series.str.join`;Join strings in each element of the Series with passed separator
+ :meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables
+ :meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex
+ :meth:`~Series.str.replace`;Replace occurrences of pattern/regex with some other string
+ :meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
+ :meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings"
+ :meth:`~Series.str.center`;Equivalent to ``str.center``
+ :meth:`~Series.str.ljust`;Equivalent to ``str.ljust``
+ :meth:`~Series.str.rjust`;Equivalent to ``str.rjust``
+ :meth:`~Series.str.zfill`;Equivalent to ``str.zfill``
+ :meth:`~Series.str.wrap`;Split long strings into lines with length less than a given width
+ :meth:`~Series.str.slice`;Slice each string in the Series
+ :meth:`~Series.str.slice_replace`;Replace slice in each string with passed value
+ :meth:`~Series.str.count`;Count occurrences of pattern
+ :meth:`~Series.str.startswith`;Equivalent to ``str.startswith(pat)`` for each element
+ :meth:`~Series.str.endswith`;Equivalent to ``str.endswith(pat)`` for each element
+ :meth:`~Series.str.findall`;Compute list of all occurrences of pattern/regex for each string
+ :meth:`~Series.str.match`;"Call ``re.match`` on each element, returning matched groups as list"
+ :meth:`~Series.str.extract`;"Call ``re.search`` on each element, returning DataFrame with one row for each element and one column for each regex capture group"
+ :meth:`~Series.str.extractall`;"Call ``re.findall`` on each element, returning DataFrame with one row for each match and one column for each regex capture group"
+ :meth:`~Series.str.len`;Compute string lengths
+ :meth:`~Series.str.strip`;Equivalent to ``str.strip``
+ :meth:`~Series.str.rstrip`;Equivalent to ``str.rstrip``
+ :meth:`~Series.str.lstrip`;Equivalent to ``str.lstrip``
+ :meth:`~Series.str.partition`;Equivalent to ``str.partition``
+ :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition``
+ :meth:`~Series.str.lower`;Equivalent to ``str.lower``
+ :meth:`~Series.str.upper`;Equivalent to ``str.upper``
+ :meth:`~Series.str.find`;Equivalent to ``str.find``
+ :meth:`~Series.str.rfind`;Equivalent to ``str.rfind``
+ :meth:`~Series.str.index`;Equivalent to ``str.index``
+ :meth:`~Series.str.rindex`;Equivalent to ``str.rindex``
+ :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize``
+ :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase``
+ :meth:`~Series.str.normalize`;Return Unicode normal form. Equivalent to ``unicodedata.normalize``
+ :meth:`~Series.str.translate`;Equivalent to ``str.translate``
+ :meth:`~Series.str.isalnum`;Equivalent to ``str.isalnum``
+ :meth:`~Series.str.isalpha`;Equivalent to ``str.isalpha``
+ :meth:`~Series.str.isdigit`;Equivalent to ``str.isdigit``
+ :meth:`~Series.str.isspace`;Equivalent to ``str.isspace``
+ :meth:`~Series.str.islower`;Equivalent to ``str.islower``
+ :meth:`~Series.str.isupper`;Equivalent to ``str.isupper``
+ :meth:`~Series.str.istitle`;Equivalent to ``str.istitle``
+ :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric``
+ :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal``
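To ground the summary table, a couple of the listed methods in action (a minimal sketch; the data is made up):

.. code-block:: python

    import pandas as pd

    s = pd.Series(['a_b_c', 'c_d_e'])
    s.str.split('_')         # lists of substrings
    s.str.replace('_', '-')  # plain string replacement
    s.str.len()              # element-wise lengths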
diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt
index 48ce09f32b12b..f409be7dd0f41 100644
--- a/doc/source/whatsnew/v0.10.0.txt
+++ b/doc/source/whatsnew/v0.10.0.txt
@@ -292,6 +292,7 @@ Updated PyTables Support
store.select('df')
.. ipython:: python
+ :okwarning:
wp = Panel(randn(2, 5, 4), items=['Item1', 'Item2'],
major_axis=date_range('1/1/2000', periods=5),
diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt
index 9651c1efeff4a..3d992206cb426 100644
--- a/doc/source/whatsnew/v0.15.0.txt
+++ b/doc/source/whatsnew/v0.15.0.txt
@@ -420,7 +420,7 @@ Rolling/Expanding Moments improvements
New behavior
- .. ipython:: python
+ .. code-block:: python
In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True)
Out[10]:
diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt
index bd878db08a3ed..79efa2b278ae7 100644
--- a/doc/source/whatsnew/v0.15.1.txt
+++ b/doc/source/whatsnew/v0.15.1.txt
@@ -110,19 +110,18 @@ API changes
.. code-block:: python
- In [8]: s.loc[3.5:1.5]
- KeyError: 3.5
+ In [8]: s.loc[3.5:1.5]
+ KeyError: 3.5
Current behavior:
.. ipython:: python
- s.loc[3.5:1.5]
-
+ s.loc[3.5:1.5]
- ``io.data.Options`` has been fixed for a change in the format of the Yahoo Options page (:issue:`8612`), (:issue:`8741`)
- .. note::
+ .. note::
As a result of a change in Yahoo's option page layout, when an expiry date is given,
``Options`` methods now return data for a single expiry date. Previously, methods returned all
@@ -146,6 +145,7 @@ API changes
Current behavior:
.. ipython:: python
+ :okwarning:
from pandas.io.data import Options
aapl = Options('aapl','yahoo')
@@ -274,4 +274,3 @@ Bug Fixes
- Bug in Setting by indexer to a scalar value with a mixed-dtype `Panel4d` was failing (:issue:`8702`)
- Bug where ``DataReader``'s would fail if one of the symbols passed was invalid. Now returns data for valid symbols and np.nan for invalid (:issue:`8494`)
- Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values (:issue:`5229`).
-
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 9f943fa68e639..92eafdac387fa 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -723,6 +723,7 @@ be broadcast:
or it can return False if broadcasting can not be done:
.. ipython:: python
+ :okwarning:
np.array([1, 2, 3]) == np.array([1, 2])
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 8f6525f2cb6a5..dd1884efe5806 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -1,7 +1,7 @@
.. _whatsnew_0180:
-v0.18.0 (February ??, 2016)
----------------------------
+v0.18.0 (March 13, 2016)
+------------------------
This is a major release from 0.17.1 and includes a small number of API changes, several new features,
enhancements, and performance improvements along with a large number of bug fixes. We recommend that all
@@ -12,6 +12,10 @@ users upgrade to this version.
pandas >= 0.18.0 no longer supports compatibility with Python versions 2.6
and 3.3 (:issue:`7718`, :issue:`11273`)
+.. warning::
+
+ ``numexpr`` version 2.4.4 will now show a warning and not be used as a computation back-end for pandas because of some buggy behavior. This does not affect other versions (>= 2.1 and >= 2.4.6). (:issue:`12489`)
+
Highlights include:
- Moving and expanding window functions are now methods on Series and DataFrame,
@@ -46,12 +50,13 @@ New features
Window functions are now methods
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions, to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`)
+Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`, :issue:`12373`)
+
.. ipython:: python
np.random.seed(1234)
- df = DataFrame({'A' : range(10), 'B' : np.random.randn(10)})
+ df = pd.DataFrame({'A' : range(10), 'B' : np.random.randn(10)})
df
Previous Behavior:
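(The hunk truncates after "Previous Behavior:". For orientation, a sketch of the new method-based spelling that replaces the deprecated top-level functions, assuming pandas >= 0.18.0:)

.. code-block:: python

    df.rolling(window=2).mean()        # replaces pd.rolling_mean(df, window=2)
    df.expanding(min_periods=1).sum()  # replaces pd.expanding_sum(df, min_periods=1)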
@@ -148,7 +153,7 @@ Previous Behavior:
.. code-block:: python
- In [3]: s = Series(range(1000))
+ In [3]: s = pd.Series(range(1000))
In [4]: s.index
Out[4]:
@@ -164,7 +169,7 @@ New Behavior:
.. ipython:: python
- s = Series(range(1000))
+ s = pd.Series(range(1000))
s.index
s.index.nbytes
@@ -186,9 +191,17 @@ In v0.18.0, the ``expand`` argument was added to
Currently the default is ``expand=None``, which gives a ``FutureWarning`` and uses ``expand=False``. To avoid this warning, please explicitly specify ``expand``.
-.. ipython:: python
+.. code-block:: python
+
+ In [1]: pd.Series(['a1', 'b2', 'c3']).str.extract('[ab](\d)', expand=None)
+ FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame)
+ but in a future version of pandas this will be changed to expand=True (return DataFrame)
- pd.Series(['a1', 'b2', 'c3']).str.extract('[ab](\d)', expand=None)
+ Out[1]:
+ 0 1
+ 1 2
+ 2 NaN
+ dtype: object
Extracting a regular expression with one group returns a Series if
``expand=False``.
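A compact sketch of the two return shapes for a one-group pattern (made-up data):

.. code-block:: python

    import pandas as pd

    s = pd.Series(['a1', 'b2', 'c3'])
    s.str.extract('[ab](\d)', expand=False)  # returns a Series
    s.str.extract('[ab](\d)', expand=True)   # returns a one-column DataFrame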
@@ -209,7 +222,7 @@ returns an ``Index`` if ``expand=False``.
.. ipython:: python
s = pd.Series(["a1", "b2", "c3"], ["A11", "B22", "C33"])
- s
+ s.index
s.index.str.extract("(?P[a-zA-Z])", expand=False)
It returns a ``DataFrame`` with one column if ``expand=True``.
@@ -242,8 +255,8 @@ Addition of str.extractall
^^^^^^^^^^^^^^^^^^^^^^^^^^
The :ref:`.str.extractall ` method was added
-(:issue:`11386`). Unlike ``extract`` (which returns only the first
-match),
+(:issue:`11386`). Unlike ``extract``, which returns only the first
+match:
.. ipython:: python
@@ -251,7 +264,7 @@ match),
s
s.str.extract("(?P[ab])(?P\d)", expand=False)
-the ``extractall`` method returns all matches.
+The ``extractall`` method returns all matches.
.. ipython:: python
@@ -268,12 +281,12 @@ A new, friendlier ``ValueError`` is added to protect against the mistake of supp
.. ipython:: python
- Series(['a','b',np.nan,'c']).str.cat(sep=' ')
- Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?')
+ pd.Series(['a','b',np.nan,'c']).str.cat(sep=' ')
+ pd.Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?')
.. code-block:: python
- In [2]: Series(['a','b',np.nan,'c']).str.cat(' ')
+ In [2]: pd.Series(['a','b',np.nan,'c']).str.cat(' ')
ValueError: Did you mean to supply a `sep` keyword?
@@ -321,21 +334,21 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t
.. ipython:: python
- s = Series(dr)
+ s = pd.Series(dr)
s
s.dt.round('D')
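The same rounding API is available on a bare ``Timestamp``; a one-line sketch (the date is arbitrary):

.. code-block:: python

    pd.Timestamp('2016-03-13 12:34:56').round('H')  # Timestamp('2016-03-13 13:00:00')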
-Formatting of integer in FloatIndex
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Formatting of Integers in FloatIndex
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Integers in ``FloatIndex``, e.g. 1., are now formatted with a decimal point and a ``0`` digit, e.g. ``1.0`` (:issue:`11713`)
-This change not only affects the display in a jupyter notebook, but also the output of IO methods like ``.to_csv`` or ``.to_html``
+This change not only affects the display to the console, but also the output of IO methods like ``.to_csv`` or ``.to_html``.
Previous Behavior:
.. code-block:: python
- In [2]: s = Series([1,2,3], index=np.arange(3.))
+ In [2]: s = pd.Series([1,2,3], index=np.arange(3.))
In [3]: s
Out[3]:
@@ -357,11 +370,87 @@ New Behavior:
.. ipython:: python
- s = Series([1,2,3], index=np.arange(3.))
+ s = pd.Series([1,2,3], index=np.arange(3.))
s
s.index
print(s.to_csv(path=None))
+Changes to dtype assignment behaviors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When a DataFrame's slice is updated with a new slice of the same dtype, the dtype of the DataFrame will now remain the same. (:issue:`10503`)
+
+Previous Behavior:
+
+.. code-block:: python
+
+ In [5]: df = pd.DataFrame({'a': [0, 1, 1],
+ 'b': pd.Series([100, 200, 300], dtype='uint32')})
+
+ In [7]: df.dtypes
+ Out[7]:
+ a int64
+ b uint32
+ dtype: object
+
+ In [8]: ix = df['a'] == 1
+
+ In [9]: df.loc[ix, 'b'] = df.loc[ix, 'b']
+
+ In [11]: df.dtypes
+ Out[11]:
+ a int64
+ b int64
+ dtype: object
+
+New Behavior:
+
+.. ipython:: python
+
+ df = pd.DataFrame({'a': [0, 1, 1],
+ 'b': pd.Series([100, 200, 300], dtype='uint32')})
+ df.dtypes
+ ix = df['a'] == 1
+ df.loc[ix, 'b'] = df.loc[ix, 'b']
+ df.dtypes
+
+When a DataFrame's integer slice is partially updated with a new slice of floats that could potentially be downcast to integer without losing precision, the dtype of the slice will be set to float instead of integer.
+
+Previous Behavior:
+
+.. code-block:: python
+
+ In [4]: df = pd.DataFrame(np.array(range(1,10)).reshape(3,3),
+ columns=list('abc'),
+ index=[[4,4,8], [8,10,12]])
+
+ In [5]: df
+ Out[5]:
+ a b c
+ 4 8 1 2 3
+ 10 4 5 6
+ 8 12 7 8 9
+
+ In [7]: df.ix[4, 'c'] = np.array([0., 1.])
+
+ In [8]: df
+ Out[8]:
+ a b c
+ 4 8 1 2 0
+ 10 4 5 1
+ 8 12 7 8 9
+
+New Behavior:
+
+.. ipython:: python
+
+ df = pd.DataFrame(np.array(range(1,10)).reshape(3,3),
+ columns=list('abc'),
+ index=[[4,4,8], [8,10,12]])
+ df
+ df.ix[4, 'c'] = np.array([0., 1.])
+ df
+
.. _whatsnew_0180.enhancements.xarray:
to_xarray
@@ -369,7 +458,7 @@ to_xarray
In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity,
all ``NDFrame`` objects have gained the ``.to_xarray()`` method to convert to ``xarray`` objects, which have
-a pandas-like interface for > 2 ndim.
+a pandas-like interface for > 2 ndim. (:issue:`11972`)
See the `xarray full-documentation here `__.
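A minimal sketch of the conversion (assumes the optional ``xarray`` package is installed):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [0.1, 0.2, 0.3]})
    ds = df.to_xarray()  # an xarray.Dataset with the frame's index as a dimension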
@@ -397,17 +486,17 @@ Latex Representation
``DataFrame`` has gained a ``._repr_latex_()`` method in order to allow for conversion to latex in a ipython/jupyter notebook using nbconvert. (:issue:`11778`)
-Note that this must be activated by setting the option ``display.latex.repr`` to ``True`` (issue:`12182`)
+Note that this must be activated by setting the option ``display.latex.repr`` to ``True`` (:issue:`12182`)
-For example, if you have a jupyter notebook you plan to convert to latex using nbconvert, place the statement ``pd.set_option('display.latex.repr', True)`` in the first cell to have the contained DataFrame output also stored as latex.
+For example, if you have a jupyter notebook you plan to convert to latex using nbconvert, place the statement ``pd.options.display.latex.repr = True`` in the first cell to have the contained DataFrame output also stored as latex.
-Options ``display.latex.escape`` and ``display.latex.longtable`` have also been added to the configuration and are used automatically by the ``to_latex``
-method. See the :ref:`options documentation` for more info.
+The options ``display.latex.escape`` and ``display.latex.longtable`` have also been added to the configuration and are used automatically by the ``to_latex``
+method. See the :ref:`available options docs ` for more info.
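A short sketch of enabling this in a notebook's first cell (option names as documented above; the longtable line is optional):

.. code-block:: python

    import pandas as pd

    pd.options.display.latex.repr = True            # DataFrames also emit latex output
    pd.set_option('display.latex.longtable', True)  # optional longtable formatting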
.. _whatsnew_0180.enhancements.sas:
-read_sas changes
-^^^^^^^^^^^^^^^^
+``pd.read_sas()`` changes
+^^^^^^^^^^^^^^^^^^^^^^^^^
``read_sas`` has gained the ability to read SAS7BDAT files, including compressed files. The files can be read in their entirety, or incrementally; a short sketch follows. For full details see :ref:`here `. (:issue:`4052`)
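A hedged sketch of both modes (the file name is hypothetical; ``chunksize`` enables incremental reading):

.. code-block:: python

    import pandas as pd

    df = pd.read_sas('airline.sas7bdat')                       # read the whole file
    reader = pd.read_sas('airline.sas7bdat', chunksize=10000)
    for chunk in reader:                                       # or read incrementally
        print(chunk.shape)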
@@ -429,8 +518,9 @@ Other enhancements
- Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`)
- ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`)
- ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`)
-- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here `
+- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see :ref:`here `
- ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`).
+- Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`)
- The entire codebase has been ``PEP``-ified (:issue:`12096`)
.. _whatsnew_0180.api_breaking:
@@ -524,14 +614,12 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
Changes to msgpack
^^^^^^^^^^^^^^^^^^
-Forward incompatible changes in ``msgpack`` writing format were made over 0.17.0 and 0.18.0; older versions of pandas cannot read files packed by newer versions (:issue:`12129`, `10527`)
+Forward incompatible changes in ``msgpack`` writing format were made over 0.17.0 and 0.18.0; older versions of pandas cannot read files packed by newer versions (:issue:`12129`, :issue:`10527`)
-Bug in ``to_msgpack`` and ``read_msgpack`` introduced in 0.17.0 and fixed in 0.18.0, caused files packed in Python 2 unreadable by Python 3 (:issue:`12142`)
+Bugs in ``to_msgpack`` and ``read_msgpack`` introduced in 0.17.0 and fixed in 0.18.0 caused files packed in Python 2 to be unreadable by Python 3 (:issue:`12142`). The following table describes the backward and forward compatibility of msgpack files.
.. warning::
- As a result of a number of issues:
-
+----------------------+------------------------+
| Packed with | Can be unpacked with |
+======================+========================+
@@ -539,13 +627,14 @@ Bug in ``to_msgpack`` and ``read_msgpack`` introduced in 0.17.0 and fixed in 0.1
+----------------------+------------------------+
| pre-0.17 / Python 3 | any |
+----------------------+------------------------+
- | 0.17 / Python 2 | - 0.17 / Python 2 |
+ | 0.17 / Python 2 | - ==0.17 / Python 2 |
| | - >=0.18 / any Python |
+----------------------+------------------------+
| 0.17 / Python 3 | >=0.18 / any Python |
+----------------------+------------------------+
| 0.18 | >= 0.18 |
- +======================+========================+
+ +----------------------+------------------------+
+
0.18.0 is backward-compatible for reading files packed by older versions, except for files packed with 0.17 in Python 2, in which case they can only be unpacked in Python 2.
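+
+A minimal round-trip sketch (``'frame.msg'`` is a hypothetical file path):
+
+.. code-block:: python
+
+   df.to_msgpack('frame.msg')          # packed with the running pandas version
+   df2 = pd.read_msgpack('frame.msg')  # unpackable per the compat table above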
@@ -623,7 +712,7 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``.
Resample API
^^^^^^^^^^^^
-Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`).
+Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`, :issue:`12448`).
.. ipython:: python
@@ -654,7 +743,7 @@ You could also specify a ``how`` directly
.. code-block:: python
- In [7]: df.resample('2s',how='sum')
+ In [7]: df.resample('2s', how='sum')
Out[7]:
A B C D
2010-01-01 09:00:00 0.971495 0.894701 0.714192 1.587231
@@ -663,42 +752,13 @@ You could also specify a ``how`` directly
2010-01-01 09:00:06 1.249976 1.219477 1.266330 1.224904
2010-01-01 09:00:08 1.020940 1.068634 1.146402 1.613897
-.. warning::
-
- This new API for resample includes some internal changes for the prior-to-0.18.0 API, to work with a deprecation warning in most cases, as the resample operation returns a deferred object. We can intercept operations and just do what the (pre 0.18.0) API did (with a warning). Here is a typical use case:
-
- .. code-block:: python
-
- In [4]: r = df.resample('2s')
-
- In [6]: r*10
- pandas/tseries/resample.py:80: FutureWarning: .resample() is now a deferred operation
- use .resample(...).mean() instead of .resample(...)
-
- Out[6]:
- A B C D
- 2010-01-01 09:00:00 4.857476 4.473507 3.570960 7.936154
- 2010-01-01 09:00:02 8.208011 7.943173 3.640340 5.310957
- 2010-01-01 09:00:04 4.339846 3.145823 4.241039 6.257326
- 2010-01-01 09:00:06 6.249881 6.097384 6.331650 6.124518
- 2010-01-01 09:00:08 5.104699 5.343172 5.732009 8.069486
-
- However, getting and assignment operations directly on a ``Resampler`` will raise a ``ValueError``:
-
- .. code-block:: python
-
- In [7]: r.iloc[0] = 5
- ValueError: .resample() is now a deferred operation
- use .resample(...).mean() instead of .resample(...)
- assignment will have no effect as you are working on a copy
-
**New API**:
-Now, you can write ``.resample`` as a 2-stage operation like groupby, which
+Now, you can write ``.resample(...)`` as a 2-stage operation like ``.groupby(...)``, which
yields a ``Resampler``.
.. ipython:: python
-
+ :okwarning:
r = df.resample('2s')
r
@@ -707,7 +767,7 @@ Downsampling
''''''''''''
You can then use this object to perform operations.
-These are downsampling operations (going from a lower frequency to a higher one).
+These are downsampling operations (going from a higher frequency to a lower one).
.. ipython:: python
@@ -740,7 +800,7 @@ Upsampling
.. currentmodule:: pandas.tseries.resample
-Upsampling operations take you from a higher frequency to a lower frequency. These are now
+Upsampling operations take you from a lower frequency to a higher frequency. These are now
performed with the ``Resampler`` objects with :meth:`~Resampler.backfill`,
:meth:`~Resampler.ffill`, :meth:`~Resampler.fillna` and :meth:`~Resampler.asfreq` methods.
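+
+A minimal sketch (assuming ``s`` is a ``Series`` with a daily ``DatetimeIndex``):
+
+.. code-block:: python
+
+   s.resample('12H').asfreq()   # upsample to 12-hourly; new intervals are NaN
+   s.resample('12H').ffill()    # upsample and forward fill the new intervals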
@@ -781,6 +841,65 @@ New API
In the new API, you can either downsample OR upsample. The prior implementation would allow you to pass an aggregator function (like ``mean``) even though you were upsampling, providing a bit of confusion.
+Previous API will work but with deprecations
+''''''''''''''''''''''''''''''''''''''''''''
+
+.. warning::
+
+ This new API for resample includes some internal changes so that the prior-to-0.18.0 API continues to work, with a deprecation warning in most cases, as the resample operation returns a deferred object. We can intercept operations and just do what the (pre-0.18.0) API did (with a warning). Here is a typical use case:
+
+ .. code-block:: python
+
+ In [4]: r = df.resample('2s')
+
+ In [6]: r*10
+ pandas/tseries/resample.py:80: FutureWarning: .resample() is now a deferred operation
+ use .resample(...).mean() instead of .resample(...)
+
+ Out[6]:
+ A B C D
+ 2010-01-01 09:00:00 4.857476 4.473507 3.570960 7.936154
+ 2010-01-01 09:00:02 8.208011 7.943173 3.640340 5.310957
+ 2010-01-01 09:00:04 4.339846 3.145823 4.241039 6.257326
+ 2010-01-01 09:00:06 6.249881 6.097384 6.331650 6.124518
+ 2010-01-01 09:00:08 5.104699 5.343172 5.732009 8.069486
+
+ However, getting and assignment operations directly on a ``Resampler`` will raise a ``ValueError``:
+
+ .. code-block:: python
+
+ In [7]: r.iloc[0] = 5
+ ValueError: .resample() is now a deferred operation
+ use .resample(...).mean() instead of .resample(...)
+
+ There is a situation where the new API cannot perform all the operations of the original code.
+ The following code intended to resample every 2s, take the ``mean`` AND then take the ``min`` of those results.
+
+ .. code-block:: python
+
+ In [4]: df.resample('2s').min()
+ Out[4]:
+ A 0.433985
+ B 0.314582
+ C 0.357096
+ D 0.531096
+ dtype: float64
+
+ The new API will instead perform the ``min`` aggregation on each resampled group:
+
+ .. ipython:: python
+
+ df.resample('2s').min()
+
+ The good news is that the return dimensions will differ between the new API and the old API, so this should loudly raise
+ an exception.
+
+ To replicate the original operation:
+
+ .. ipython:: python
+
+ df.resample('2s').mean().min()
+
Changes to eval
^^^^^^^^^^^^^^^
@@ -790,9 +909,28 @@ in an inplace change to the ``DataFrame``. (:issue:`9297`)
.. ipython:: python
df = pd.DataFrame({'a': np.linspace(0, 10, 5), 'b': range(5)})
- df.eval('c = a + b')
df
+.. ipython:: python
+ :suppress:
+
+ df.eval('c = a + b', inplace=True)
+
+.. code-block:: python
+
+ In [12]: df.eval('c = a + b')
+ FutureWarning: eval expressions containing an assignment currently default to operating inplace.
+ This will change in a future version of pandas, use inplace=True to avoid this warning.
+
+ In [13]: df
+ Out[13]:
+ a b c
+ 0 0.0 0 0.0
+ 1 2.5 1 3.5
+ 2 5.0 2 7.0
+ 3 7.5 3 10.5
+ 4 10.0 4 14.0
+
In version 0.18.0, a new ``inplace`` keyword was added to choose whether the
assignment should be done inplace or return a copy.
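+
+A short sketch of the two modes:
+
+.. code-block:: python
+
+   df.eval('d = a + b', inplace=True)         # modifies df itself
+   new = df.eval('d = a + b', inplace=False)  # df is unchanged; a modified copy is returned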
@@ -855,22 +993,18 @@ Other API Changes
In [2]: s.between_time('20150101 07:00:00','20150101 09:00:00')
ValueError: Cannot convert arg ['20150101 07:00:00'] to a time.
-- ``.memory_usage()`` now includes values in the index, as does memory_usage in ``.info`` (:issue:`11597`)
-
-- ``DataFrame.to_latex()`` now supports non-ascii encodings (eg utf-8) in Python 2 with the parameter ``encoding`` (:issue:`7061`)
-
+- ``.memory_usage()`` now includes values in the index, as does memory_usage in ``.info()`` (:issue:`11597`)
+- ``DataFrame.to_latex()`` now supports non-ascii encodings (eg ``utf-8``) in Python 2 with the parameter ``encoding`` (:issue:`7061`)
- ``pandas.merge()`` and ``DataFrame.merge()`` will show a specific error message when trying to merge with an object that is not of type ``DataFrame`` or a subclass (:issue:`12081`)
-
- ``DataFrame.unstack`` and ``Series.unstack`` now take ``fill_value`` keyword to allow direct replacement of missing values when an unstack results in missing values in the resulting ``DataFrame``. As an added benefit, specifying ``fill_value`` will preserve the data type of the original stacked data. (:issue:`9746`)
-
-- As part of the new API for :ref:`window functions ` and :ref:`resampling `, aggregation functions have been clarified, raising more informative error messages on invalid aggregations. (:issue:`9052`). A full set of examples are presented in :ref:`groupby `.
-
-- Statistical functions for ``NDFrame`` objects will now raise if non-numpy-compatible arguments are passed in for ``**kwargs`` (:issue:`12301`)
-
+- As part of the new API for :ref:`window functions ` and :ref:`resampling `, aggregation functions have been clarified, raising more informative error messages on invalid aggregations. (:issue:`9052`). A full set of examples are presented in :ref:`groupby `.
+- Statistical functions for ``NDFrame`` objects (like ``sum()``, ``mean()``, ``min()``) will now raise if non-numpy-compatible arguments are passed in for ``**kwargs`` (:issue:`12301`)
- ``.to_latex`` and ``.to_html`` gain a ``decimal`` parameter like ``.to_csv``; the default is ``'.'`` (:issue:`12031`)
-
- More helpful error message when constructing a ``DataFrame`` with empty data but with indices (:issue:`8020`)
-
+- ``.describe()`` will now properly handle bool dtype as a categorical (:issue:`6625`)
+- More helpful error message when an invalid ``.transform`` is used with user-defined input (:issue:`10165`)
+- Exponentially weighted functions now allow specifying alpha directly (:issue:`10789`) and raise ``ValueError`` if parameters violate ``0 < alpha <= 1`` (:issue:`12492`)
+- ``Rolling.min`` and ``Rolling.max`` now take an ``as_float`` argument which, when ``False``, causes those functions to return output of the same dtype as the input, for example (a sketch):
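+
+  .. code-block:: python
+
+     # a sketch of the as_float behavior described above
+     s = pd.Series([1, 5, 2], dtype='int64')
+     s.rolling(window=2, min_periods=1).max(as_float=False)  # result keeps the int64 dtype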
.. _whatsnew_0180.deprecations:
@@ -908,7 +1042,7 @@ Deprecations
- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`).
- For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window.
+ For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D').mean().rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window.
- ``pd.tseries.frequencies.get_offset_name`` function is deprecated. Use offset's ``.freqstr`` property as alternative (:issue:`11192`)
- ``pandas.stats.fama_macbeth`` routines are deprecated and will be removed in a future version (:issue:`6077`)
@@ -928,11 +1062,11 @@ Removal of deprecated float indexers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In :issue:`4892` indexing with floating point numbers on a non-``Float64Index`` was deprecated (in version 0.14.0).
-In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`)
+In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`, :issue:`12333`)
.. ipython:: python
- s = pd.Series([1,2,3])
+ s = pd.Series([1, 2, 3], index=[4, 5, 6])
s
s2 = pd.Series([1, 2, 3], index=list('abc'))
s2
@@ -941,15 +1075,18 @@ Previous Behavior:
.. code-block:: python
- In [2]: s[1.0]
+ # this is label indexing
+ In [2]: s[5.0]
FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
Out[2]: 2
+ # this is positional indexing
In [3]: s.iloc[1.0]
FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
Out[3]: 2
- In [4]: s.loc[1.0]
+ # this is label indexing
+ In [4]: s.loc[5.0]
FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
Out[4]: 2
@@ -966,33 +1103,61 @@ Previous Behavior:
New Behavior:
+For ``.iloc``, getting & setting via a float scalar will always raise.
+
.. code-block:: python
- In [2]: s[1.0]
- TypeError: cannot do label indexing on with these indexers [1.0] of
+ In [3]: s.iloc[2.0]
+ TypeError: cannot do label indexing on with these indexers [2.0] of
- In [3]: s.iloc[1.0]
- TypeError: cannot do label indexing on with these indexers [1.0] of
+Other indexers will coerce integer-like floats to integers for both getting and setting. The ``FutureWarning`` has been dropped for ``.loc``, ``.ix`` and ``[]``.
- In [4]: s.loc[1.0]
- TypeError: cannot do label indexing on with these indexers [1.0] of
+.. ipython:: python
- # .ix will now cause this to be a label lookup and coerce to and Index
- In [5]: s2.ix[1.0] = 10
+ s[5.0]
+ s.loc[5.0]
+ s.ix[5.0]
- In [6]: s2
- Out[3]:
- a 1
- b 2
- c 3
- 1.0 10
- dtype: int64
+and setting
+
+.. ipython:: python
+
+ s_copy = s.copy()
+ s_copy[5.0] = 10
+ s_copy
+ s_copy = s.copy()
+ s_copy.loc[5.0] = 10
+ s_copy
+ s_copy = s.copy()
+ s_copy.ix[5.0] = 10
+ s_copy
+
+Positional setting with ``.ix`` and a float indexer will ADD this value to the index, rather than setting the value by position as it did previously.
+
+.. ipython:: python
+
+ s2.ix[1.0] = 10
+ s2
+
+Slicing will also coerce integer-like floats to integers for a non-``Float64Index``.
+
+.. ipython:: python
+
+ s.loc[5.0:6]
+ s.ix[5.0:6]
+
+Note that for floats that are NOT coercible to ints, the label-based bounds will be excluded.
+
+.. ipython:: python
+
+ s.loc[5.1:6]
+ s.ix[5.1:6]
Float indexing on a ``Float64Index`` is unchanged.
.. ipython:: python
- s = pd.Series([1,2,3],index=np.arange(3.))
+ s = pd.Series([1, 2, 3], index=np.arange(3.))
s[1.0]
s[1.0:2.5]
@@ -1050,10 +1215,10 @@ Bug Fixes
- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`)
- Bug in resampling with ``.nunique`` and a sparse index (:issue:`12352`)
- Removed some compiler warnings (:issue:`12471`)
-
+- Work around compat issues with ``boto`` in python 3.5 (:issue:`11915`)
- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`)
- Bug in subtraction of ``Series`` of a single tz-aware ``Timestamp`` (:issue:`12290`)
-
+- Use compat iterators in PY2 to support ``.next()`` (:issue:`12299`)
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
@@ -1111,12 +1276,12 @@ Bug Fixes
- Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`).
- Bug in ``Series`` constructor with read-only data (:issue:`11502`)
-
+- Removed ``pandas.util.testing.choice()``. Should use ``np.random.choice()``, instead. (:issue:`12386`)
- Bug in ``.loc`` setitem indexer preventing the use of a TZ-aware DatetimeIndex (:issue:`12050`)
- Bug in ``.style`` indexes and multi-indexes not appearing (:issue:`11655`)
- Bug in ``to_msgpack`` and ``from_msgpack`` which did not correctly serialize or deserialize ``NaT`` (:issue:`12307`).
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
-
+- Bug in ``Timestamp`` constructor where microsecond resolution was lost if HHMMSS were not separated with ':' (:issue:`10041`)
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
- Bug in ``crosstab`` where arguments with non-overlapping indexes would return a ``KeyError`` (:issue:`10291`)
@@ -1124,3 +1289,6 @@ Bug Fixes
- Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`)
- Bug when initializing categorical series with a scalar value. (:issue:`12336`)
- Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`)
+- Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`)
+- Bug when setting columns of a ``DataFrame`` with duplicate column names (:issue:`12344`)
+- Bug in ``.rolling.min`` and ``.rolling.max`` where passing columns of type float32 raised a Value error (:issue:`12373`)
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index 8c66cd0c1566d..70a1ad4a335ea 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -43,4 +43,3 @@ Performance Improvements
Bug Fixes
~~~~~~~~~
-
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 0f9ceba48e608..6097b85877b97 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -1625,29 +1625,38 @@ def roll_median_c(ndarray[float64_t] arg, int win, int minp):
# of its Simplified BSD license
# https://github.com/kwgoodman/bottleneck
-cdef struct pairs:
- double value
- int death
-
from libc cimport stdlib
@cython.boundscheck(False)
@cython.wraparound(False)
-def roll_max(ndarray[float64_t] a, int window, int minp):
- "Moving max of 1d array of dtype=float64 along axis=0 ignoring NaNs."
- cdef np.float64_t ai, aold
+def roll_max(ndarray[numeric] a, int window, int minp):
+ "Moving max of 1d array of any numeric type along axis=0 ignoring NaNs."
+ return _roll_min_max(a, window, minp, 1)
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def roll_min(ndarray[numeric] a, int window, int minp):
+ "Moving min of 1d array of any numeric type along axis=0 ignoring NaNs."
+ return _roll_min_max(a, window, minp, 0)
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef _roll_min_max(ndarray[numeric] a, int window, int minp, bint is_max):
+ "Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs."
+ cdef numeric ai, aold
cdef Py_ssize_t count
- cdef pairs* ring
- cdef pairs* minpair
- cdef pairs* end
- cdef pairs* last
+ cdef Py_ssize_t* death
+ cdef numeric* ring
+    cdef numeric* minvalue
+ cdef numeric* end
+ cdef numeric* last
cdef Py_ssize_t i0
cdef np.npy_intp *dim
dim = PyArray_DIMS(a)
cdef Py_ssize_t n0 = dim[0]
cdef np.npy_intp *dims = [n0]
- cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
- NPY_float64, 0)
+ cdef bint should_replace
+ cdef np.ndarray[numeric, ndim=1] y = PyArray_EMPTY(1, dims, PyArray_TYPE(a), 0)
if window < 1:
raise ValueError('Invalid window size %d'
@@ -1659,146 +1668,85 @@ def roll_max(ndarray[float64_t] a, int window, int minp):
minp = _check_minp(window, minp, n0)
with nogil:
- ring = stdlib.malloc(window * sizeof(pairs))
+ ring = stdlib.malloc(window * sizeof(numeric))
+ death = stdlib.malloc(window * sizeof(Py_ssize_t))
end = ring + window
last = ring
- minpair = ring
+ minvalue = ring
ai = a[0]
- if ai == ai:
- minpair.value = ai
+ if numeric in cython.floating:
+ if ai == ai:
+ minvalue[0] = ai
+ elif is_max:
+ minvalue[0] = MINfloat64
+ else:
+ minvalue[0] = MAXfloat64
else:
- minpair.value = MINfloat64
- minpair.death = window
+ minvalue[0] = ai
+ death[0] = window
count = 0
for i0 in range(n0):
ai = a[i0]
- if ai == ai:
- count += 1
+ if numeric in cython.floating:
+ if ai == ai:
+ count += 1
+ elif is_max:
+ ai = MINfloat64
+ else:
+ ai = MAXfloat64
else:
- ai = MINfloat64
+ count += 1
if i0 >= window:
aold = a[i0 - window]
if aold == aold:
count -= 1
- if minpair.death == i0:
- minpair += 1
- if minpair >= end:
- minpair = ring
- if ai >= minpair.value:
- minpair.value = ai
- minpair.death = i0 + window
- last = minpair
+ if death[minvalue-ring] == i0:
+ minvalue += 1
+ if minvalue >= end:
+ minvalue = ring
+ should_replace = ai >= minvalue[0] if is_max else ai <= minvalue[0]
+ if should_replace:
+ minvalue[0] = ai
+ death[minvalue-ring] = i0 + window
+ last = minvalue
else:
- while last.value <= ai:
+ should_replace = last[0] <= ai if is_max else last[0] >= ai
+ while should_replace:
if last == ring:
last = end
last -= 1
+ should_replace = last[0] <= ai if is_max else last[0] >= ai
last += 1
if last == end:
last = ring
- last.value = ai
- last.death = i0 + window
- if count >= minp:
- y[i0] = minpair.value
+ last[0] = ai
+ death[last - ring] = i0 + window
+ if numeric in cython.floating:
+ if count >= minp:
+ y[i0] = minvalue[0]
+ else:
+ y[i0] = NaN
else:
- y[i0] = NaN
+ y[i0] = minvalue[0]
for i0 in range(minp - 1):
- y[i0] = NaN
+ if numeric in cython.floating:
+ y[i0] = NaN
+ else:
+ y[i0] = 0
stdlib.free(ring)
+ stdlib.free(death)
return y
-
cdef double_t _get_max(object skiplist, int nobs, int minp):
if nobs >= minp:
return skiplist.get(nobs - 1)
else:
return NaN
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def roll_min(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
- "Moving min of 1d array of dtype=float64 along axis=0 ignoring NaNs."
- cdef np.float64_t ai, aold
- cdef Py_ssize_t count
- cdef pairs* ring
- cdef pairs* minpair
- cdef pairs* end
- cdef pairs* last
- cdef Py_ssize_t i0
- cdef np.npy_intp *dim
- dim = PyArray_DIMS(a)
- cdef Py_ssize_t n0 = dim[0]
- cdef np.npy_intp *dims = [n0]
- cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
- NPY_float64, 0)
-
- if window < 1:
- raise ValueError('Invalid window size %d'
- % (window))
-
- if minp > window:
- raise ValueError('Invalid min_periods size %d greater than window %d'
- % (minp, window))
-
- minp = _check_minp(window, minp, n0)
- with nogil:
- ring = stdlib.malloc(window * sizeof(pairs))
- end = ring + window
- last = ring
-
- minpair = ring
- ai = a[0]
- if ai == ai:
- minpair.value = ai
- else:
- minpair.value = MAXfloat64
- minpair.death = window
-
- count = 0
- for i0 in range(n0):
- ai = a[i0]
- if ai == ai:
- count += 1
- else:
- ai = MAXfloat64
- if i0 >= window:
- aold = a[i0 - window]
- if aold == aold:
- count -= 1
- if minpair.death == i0:
- minpair += 1
- if minpair >= end:
- minpair = ring
- if ai <= minpair.value:
- minpair.value = ai
- minpair.death = i0 + window
- last = minpair
- else:
- while last.value >= ai:
- if last == ring:
- last = end
- last -= 1
- last += 1
- if last == end:
- last = ring
- last.value = ai
- last.death = i0 + window
- if count >= minp:
- y[i0] = minpair.value
- else:
- y[i0] = NaN
-
- for i0 in range(minp - 1):
- y[i0] = NaN
-
- stdlib.free(ring)
- return y
-
cdef double_t _get_min(object skiplist, int nobs, int minp):
if nobs >= minp:
return skiplist.get(0)
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index cbdb69d1df8c3..aade3b8411bb9 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -153,31 +153,28 @@ def signature(f):
lfilter = builtins.filter
-def iteritems(obj, **kwargs):
- """replacement for six's iteritems for Python2/3 compat
- uses 'iteritems' if available and otherwise uses 'items'.
+if PY2:
+ def iteritems(obj, **kw):
+ return obj.iteritems(**kw)
- Passes kwargs to method.
- """
- func = getattr(obj, "iteritems", None)
- if not func:
- func = obj.items
- return func(**kwargs)
+ def iterkeys(obj, **kw):
+ return obj.iterkeys(**kw)
+ def itervalues(obj, **kw):
+ return obj.itervalues(**kw)
-def iterkeys(obj, **kwargs):
- func = getattr(obj, "iterkeys", None)
- if not func:
- func = obj.keys
- return func(**kwargs)
+    next = lambda it: it.next()
+else:
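+    # on PY3 the dict methods return views; wrapping them in iter() gives
+    # callers a one-shot iterator, matching the PY2 iteritems/iterkeys contract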
+ def iteritems(obj, **kw):
+ return iter(obj.items(**kw))
+ def iterkeys(obj, **kw):
+ return iter(obj.keys(**kw))
-def itervalues(obj, **kwargs):
- func = getattr(obj, "itervalues", None)
- if not func:
- func = obj.values
- return func(**kwargs)
+ def itervalues(obj, **kw):
+ return iter(obj.values(**kw))
+ next = next
def bind_method(cls, name, func):
"""Bind a method to class, python 2 and python 3 compatible.
diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py
index e69de29bb2d1d..9e94215eecf62 100644
--- a/pandas/computation/__init__.py
+++ b/pandas/computation/__init__.py
@@ -0,0 +1,30 @@
+
+import warnings
+from distutils.version import LooseVersion
+
+_NUMEXPR_INSTALLED = False
+
+try:
+ import numexpr as ne
+ ver = ne.__version__
+ _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1')
+
+ # we specifically disallow 2.4.4 as
+ # has some hard-to-diagnose bugs
+ if ver == LooseVersion('2.4.4'):
+ _NUMEXPR_INSTALLED = False
+ warnings.warn(
+ "The installed version of numexpr {ver} is not supported "
+ "in pandas and will be not be used\n".format(ver=ver),
+ UserWarning)
+
+ elif not _NUMEXPR_INSTALLED:
+ warnings.warn(
+ "The installed version of numexpr {ver} is not supported "
+ "in pandas and will be not be used\nThe minimum supported "
+ "version is 2.1\n".format(ver=ver), UserWarning)
+
+except ImportError: # pragma: no cover
+ pass
+
+__all__ = ['_NUMEXPR_INSTALLED']
diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py
index d2d16acc27fb6..c3300ffca468e 100644
--- a/pandas/computation/eval.py
+++ b/pandas/computation/eval.py
@@ -6,11 +6,11 @@
import warnings
import tokenize
from pandas.core import common as com
+from pandas.computation import _NUMEXPR_INSTALLED
from pandas.computation.expr import Expr, _parsers, tokenize_string
from pandas.computation.scope import _ensure_scope
from pandas.compat import string_types
from pandas.computation.engines import _engines
-from distutils.version import LooseVersion
def _check_engine(engine):
@@ -35,17 +35,11 @@ def _check_engine(engine):
# that won't necessarily be import-able)
# Could potentially be done on engine instantiation
if engine == 'numexpr':
- try:
- import numexpr
- except ImportError:
- raise ImportError("'numexpr' not found. Cannot use "
+ if not _NUMEXPR_INSTALLED:
+ raise ImportError("'numexpr' is not installed or an "
+ "unsupported version. Cannot use "
"engine='numexpr' for query/eval "
"if 'numexpr' is not installed")
- else:
- ne_version = numexpr.__version__
- if ne_version < LooseVersion('2.1'):
- raise ImportError("'numexpr' version is %s, "
- "must be >= 2.1" % ne_version)
def _check_parser(parser):
diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py
index 6e33250010c2b..086e92dbde1a0 100644
--- a/pandas/computation/expressions.py
+++ b/pandas/computation/expressions.py
@@ -9,20 +9,10 @@
import warnings
import numpy as np
from pandas.core.common import _values_from_object
-from distutils.version import LooseVersion
+from pandas.computation import _NUMEXPR_INSTALLED
-try:
+if _NUMEXPR_INSTALLED:
import numexpr as ne
- ver = ne.__version__
- _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1')
- if not _NUMEXPR_INSTALLED:
- warnings.warn(
- "The installed version of numexpr {ver} is not supported "
- "in pandas and will be not be used\nThe minimum supported "
- "version is 2.1\n".format(ver=ver), UserWarning)
-
-except ImportError: # pragma: no cover
- _NUMEXPR_INSTALLED = False
_TEST_MODE = None
_TEST_RESULT = None
diff --git a/pandas/computation/tests/test_compat.py b/pandas/computation/tests/test_compat.py
new file mode 100644
index 0000000000000..80b415739c647
--- /dev/null
+++ b/pandas/computation/tests/test_compat.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+# flake8: noqa
+
+import nose
+from itertools import product
+from distutils.version import LooseVersion
+
+import pandas as pd
+from pandas.util import testing as tm
+
+from pandas.computation.engines import _engines
+import pandas.computation.expr as expr
+
+ENGINES_PARSERS = list(product(_engines, expr._parsers))
+
+
+def test_compat():
+    # test we have compat with our installed version of numexpr
+
+ from pandas.computation import _NUMEXPR_INSTALLED
+ try:
+ import numexpr as ne
+ ver = ne.__version__
+ if ver == LooseVersion('2.4.4'):
+ assert not _NUMEXPR_INSTALLED
+ elif ver < LooseVersion('2.1'):
+ with tm.assert_produces_warning(UserWarning,
+ check_stacklevel=False):
+ assert not _NUMEXPR_INSTALLED
+ else:
+ assert _NUMEXPR_INSTALLED
+
+ except ImportError:
+ raise nose.SkipTest("not testing numexpr version compat")
+
+
+def test_invalid_numexpr_version():
+ for engine, parser in ENGINES_PARSERS:
+ yield check_invalid_numexpr_version, engine, parser
+
+
+def check_invalid_numexpr_version(engine, parser):
+ def testit():
+ a, b = 1, 2
+ res = pd.eval('a + b', engine=engine, parser=parser)
+ tm.assert_equal(res, 3)
+
+ if engine == 'numexpr':
+ try:
+ import numexpr as ne
+ except ImportError:
+ raise nose.SkipTest("no numexpr")
+ else:
+ if ne.__version__ < LooseVersion('2.1'):
+ with tm.assertRaisesRegexp(ImportError, "'numexpr' version is "
+ ".+, must be >= 2.1"):
+ testit()
+ elif ne.__version__ == LooseVersion('2.4.4'):
+ raise nose.SkipTest("numexpr version==2.4.4")
+ else:
+ testit()
+ else:
+ testit()
+
+
+if __name__ == '__main__':
+ nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+ exit=False)
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
index b70252ed9f35b..97db171312557 100644
--- a/pandas/computation/tests/test_eval.py
+++ b/pandas/computation/tests/test_eval.py
@@ -1633,8 +1633,8 @@ def test_result_types(self):
self.check_result_type(np.float64, np.float64)
def test_result_types2(self):
-        # xref https://github.com/pydata/pandas/issues/12293
- tm._skip_if_windows()
+        # xref https://github.com/pydata/pandas/issues/12293
+ raise nose.SkipTest("unreliable tests on complex128")
# Did not test complex64 because DataFrame is converting it to
# complex128. Due to https://github.com/pydata/pandas/issues/10952
@@ -1782,33 +1782,6 @@ def test_name_error_exprs():
yield check_name_error_exprs, engine, parser
-def check_invalid_numexpr_version(engine, parser):
- def testit():
- a, b = 1, 2
- res = pd.eval('a + b', engine=engine, parser=parser)
- tm.assert_equal(res, 3)
-
- if engine == 'numexpr':
- try:
- import numexpr as ne
- except ImportError:
- raise nose.SkipTest("no numexpr")
- else:
- if ne.__version__ < LooseVersion('2.1'):
- with tm.assertRaisesRegexp(ImportError, "'numexpr' version is "
- ".+, must be >= 2.1"):
- testit()
- else:
- testit()
- else:
- testit()
-
-
-def test_invalid_numexpr_version():
- for engine, parser in ENGINES_PARSERS:
- yield check_invalid_numexpr_version, engine, parser
-
-
def check_invalid_local_variable_reference(engine, parser):
tm.skip_if_no_ne(engine)
diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py
index 91b33d30004b6..79718c79f9bdd 100644
--- a/pandas/core/datetools.py
+++ b/pandas/core/datetools.py
@@ -1,8 +1,10 @@
"""A collection of random tools for dealing with dates in Python"""
-from pandas.tseries.tools import * # noqa
-from pandas.tseries.offsets import * # noqa
-from pandas.tseries.frequencies import * # noqa
+# flake8: noqa
+
+from pandas.tseries.tools import *
+from pandas.tseries.offsets import *
+from pandas.tseries.frequencies import *
day = DateOffset()
bday = BDay()
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 101a5e64b65b5..1f1ff73869f73 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -10,7 +10,7 @@
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas import compat
from pandas.compat import (StringIO, lzip, range, map, zip, reduce, u,
- OrderedDict)
+ OrderedDict, unichr)
from pandas.util.terminal import get_terminal_size
from pandas.core.config import get_option, set_option
from pandas.io.common import _get_handle, UnicodeWriter, _expand_user
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6c2d4f7919ac6..01156252fcd6d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1627,6 +1627,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
human-readable units (base-2 representation).
null_counts : boolean, default None
Whether to show the non-null counts
+
- If None, then only show if the frame is smaller than
max_info_rows and max_info_columns.
- If True, always show counts.
@@ -2016,7 +2017,7 @@ def _getitem_array(self, key):
# with all other indexing behavior
if isinstance(key, Series) and not key.index.equals(self.index):
warnings.warn("Boolean Series key will be reindexed to match "
- "DataFrame index.", UserWarning)
+ "DataFrame index.", UserWarning, stacklevel=3)
elif len(key) != len(self.index):
raise ValueError('Item wrong length %d instead of %d.' %
(len(key), len(self.index)))
@@ -4932,6 +4933,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
0 or 'index' for row-wise, 1 or 'columns' for column-wise
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
.. versionadded:: 0.18.0
+
This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points `i` and `j`:
@@ -4945,11 +4947,12 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
Returns
-------
quantiles : Series or DataFrame
- If ``q`` is an array, a DataFrame will be returned where the
- index is ``q``, the columns are the columns of self, and the
- values are the quantiles.
- If ``q`` is a float, a Series will be returned where the
- index is the columns of self and the values are the quantiles.
+
+ - If ``q`` is an array, a DataFrame will be returned where the
+ index is ``q``, the columns are the columns of self, and the
+ values are the quantiles.
+ - If ``q`` is a float, a Series will be returned where the
+ index is the columns of self and the values are the quantiles.
Examples
--------
@@ -4965,6 +4968,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
0.1 1.3 3.7
0.5 2.5 55.0
"""
+
self._check_percentile(q)
per = np.asarray(q) * 100
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 13e4de0e2c5f0..963c953154b57 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2041,11 +2041,13 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
method to use for filling holes in reindexed DataFrame.
Please note: this is only applicable to DataFrames/Series with a
monotonically increasing/decreasing index.
- * default: don't fill gaps
- * pad / ffill: propagate last valid observation forward to next
- valid
- * backfill / bfill: use next valid observation to fill gap
- * nearest: use nearest valid observations to fill gap
+
+ * default: don't fill gaps
+ * pad / ffill: propagate last valid observation forward to next
+ valid
+ * backfill / bfill: use next valid observation to fill gap
+ * nearest: use nearest valid observations to fill gap
+
copy : boolean, default True
Return a new object, even if the passed indexes are the same
level : int or name
@@ -2265,11 +2267,13 @@ def _reindex_multi(self, axes, copy, fill_value):
axis : %(axes_single_arg)s
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
Method to use for filling holes in reindexed DataFrame:
- * default: don't fill gaps
- * pad / ffill: propagate last valid observation forward to next
- valid
- * backfill / bfill: use next valid observation to fill gap
- * nearest: use nearest valid observations to fill gap
+
+ * default: don't fill gaps
+ * pad / ffill: propagate last valid observation forward to next
+ valid
+ * backfill / bfill: use next valid observation to fill gap
+ * nearest: use nearest valid observations to fill gap
+
copy : boolean, default True
Return a new object, even if the passed indexes are the same
level : int or name
@@ -3119,7 +3123,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
- for col, s in compat.iteritems(self)])
+ for col, s in self.iteritems()])
return self._constructor.from_dict(result).__finalize__(self)
# 2d or less
@@ -3932,57 +3936,18 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
Freq: 3T, dtype: int64
"""
- from pandas.tseries.resample import resample
+ from pandas.tseries.resample import (resample,
+ _maybe_process_deprecations)
axis = self._get_axis_number(axis)
r = resample(self, freq=rule, label=label, closed=closed,
axis=axis, kind=kind, loffset=loffset,
- fill_method=fill_method, convention=convention,
- limit=limit, base=base)
-
- # deprecation warnings
- # but call methods anyhow
-
- if how is not None:
-
- # .resample(..., how='sum')
- if isinstance(how, compat.string_types):
- method = "{0}()".format(how)
-
- # .resample(..., how=lambda x: ....)
- else:
- method = ".apply()"
-
- # if we have both a how and fill_method, then show
- # the following warning
- if fill_method is None:
- warnings.warn("how in .resample() is deprecated\n"
- "the new syntax is "
- ".resample(...).{method}".format(
- method=method),
- FutureWarning, stacklevel=2)
- r = r.aggregate(how)
-
- if fill_method is not None:
-
- # show the prior function call
- method = '.' + method if how is not None else ''
-
- args = "limit={0}".format(limit) if limit is not None else ""
- warnings.warn("fill_method is deprecated to .resample()\n"
- "the new syntax is .resample(...){method}"
- ".{fill_method}({args})".format(
- method=method,
- fill_method=fill_method,
- args=args),
- FutureWarning, stacklevel=2)
-
- if how is not None:
- r = getattr(r, fill_method)(limit=limit)
- else:
- r = r.aggregate(fill_method, limit=limit)
-
- return r
+ convention=convention,
+ base=base)
+ return _maybe_process_deprecations(r,
+ how=how,
+ fill_method=fill_method,
+ limit=limit)
def first(self, offset):
"""
@@ -4875,26 +4840,27 @@ def describe_numeric_1d(series, percentiles):
def describe_categorical_1d(data):
names = ['count', 'unique']
objcounts = data.value_counts()
- result = [data.count(), len(objcounts[objcounts != 0])]
+ count_unique = len(objcounts[objcounts != 0])
+ result = [data.count(), count_unique]
if result[1] > 0:
top, freq = objcounts.index[0], objcounts.iloc[0]
- if (data.dtype == object or
- com.is_categorical_dtype(data.dtype)):
- names += ['top', 'freq']
- result += [top, freq]
-
- elif com.is_datetime64_dtype(data):
+ if com.is_datetime64_dtype(data):
asint = data.dropna().values.view('i8')
names += ['top', 'freq', 'first', 'last']
result += [lib.Timestamp(top), freq,
lib.Timestamp(asint.min()),
lib.Timestamp(asint.max())]
+ else:
+ names += ['top', 'freq']
+ result += [top, freq]
return pd.Series(result, index=names, name=data.name)
def describe_1d(data, percentiles):
- if com.is_numeric_dtype(data):
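+            # GH 6625: summarize booleans like categoricals
+            # (count/unique/top/freq) rather than numerically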
+ if com.is_bool_dtype(data):
+ return describe_categorical_1d(data)
+ elif com.is_numeric_dtype(data):
return describe_numeric_1d(data, percentiles)
elif com.is_timedelta64_dtype(data):
return describe_numeric_1d(data, percentiles)
@@ -4906,7 +4872,7 @@ def describe_1d(data, percentiles):
elif (include is None) and (exclude is None):
if len(self._get_numeric_data()._info_axis) > 0:
# when some numerics are found, keep only numerics
- data = self.select_dtypes(include=[np.number, np.bool])
+ data = self.select_dtypes(include=[np.number])
else:
data = self
elif include == 'all':
@@ -5164,11 +5130,12 @@ def expanding(self, min_periods=1, freq=None, center=False, axis=0):
cls.expanding = expanding
@Appender(rwindow.ewm.__doc__)
- def ewm(self, com=None, span=None, halflife=None, min_periods=0,
- freq=None, adjust=True, ignore_na=False, axis=0):
+ def ewm(self, com=None, span=None, halflife=None, alpha=None,
+ min_periods=0, freq=None, adjust=True, ignore_na=False,
+ axis=0):
axis = self._get_axis_number(axis)
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
- min_periods=min_periods, freq=freq,
+ alpha=alpha, min_periods=min_periods, freq=freq,
adjust=adjust, ignore_na=ignore_na, axis=axis)
cls.ewm = ewm
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 06f3e0409600e..c8598639d9fad 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1044,27 +1044,71 @@ def ohlc(self):
@Substitution(name='groupby')
@Appender(_doc_template)
- def resample(self, rule, **kwargs):
+ def resample(self, rule, how=None, fill_method=None, limit=None, **kwargs):
"""
Provide resampling when using a TimeGrouper
Return a new grouper with our resampler appended
"""
- from pandas.tseries.resample import TimeGrouper
+ from pandas.tseries.resample import (TimeGrouper,
+ _maybe_process_deprecations)
gpr = TimeGrouper(axis=self.axis, freq=rule, **kwargs)
# we by definition have at least 1 key as we are already a grouper
groupings = list(self.grouper.groupings)
groupings.append(gpr)
- return self.__class__(self.obj,
- keys=groupings,
- axis=self.axis,
- level=self.level,
- as_index=self.as_index,
- sort=self.sort,
- group_keys=self.group_keys,
- squeeze=self.squeeze,
- selection=self._selection)
+ result = self.__class__(self.obj,
+ keys=groupings,
+ axis=self.axis,
+ level=self.level,
+ as_index=self.as_index,
+ sort=self.sort,
+ group_keys=self.group_keys,
+ squeeze=self.squeeze,
+ selection=self._selection)
+
+ return _maybe_process_deprecations(result,
+ how=how,
+ fill_method=fill_method,
+ limit=limit)
+
+ @Substitution(name='groupby')
+ @Appender(_doc_template)
+ def pad(self, limit=None):
+ """
+ Forward fill the values
+
+ Parameters
+ ----------
+ limit : integer, optional
+ limit of how many values to fill
+
+ See Also
+ --------
+ Series.fillna
+ DataFrame.fillna
+ """
+ return self.apply(lambda x: x.ffill(limit=limit))
+ ffill = pad
+
+ @Substitution(name='groupby')
+ @Appender(_doc_template)
+ def backfill(self, limit=None):
+ """
+ Backward fill the values
+
+ Parameters
+ ----------
+ limit : integer, optional
+ limit of how many values to fill
+
+ See Also
+ --------
+ Series.fillna
+ DataFrame.fillna
+ """
+ return self.apply(lambda x: x.bfill(limit=limit))
+ bfill = backfill
@Substitution(name='groupby')
@Appender(_doc_template)
@@ -3346,9 +3390,9 @@ def _transform_general(self, func, *args, **kwargs):
path, res = self._choose_path(fast_path, slow_path, group)
except TypeError:
return self._transform_item_by_item(obj, fast_path)
- except Exception: # pragma: no cover
- res = fast_path(group)
- path = fast_path
+ except ValueError:
+ msg = 'transform must return a scalar value for each group'
+ raise ValueError(msg)
else:
res = path(group)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index f0f5507bc3e85..b0dd2596fccd5 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -541,7 +541,7 @@ def can_do_equal_len():
if (len(indexer) > info_axis and
is_integer(indexer[info_axis]) and
all(is_null_slice(idx) for i, idx in enumerate(indexer)
- if i != info_axis)):
+ if i != info_axis) and item_labels.is_unique):
self.obj[item_labels[indexer[info_axis]]] = value
return
@@ -995,6 +995,10 @@ def _getitem_axis(self, key, axis=0):
return self._getitem_iterable(key, axis=axis)
else:
+
+ # maybe coerce a float scalar to integer
+ key = labels._maybe_cast_indexer(key)
+
if is_integer(key):
if axis == 0 and isinstance(labels, MultiIndex):
try:
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 0d9eccb882d03..484cb6afa77b2 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -702,7 +702,10 @@ def _is_empty_indexer(indexer):
values[indexer] = value
# coerce and try to infer the dtypes of the result
- if lib.isscalar(value):
+ if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
+ value.dtype):
+ dtype = value.dtype
+ elif lib.isscalar(value):
dtype, _ = _infer_dtype_from_scalar(value)
else:
dtype = 'infer'
@@ -714,8 +717,23 @@ def _is_empty_indexer(indexer):
block = block.convert(numeric=False)
return block
- except (ValueError, TypeError):
+ except ValueError:
raise
+ except TypeError:
+
+ # cast to the passed dtype if possible
+ # otherwise raise the original error
+ try:
+ # e.g. we are uint32 and our value is uint64
+ # this is for compat with older numpies
+ block = self.make_block(transf(values.astype(value.dtype)))
+ return block.setitem(indexer=indexer, value=value, mgr=mgr)
+
+ except:
+ pass
+
+ raise
+
except Exception:
pass
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 0abc154f467ab..adfbd6646b048 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -398,7 +398,7 @@ def to_sparse(self, fill_value=None, kind='block'):
y : SparseDataFrame
"""
from pandas.core.sparse import SparsePanel
- frames = dict(compat.iteritems(self))
+ frames = dict(self.iteritems())
return SparsePanel(frames, items=self.items,
major_axis=self.major_axis,
minor_axis=self.minor_axis, default_kind=kind,
@@ -450,7 +450,7 @@ def to_excel(self, path, na_rep='', engine=None, **kwargs):
writer = path
kwargs['na_rep'] = na_rep
- for item, df in compat.iteritems(self):
+ for item, df in self.iteritems():
name = str(item)
df.to_excel(writer, name, **kwargs)
writer.save()
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5eb1ab0f14ecf..d339a93a3ed9b 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1289,8 +1289,10 @@ def quantile(self, q=0.5, interpolation='linear'):
0 <= q <= 1, the quantile(s) to compute
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
.. versionadded:: 0.18.0
+
This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points `i` and `j`:
+
* linear: `i + (j - i) * fraction`, where `fraction` is the
fractional part of the index surrounded by `i` and `j`.
* lower: `i`.
@@ -1306,15 +1308,15 @@ def quantile(self, q=0.5, interpolation='linear'):
Examples
--------
-
>>> s = Series([1, 2, 3, 4])
>>> s.quantile(.5)
- 2.5
+ 2.5
>>> s.quantile([.25, .5, .75])
0.25 1.75
0.50 2.50
0.75 3.25
dtype: float64
+
"""
self._check_percentile(q)
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index c1ab46956c25f..a7ed1ba0c0be0 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -604,7 +604,7 @@ def str_extract(arr, pat, flags=0, expand=None):
return _str_extract_frame(arr._orig, pat, flags=flags)
else:
result, name = _str_extract_noexpand(arr._data, pat, flags=flags)
- return arr._wrap_result(result, name=name)
+ return arr._wrap_result(result, name=name, expand=expand)
def str_extractall(arr, pat, flags=0):
@@ -1292,7 +1292,10 @@ def __iter__(self):
i += 1
g = self.get(i)
- def _wrap_result(self, result, use_codes=True, name=None):
+ def _wrap_result(self, result, use_codes=True,
+ name=None, expand=None):
+
+ from pandas.core.index import Index, MultiIndex
# for category, we do the stuff on the categories, so blow it up
# to the full series again
@@ -1302,48 +1305,42 @@ def _wrap_result(self, result, use_codes=True, name=None):
if use_codes and self._is_categorical:
result = take_1d(result, self._orig.cat.codes)
- # leave as it is to keep extract and get_dummies results
- # can be merged to _wrap_result_expand in v0.17
- from pandas.core.series import Series
- from pandas.core.frame import DataFrame
- from pandas.core.index import Index
-
- if not hasattr(result, 'ndim'):
+ if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'):
return result
+ assert result.ndim < 3
- if result.ndim == 1:
- # Wait until we are sure result is a Series or Index before
- # checking attributes (GH 12180)
- name = name or getattr(result, 'name', None) or self._orig.name
- if isinstance(self._orig, Index):
- # if result is a boolean np.array, return the np.array
- # instead of wrapping it into a boolean Index (GH 8875)
- if is_bool_dtype(result):
- return result
- return Index(result, name=name)
- return Series(result, index=self._orig.index, name=name)
- else:
- assert result.ndim < 3
- return DataFrame(result, index=self._orig.index)
+ if expand is None:
+ # infer from ndim if expand is not specified
+ expand = False if result.ndim == 1 else True
+
+ elif expand is True and not isinstance(self._orig, Index):
+ # required when expand=True is explicitly specified
+            # not needed when inferred
+
+ def cons_row(x):
+ if is_list_like(x):
+ return x
+ else:
+ return [x]
+
+ result = [cons_row(x) for x in result]
- def _wrap_result_expand(self, result, expand=False):
if not isinstance(expand, bool):
raise ValueError("expand must be True or False")
- # for category, we do the stuff on the categories, so blow it up
- # to the full series again
- if self._is_categorical:
- result = take_1d(result, self._orig.cat.codes)
-
- from pandas.core.index import Index, MultiIndex
- if not hasattr(result, 'ndim'):
- return result
+ if name is None:
+ name = getattr(result, 'name', None)
+ if name is None:
+ # do not use logical or, _orig may be a DataFrame
+ # which has "name" column
+ name = self._orig.name
+ # Wait until we are sure result is a Series or Index before
+ # checking attributes (GH 12180)
if isinstance(self._orig, Index):
- name = getattr(result, 'name', None)
# if result is a boolean np.array, return the np.array
# instead of wrapping it into a boolean Index (GH 8875)
- if hasattr(result, 'dtype') and is_bool_dtype(result):
+ if is_bool_dtype(result):
return result
if expand:
@@ -1354,18 +1351,10 @@ def _wrap_result_expand(self, result, expand=False):
else:
index = self._orig.index
if expand:
-
- def cons_row(x):
- if is_list_like(x):
- return x
- else:
- return [x]
-
cons = self._orig._constructor_expanddim
- data = [cons_row(x) for x in result]
- return cons(data, index=index)
+ return cons(result, index=index)
else:
- name = getattr(result, 'name', None)
+            # Must be a Series
cons = self._orig._constructor
return cons(result, name=name, index=index)
@@ -1380,12 +1369,12 @@ def cat(self, others=None, sep=None, na_rep=None):
@copy(str_split)
def split(self, pat=None, n=-1, expand=False):
result = str_split(self._data, pat, n=n)
- return self._wrap_result_expand(result, expand=expand)
+ return self._wrap_result(result, expand=expand)
@copy(str_rsplit)
def rsplit(self, pat=None, n=-1, expand=False):
result = str_rsplit(self._data, pat, n=n)
- return self._wrap_result_expand(result, expand=expand)
+ return self._wrap_result(result, expand=expand)
_shared_docs['str_partition'] = ("""
Split the string at the %(side)s occurrence of `sep`, and return 3 elements
@@ -1440,7 +1429,7 @@ def rsplit(self, pat=None, n=-1, expand=False):
def partition(self, pat=' ', expand=True):
f = lambda x: x.partition(pat)
result = _na_map(f, self._data)
- return self._wrap_result_expand(result, expand=expand)
+ return self._wrap_result(result, expand=expand)
@Appender(_shared_docs['str_partition'] % {
'side': 'last',
@@ -1451,7 +1440,7 @@ def partition(self, pat=' ', expand=True):
def rpartition(self, pat=' ', expand=True):
f = lambda x: x.rpartition(pat)
result = _na_map(f, self._data)
- return self._wrap_result_expand(result, expand=expand)
+ return self._wrap_result(result, expand=expand)
@copy(str_get)
def get(self, i):
@@ -1597,7 +1586,8 @@ def get_dummies(self, sep='|'):
# methods available for making the dummies...
data = self._orig.astype(str) if self._is_categorical else self._data
result = str_get_dummies(data, sep)
- return self._wrap_result(result, use_codes=(not self._is_categorical))
+ return self._wrap_result(result, use_codes=(not self._is_categorical),
+ expand=True)
@copy(str_translate)
def translate(self, table, deletechars=None):
diff --git a/pandas/core/style.py b/pandas/core/style.py
index 15fcec118e7d4..f66ac7485c76e 100644
--- a/pandas/core/style.py
+++ b/pandas/core/style.py
@@ -215,7 +215,7 @@ def _translate(self):
"class": " ".join(cs)})
head.append(row_es)
- if self.data.index.names:
+ if self.data.index.names and self.data.index.names != [None]:
index_header_row = []
for c, name in enumerate(self.data.index.names):
@@ -281,7 +281,7 @@ def format(self, formatter, subset=None):
----------
formatter: str, callable, or dict
subset: IndexSlice
- A argument to DataFrame.loc that restricts which elements
+ An argument to ``DataFrame.loc`` that restricts which elements
``formatter`` is applied to.
Returns
@@ -352,7 +352,7 @@ def render(self):
``Styler`` objects have defined the ``_repr_html_`` method
which automatically calls ``self.render()`` when it's the
last item in a Notebook cell. When calling ``Styler.render()``
- directly, wrap the resul in ``IPython.display.HTML`` to view
+ directly, wrap the result in ``IPython.display.HTML`` to view
the rendered HTML in the notebook.
"""
self._compute()
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 9c8490f608996..9964580b5b09b 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -124,13 +124,17 @@ def _dir_additions(self):
def _get_window(self, other=None):
return self.window
+ @property
+ def _window_type(self):
+ return self.__class__.__name__
+
def __unicode__(self):
""" provide a nice str repr of our rolling object """
attrs = ["{k}={v}".format(k=k, v=getattr(self, k))
for k in self._attributes
if getattr(self, k, None) is not None]
- return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
+ return "{klass} [{attrs}]".format(klass=self._window_type,
attrs=','.join(attrs))
def _shallow_copy(self, obj=None, **kwargs):
@@ -144,21 +148,27 @@ def _shallow_copy(self, obj=None, **kwargs):
kwargs[attr] = getattr(self, attr)
return self._constructor(obj, **kwargs)
- def _prep_values(self, values=None, kill_inf=True, how=None):
+ def _prep_values(self, values=None, kill_inf=True, how=None,
+ as_float=True):
if values is None:
values = getattr(self._selected_obj, 'values', self._selected_obj)
- # coerce dtypes as appropriate
- if com.is_float_dtype(values.dtype):
- pass
- elif com.is_integer_dtype(values.dtype):
- values = values.astype(float)
- elif com.is_timedelta64_dtype(values.dtype):
- values = values.view('i8').astype(float)
- else:
+ # GH #12373 : rolling functions error on float32 data
+ # make sure the data is coerced to float64
+ if com.is_float_dtype(values.dtype) and as_float:
+ values = com._ensure_float64(values)
+ elif com.is_integer_dtype(values.dtype) and as_float:
+ values = com._ensure_float64(values)
+ elif com.needs_i8_conversion(values.dtype):
+ raise NotImplementedError("ops for {action} for this "
+ "dtype {dtype} are not "
+ "implemented".format(
+ action=self._window_type,
+ dtype=values.dtype))
+ elif as_float:
try:
- values = values.astype(float)
+ values = com._ensure_float64(values)
except (ValueError, TypeError):
raise TypeError("cannot handle this type -> {0}"
"".format(values.dtype))
@@ -198,6 +208,7 @@ def _wrap_results(self, results, blocks, obj):
results : list of ndarrays
blocks : list of blocks
obj : conformed data (may be resampled)
+ as_float: bool, cast results to float
"""
final = []
@@ -408,7 +419,7 @@ def _constructor(self):
return Rolling
def _apply(self, func, window=None, center=None, check_minp=None, how=None,
- **kwargs):
+ as_float=True, **kwargs):
"""
Rolling statistical measure using supplied function. Designed to be
used with passed-in Cython array-based functions.
@@ -421,6 +432,8 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None,
check_minp : function, default to _use_window
how : string, default to None (DEPRECATED)
how to resample
+ as_float: bool, default to True
+ Cast result to float, otherwise return as original type
Returns
-------
@@ -438,7 +451,7 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None,
results = []
for b in blocks:
try:
- values = self._prep_values(b.values)
+ values = self._prep_values(b.values, as_float=as_float)
except TypeError:
results.append(b.values.copy())
continue
@@ -457,7 +470,9 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None,
def func(arg, window, min_periods=None):
minp = check_minp(min_periods, window)
- return cfunc(arg, window, minp, **kwargs)
+ # GH #12373: rolling functions error on float32 data
+ return cfunc(com._ensure_float64(arg),
+ window, minp, **kwargs)
# calculation function
if center:
@@ -494,15 +509,26 @@ def count(self):
obj = self._convert_freq()
window = self._get_window()
window = min(window, len(obj)) if not self.center else window
- try:
- converted = np.isfinite(obj).astype(float)
- except TypeError:
- converted = np.isfinite(obj.astype(float)).astype(float)
- result = self._constructor(converted, window=window, min_periods=0,
- center=self.center).sum()
-
- result[result.isnull()] = 0
- return result
+
+ blocks, obj = self._create_blocks(how=None)
+ results = []
+ for b in blocks:
+
+ if com.needs_i8_conversion(b.values):
+ result = b.notnull().astype(int)
+ else:
+ try:
+ result = np.isfinite(b).astype(float)
+ except TypeError:
+ result = np.isfinite(b.astype(float)).astype(float)
+
+ result[pd.isnull(result)] = 0
+
+ result = self._constructor(result, window=window, min_periods=0,
+ center=self.center).sum()
+ results.append(result)
+
+ return self._wrap_results(results, blocks, obj)
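With the needs_i8_conversion branch above, count() now also works on datetime-like data instead of failing on the i8 view; a sketch:

    import pandas as pd

    s = pd.Series(pd.to_datetime(['2016-01-01', None, '2016-01-03']))
    # non-null datetimes are counted via notnull() rather than isfinite()
    print(s.rolling(window=2, min_periods=0).count())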
_shared_docs['apply'] = dedent("""
%(name)s function apply
@@ -535,12 +561,14 @@ def sum(self, **kwargs):
Parameters
----------
how : string, default 'max' (DEPRECATED)
- Method for down- or re-sampling""")
+ Method for down- or re-sampling
+ as_float : bool, default True
+ Cast to float, otherwise return as original type""")
- def max(self, how=None, **kwargs):
+ def max(self, how=None, as_float=True, **kwargs):
if self.freq is not None and how is None:
how = 'max'
- return self._apply('roll_max', how=how, **kwargs)
+ return self._apply('roll_max', how=how, as_float=as_float, **kwargs)
_shared_docs['min'] = dedent("""
%(name)s minimum
@@ -548,12 +576,14 @@ def max(self, how=None, **kwargs):
Parameters
----------
how : string, default 'min' (DEPRECATED)
- Method for down- or re-sampling""")
+ Method for down- or re-sampling
+ as_float : bool, default True
+ Cast to float, otherwise return as original type""")
- def min(self, how=None, **kwargs):
+ def min(self, how=None, as_float=True, **kwargs):
if self.freq is not None and how is None:
how = 'min'
- return self._apply('roll_min', how=how, **kwargs)
+ return self._apply('roll_min', how=how, as_float=as_float, **kwargs)
def mean(self, **kwargs):
return self._apply('roll_mean', **kwargs)
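A sketch of the new as_float keyword added to max/min above, assuming it is reachable through the rolling accessor: the default coerces to float64, while as_float=False is intended to return results in the original dtype:

    import pandas as pd

    s = pd.Series([3, 1, 2], dtype='int64')
    r = s.rolling(window=2, min_periods=1)
    print(r.max())                # coerced to float64 (default)
    print(r.max(as_float=False))  # intended to preserve the integer dtype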
@@ -657,6 +687,10 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
window = self._get_window(other)
def _get_cov(X, Y):
+ # GH #12373 : rolling functions error on float32 data
+ # to avoid potential overflow, cast the data to float64
+ X = X.astype('float64')
+ Y = Y.astype('float64')
mean = lambda x: x.rolling(window, self.min_periods,
center=self.center).mean(**kwargs)
count = (X + Y).rolling(window=window,
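Why _get_cov casts first (illustrative): squaring large float32 values overflows float32's ~3.4e38 range, while float64 has ample headroom:

    import numpy as np

    x = np.array([1e20], dtype='float32')
    print(x * x)                     # inf: (1e20)**2 overflows float32
    print(x.astype('float64') ** 2)  # 1e+40, representable in float64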
@@ -1012,13 +1046,21 @@ class EWM(_Rolling):
Parameters
----------
- com : float. optional
- Center of mass: :math:`\alpha = 1 / (1 + com)`,
+ com : float, optional
+ Specify decay in terms of center of mass,
+ :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`
span : float, optional
- Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)`
+ Specify decay in terms of span,
+ :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`
halflife : float, optional
- Specify decay in terms of halflife,
- :math:`\alpha = 1 - exp(log(0.5) / halflife)`
+ Specify decay in terms of half-life,
+ :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`
+ alpha : float, optional
+ Specify smoothing factor :math:`\alpha` directly,
+ :math:`0 < \alpha \leq 1`
+
+ .. versionadded:: 0.18.0
+
min_periods : int, default 0
Minimum number of observations in window required to have a value
(otherwise result is NA).
@@ -1037,16 +1079,10 @@ class EWM(_Rolling):
Notes
-----
- Either center of mass, span or halflife must be specified
-
- EWMA is sometimes specified using a "span" parameter `s`, we have that the
- decay parameter :math:`\alpha` is related to the span as
- :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)`
-
- where `c` is the center of mass. Given a span, the associated center of
- mass is :math:`c = (s - 1) / 2`
-
- So a "20-day EWMA" would have center 9.5.
+ Exactly one of center of mass, span, half-life, and alpha must be provided.
+ Allowed values and the relationship between the parameters are specified in the
+ parameter descriptions above; see the link at the end of this section for
+ a detailed explanation.
The `freq` keyword is used to conform time series data to a specified
frequency by resampling the data. This is done with the default parameters
@@ -1070,14 +1106,15 @@ class EWM(_Rolling):
(if adjust is True), and 1-alpha and alpha (if adjust is False).
More details can be found at
- http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions
+ http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
"""
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
- def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0,
- freq=None, adjust=True, ignore_na=False, axis=0):
+ def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
+ min_periods=0, freq=None, adjust=True, ignore_na=False,
+ axis=0):
self.obj = obj
- self.com = _get_center_of_mass(com, span, halflife)
+ self.com = _get_center_of_mass(com, span, halflife, alpha)
self.min_periods = min_periods
self.freq = freq
self.adjust = adjust
@@ -1294,20 +1331,32 @@ def dataframe_from_int_dict(data, frame_template):
return _flex_binary_moment(arg2, arg1, f)
-def _get_center_of_mass(com, span, halflife):
- valid_count = len([x for x in [com, span, halflife] if x is not None])
+def _get_center_of_mass(com, span, halflife, alpha):
+ valid_count = len([x for x in [com, span, halflife, alpha]
+ if x is not None])
if valid_count > 1:
- raise Exception("com, span, and halflife are mutually exclusive")
-
- if span is not None:
- # convert span to center of mass
+ raise ValueError("com, span, halflife, and alpha "
+ "are mutually exclusive")
+
+ # Convert to center of mass; domain checks ensure 0 < alpha <= 1
+ if com is not None:
+ if com < 0:
+ raise ValueError("com must satisfy: com >= 0")
+ elif span is not None:
+ if span < 1:
+ raise ValueError("span must satisfy: span >= 1")
com = (span - 1) / 2.
elif halflife is not None:
- # convert halflife to center of mass
+ if halflife <= 0:
+ raise ValueError("halflife must satisfy: halflife > 0")
decay = 1 - np.exp(np.log(0.5) / halflife)
com = 1 / decay - 1
- elif com is None:
- raise Exception("Must pass one of com, span, or halflife")
+ elif alpha is not None:
+ if alpha <= 0 or alpha > 1:
+ raise ValueError("alpha must satisfy: 0 < alpha <= 1")
+ com = (1.0 - alpha) / alpha
+ else:
+ raise ValueError("Must pass one of com, span, halflife, or alpha")
return float(com)
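All four parameterizations reduce to a center of mass, so e.g. com=4, span=9, alpha=0.2, and halflife=log(0.5)/log(0.8) describe the same decay. A quick consistency check (sketch):

    import numpy as np
    import pandas as pd
    from pandas.util.testing import assert_series_equal

    s = pd.Series(np.arange(10, dtype='float64'))
    by_com = s.ewm(com=4).mean()
    assert_series_equal(by_com, s.ewm(span=9).mean())     # com = (span - 1) / 2
    assert_series_equal(by_com, s.ewm(alpha=0.2).mean())  # com = (1 - alpha) / alpha
    assert_series_equal(by_com, s.ewm(halflife=np.log(0.5) / np.log(0.8)).mean())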
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index 8a679b1575e26..852cddc456213 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -902,6 +902,7 @@ def _mpl_repr(self):
_na_value = np.nan
"""The expected NA value to use with this index."""
+ # introspection
@property
def is_monotonic(self):
""" alias for is_monotonic_increasing (deprecated) """
@@ -954,11 +955,12 @@ def is_categorical(self):
return self.inferred_type in ['categorical']
def is_mixed(self):
- return 'mixed' in self.inferred_type
+ return self.inferred_type in ['mixed']
def holds_integer(self):
return self.inferred_type in ['integer', 'mixed-integer']
+ # validate / convert indexers
def _convert_scalar_indexer(self, key, kind=None):
"""
convert a scalar indexer
@@ -966,44 +968,42 @@ def _convert_scalar_indexer(self, key, kind=None):
Parameters
----------
key : label of the slice bound
- kind : optional, type of the indexing operation (loc/ix/iloc/None)
-
- right now we are converting
+ kind : {'ix', 'loc', 'getitem', 'iloc'} or None
"""
+ assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
+
if kind == 'iloc':
- if is_integer(key):
- return key
- return self._invalid_indexer('positional', key)
- else:
+ return self._validate_indexer('positional', key, kind)
- if len(self):
-
- # we can safely disallow
- # if we are not a MultiIndex
- # or a Float64Index
- # or have mixed inferred type (IOW we have the possiblity
- # of a float in with say strings)
- if is_float(key):
- if not (isinstance(self, ABCMultiIndex,) or
- self.is_floating() or self.is_mixed()):
- return self._invalid_indexer('label', key)
-
- # we can disallow integers with loc
- # if could not contain and integer
- elif is_integer(key) and kind == 'loc':
- if not (isinstance(self, ABCMultiIndex,) or
- self.holds_integer() or self.is_mixed()):
- return self._invalid_indexer('label', key)
+ if len(self) and not isinstance(self, ABCMultiIndex,):
- return key
+ # we can raise here if we are definitive that this
+ # is positional indexing (eg. .ix on with a float)
+ # or label indexing if we are using a type able
+ # to be represented in the index
- def _convert_slice_indexer_getitem(self, key, is_index_slice=False):
- """ called from the getitem slicers, determine how to treat the key
- whether positional or not """
- if self.is_integer() or is_index_slice:
- return key
- return self._convert_slice_indexer(key)
+ if kind in ['getitem', 'ix'] and is_float(key):
+ if not self.is_floating():
+ return self._invalid_indexer('label', key)
+
+ elif kind in ['loc'] and is_float(key):
+
+ # we want to raise KeyError on string/mixed here
+ # technically we *could* raise a TypeError
+ # on anything but mixed though
+ if self.inferred_type not in ['floating',
+ 'mixed-integer-float',
+ 'string',
+ 'unicode',
+ 'mixed']:
+ return self._invalid_indexer('label', key)
+
+ elif kind in ['loc'] and is_integer(key):
+ if not self.holds_integer():
+ return self._invalid_indexer('label', key)
+
+ return key
def _convert_slice_indexer(self, key, kind=None):
"""
@@ -1012,8 +1012,9 @@ def _convert_slice_indexer(self, key, kind=None):
Parameters
----------
key : label of the slice bound
- kind : optional, type of the indexing operation (loc/ix/iloc/None)
+ kind : {'ix', 'loc', 'getitem', 'iloc'} or None
"""
+ assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
# if we are not a slice, then we are done
if not isinstance(key, slice):
@@ -1021,38 +1022,14 @@ def _convert_slice_indexer(self, key, kind=None):
# validate iloc
if kind == 'iloc':
+ return slice(self._validate_indexer('slice', key.start, kind),
+ self._validate_indexer('slice', key.stop, kind),
+ self._validate_indexer('slice', key.step, kind))
- # need to coerce to_int if needed
- def f(c):
- v = getattr(key, c)
- if v is None or is_integer(v):
- return v
- self._invalid_indexer('slice {0} value'.format(c), v)
-
- return slice(*[f(c) for c in ['start', 'stop', 'step']])
-
- # validate slicers
- def validate(v):
- if v is None or is_integer(v):
- return True
-
- # dissallow floats (except for .ix)
- elif is_float(v):
- if kind == 'ix':
- return True
-
- return False
-
- return True
-
- for c in ['start', 'stop', 'step']:
- v = getattr(key, c)
- if not validate(v):
- self._invalid_indexer('slice {0} value'.format(c), v)
-
- # figure out if this is a positional indexer
+ # potentially cast the bounds to integers
start, stop, step = key.start, key.stop, key.step
+ # figure out if this is a positional indexer
def is_int(v):
return v is None or is_integer(v)
@@ -1061,8 +1038,14 @@ def is_int(v):
is_positional = is_index_slice and not self.is_integer()
if kind == 'getitem':
- return self._convert_slice_indexer_getitem(
- key, is_index_slice=is_index_slice)
+ """
+ called from the getitem slicers, validate that the slice
+ bounds are in fact integers
+ """
+ if self.is_integer() or is_index_slice:
+ return slice(self._validate_indexer('slice', key.start, kind),
+ self._validate_indexer('slice', key.stop, kind),
+ self._validate_indexer('slice', key.step, kind))
# convert the slice to an indexer here
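With _validate_indexer wired into the getitem/iloc slice path above, float slice bounds on positional indexing raise consistently; a sketch:

    import pandas as pd

    s = pd.Series([1, 2, 3])
    try:
        s.iloc[1.0:3]  # positional slices must use integer bounds
    except TypeError as e:
        print('TypeError:', e)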
@@ -1889,7 +1872,10 @@ def get_loc(self, key, method=None, tolerance=None):
raise ValueError('tolerance argument only valid if using pad, '
'backfill or nearest lookups')
key = _values_from_object(key)
- return self._engine.get_loc(key)
+ try:
+ return self._engine.get_loc(key)
+ except KeyError:
+ return self._engine.get_loc(self._maybe_cast_indexer(key))
indexer = self.get_indexer([key], method=method, tolerance=tolerance)
if indexer.ndim > 1 or indexer.size > 1:
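The get_loc fallback above means an equivalent float key can still resolve against a non-float index rather than raising immediately; a sketch:

    import pandas as pd

    s = pd.Series([10, 20, 30], index=[1, 2, 3])
    # 2.0 == 2, so _maybe_cast_indexer casts the key to int and finds it
    print(s.loc[2.0])  # 20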
@@ -2721,6 +2707,37 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
return slice(start_slice, end_slice, step)
+ def _maybe_cast_indexer(self, key):
+ """
+ If we have a float key and are not a floating index
+ then try to cast to an int if equivalent
+ """
+
+ if is_float(key) and not self.is_floating():
+ try:
+ ckey = int(key)
+ if ckey == key:
+ key = ckey
+ except (ValueError, TypeError):
+ pass
+ return key
+
+ def _validate_indexer(self, form, key, kind):
+ """
+ if this is a positional indexer,
+ validate that we have appropriately typed bounds
+ (must be an integer)
+ """
+ assert kind in ['ix', 'loc', 'getitem', 'iloc']
+
+ if key is None:
+ pass
+ elif is_integer(key):
+ pass
+ elif kind in ['iloc', 'getitem']:
+ self._invalid_indexer(form, key)
+ return key
+
def _maybe_cast_slice_bound(self, label, side, kind):
"""
This function should be overloaded in subclasses that allow non-trivial
@@ -2731,7 +2748,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None
+ kind : {'ix', 'loc', 'getitem'}
Returns
-------
@@ -2742,6 +2759,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Value of `side` parameter should be validated in caller.
"""
+ assert kind in ['ix', 'loc', 'getitem', None]
# We are a plain index here (sub-class override this method if they
# wish to have special treatment for floats/ints, e.g. Float64Index and
@@ -2783,9 +2801,11 @@ def get_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None, the type of indexer
+ kind : {'ix', 'loc', 'getitem'}
"""
+ assert kind in ['ix', 'loc', 'getitem', None]
+
if side not in ('left', 'right'):
raise ValueError("Invalid value for side kwarg,"
" must be either 'left' or 'right': %s" %
@@ -2841,7 +2861,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
If None, defaults to the end
step : int, defaults None
If None, defaults to 1
- kind : string, defaults None
+ kind : {'ix', 'loc', 'getitem'} or None
Returns
-------
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index fea153b2de391..d14568ceca258 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1409,6 +1409,7 @@ def _tuple_index(self):
return Index(self._values)
def get_slice_bound(self, label, side, kind):
+
if not isinstance(label, tuple):
label = label,
return self._partial_tup_index(label, side=side)
@@ -1743,7 +1744,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
# we have a partial slice (like looking up a partial date
# string)
start = stop = level_index.slice_indexer(key.start, key.stop,
- key.step)
+ key.step, kind='loc')
step = start.step
if isinstance(start, slice) or isinstance(stop, slice):
diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py
index 0c102637ab70d..4b021c51456b9 100644
--- a/pandas/indexes/numeric.py
+++ b/pandas/indexes/numeric.py
@@ -7,6 +7,7 @@
from pandas.indexes.base import Index, InvalidIndexError
from pandas.util.decorators import Appender, cache_readonly
import pandas.core.common as com
+from pandas.core.common import is_dtype_equal, isnull
import pandas.indexes.base as ibase
@@ -29,7 +30,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None
+ kind : {'ix', 'loc', 'getitem'}
Returns
-------
@@ -40,18 +41,10 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Value of `side` parameter should be validated in caller.
"""
+ assert kind in ['ix', 'loc', 'getitem', None]
- # we are a numeric index, so we accept
- # integer directly
- if com.is_integer(label):
- pass
-
- # disallow floats only if we not-strict
- elif com.is_float(label):
- if not (self.is_floating() or kind in ['ix']):
- self._invalid_indexer('slice', label)
-
- return label
+ # we will try to coerce to integers
+ return self._maybe_cast_indexer(label)
def _convert_tolerance(self, tolerance):
try:
@@ -140,6 +133,24 @@ def is_all_dates(self):
"""
return False
+ def _convert_scalar_indexer(self, key, kind=None):
+ """
+ convert a scalar indexer
+
+ Parameters
+ ----------
+ key : label of the slice bound
+ kind : {'ix', 'loc', 'getitem'} or None
+ """
+
+ assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
+
+ # don't coerce ilocs to integers
+ if kind != 'iloc':
+ key = self._maybe_cast_indexer(key)
+ return (super(Int64Index, self)
+ ._convert_scalar_indexer(key, kind=kind))
+
def equals(self, other):
"""
Determines if two Index objects contain the same elements.
@@ -247,18 +258,13 @@ def _convert_scalar_indexer(self, key, kind=None):
Parameters
----------
key : label of the slice bound
- kind : optional, type of the indexing operation (loc/ix/iloc/None)
-
- right now we are converting
- floats -> ints if the index supports it
+ kind : {'ix', 'loc', 'getitem'} or None
"""
- if kind == 'iloc':
- if com.is_integer(key):
- return key
+ assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
- return (super(Float64Index, self)
- ._convert_scalar_indexer(key, kind=kind))
+ if kind == 'iloc':
+ return self._validate_indexer('positional', key, kind)
return key
@@ -282,7 +288,7 @@ def _convert_slice_indexer(self, key, kind=None):
kind=kind)
# translate to locations
- return self.slice_indexer(key.start, key.stop, key.step)
+ return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
def _format_native_types(self, na_rep='', float_format=None, decimal='.',
quoting=None, **kwargs):
@@ -324,7 +330,7 @@ def equals(self, other):
try:
if not isinstance(other, Float64Index):
other = self._constructor(other)
- if (not com.is_dtype_equal(self.dtype, other.dtype) or
+ if (not is_dtype_equal(self.dtype, other.dtype) or
self.shape != other.shape):
return False
left, right = self._values, other._values
@@ -380,7 +386,7 @@ def isin(self, values, level=None):
if level is not None:
self._validate_index_level(level)
return lib.ismember_nans(np.array(self), value_set,
- com.isnull(list(value_set)).any())
+ isnull(list(value_set)).any())
Float64Index._add_numeric_methods()
diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py
index 0bed2ec231dbe..4b06af9240436 100644
--- a/pandas/indexes/range.py
+++ b/pandas/indexes/range.py
@@ -487,8 +487,8 @@ def __getitem__(self, key):
stop = l
# delegate non-integer slices
- if (start != int(start) and
- stop != int(stop) and
+ if (start != int(start) or
+ stop != int(stop) or
step != int(step)):
return super_getitem(key)
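The and-to-or change matters because a slice must be delegated whenever any of start/stop/step is non-integral, not only when all three are. A plain-Python model of the predicate:

    start, stop, step = 0.5, 8, 1
    old = (start != int(start) and stop != int(stop) and step != int(step))
    new = (start != int(start) or stop != int(stop) or step != int(step))
    print(old, new)  # False True: the old test wrongly kept the fast path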
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 8c9c348b9a11c..be8c3ccfe08e6 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -146,6 +146,10 @@ def readline(self):
except ImportError:
# boto is only needed for reading from S3.
pass
+except TypeError:
+ # boto/boto3 issues
+ # GH11915
+ pass
def _is_url(url):
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index c7481a953e47b..e706434f29dc5 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -50,7 +50,6 @@ def _test_google_api_imports():
from apiclient.errors import HttpError # noqa
from oauth2client.client import AccessTokenRefreshError # noqa
from oauth2client.client import OAuth2WebServerFlow # noqa
- from oauth2client.client import SignedJwtAssertionCredentials # noqa
from oauth2client.file import Storage # noqa
from oauth2client.tools import run_flow, argparser # noqa
except ImportError as e:
@@ -179,7 +178,30 @@ def get_user_account_credentials(self):
return credentials
def get_service_account_credentials(self):
- from oauth2client.client import SignedJwtAssertionCredentials
+ # Bug fix for https://github.com/pydata/pandas/issues/12572
+ # We need to know that a supported version of oauth2client is installed
+ # Test that either of the following is installed:
+ # - SignedJwtAssertionCredentials from oauth2client.client
+ # - ServiceAccountCredentials from oauth2client.service_account
+ # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0
+ # ServiceAccountCredentials is available in oauthclient >= 2.0.0
+ oauth2client_v1 = True
+ oauth2client_v2 = True
+
+ try:
+ from oauth2client.client import SignedJwtAssertionCredentials
+ except ImportError:
+ oauth2client_v1 = False
+
+ try:
+ from oauth2client.service_account import ServiceAccountCredentials
+ except ImportError:
+ oauth2client_v2 = False
+
+ if not oauth2client_v1 and not oauth2client_v2:
+ raise ImportError("Missing oauth2client required for BigQuery "
+ "service account support")
+
from os.path import isfile
try:
@@ -197,11 +219,16 @@ def get_service_account_credentials(self):
json_key['private_key'] = bytes(
json_key['private_key'], 'UTF-8')
- return SignedJwtAssertionCredentials(
- json_key['client_email'],
- json_key['private_key'],
- self.scope,
- )
+ if oauth2client_v1:
+ return SignedJwtAssertionCredentials(
+ json_key['client_email'],
+ json_key['private_key'],
+ self.scope,
+ )
+ else:
+ return ServiceAccountCredentials.from_json_keyfile_dict(
+ json_key,
+ self.scope)
except (KeyError, ValueError, TypeError, AttributeError):
raise InvalidPrivateKeyFormat(
"Private key is missing or invalid. It should be service "
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f7b38c75a24b9..2604b6e0784cf 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -121,6 +121,7 @@ class ParserWarning(Warning):
If True, skip over blank lines rather than interpreting as NaN values
parse_dates : boolean or list of ints or names or list of lists or dict, \
default False
+
* boolean. If True -> try parsing the index.
* list of ints or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
each as a separate date column.
@@ -128,6 +129,7 @@ class ParserWarning(Warning):
a single date column.
* dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result
'foo'
+
Note: A fast-path exists for iso8601-formatted dates.
infer_datetime_format : boolean, default False
If True and parse_dates is enabled for a column, attempt to infer
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index c94b387f7554a..14881e0fb5a54 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2726,7 +2726,7 @@ def write(self, obj, **kwargs):
self.attrs.default_kind = obj.default_kind
self.write_index('items', obj.items)
- for name, sdf in compat.iteritems(obj):
+ for name, sdf in obj.iteritems():
key = 'sparse_frame_%s' % name
if key not in self.group._v_children:
node = self._handle.create_group(self.group, key)
diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py
index c293ef5c9c2f6..d9c09fa788332 100644
--- a/pandas/io/tests/test_data.py
+++ b/pandas/io/tests/test_data.py
@@ -304,6 +304,7 @@ def setUpClass(cls):
super(TestYahooOptions, cls).setUpClass()
_skip_if_no_lxml()
_skip_if_no_bs()
+ raise nose.SkipTest('unreliable test')
# aapl has monthlies
cls.aapl = web.Options('aapl', 'yahoo')
@@ -370,6 +371,7 @@ def test_get_expiry_dates(self):
@network
def test_get_all_data(self):
+
try:
data = self.aapl.get_all_data(put=True)
except RemoteDataError as e:
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index 5a1c2d63af365..865b7e8d689c0 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -77,7 +77,6 @@ def _test_imports():
from oauth2client.client import OAuth2WebServerFlow # noqa
from oauth2client.client import AccessTokenRefreshError # noqa
- from oauth2client.client import SignedJwtAssertionCredentials # noqa
from oauth2client.file import Storage # noqa
from oauth2client.tools import run_flow # noqa
@@ -115,6 +114,30 @@ def _test_imports():
raise ImportError(
"pandas requires httplib2 for Google BigQuery support")
+ # Bug fix for https://github.com/pydata/pandas/issues/12572
+ # We need to know that a supported version of oauth2client is installed
+ # Test that either of the following is installed:
+ # - SignedJwtAssertionCredentials from oauth2client.client
+ # - ServiceAccountCredentials from oauth2client.service_account
+ # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0
+ # ServiceAccountCredentials is available in oauthclient >= 2.0.0
+ oauth2client_v1 = True
+ oauth2client_v2 = True
+
+ try:
+ from oauth2client.client import SignedJwtAssertionCredentials # noqa
+ except ImportError:
+ oauth2client_v1 = False
+
+ try:
+ from oauth2client.service_account import ServiceAccountCredentials # noqa
+ except ImportError:
+ oauth2client_v2 = False
+
+ if not oauth2client_v1 and not oauth2client_v2:
+ raise ImportError("Missing oauth2client required for BigQuery "
+ "service account support")
+
def test_requirements():
try:
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index d3020e337322b..f32dfd37e837c 100755
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2635,6 +2635,26 @@ def test_eof_states(self):
self.assertRaises(Exception, self.read_csv,
StringIO(data), escapechar='\\')
+ def test_grow_boundary_at_cap(self):
+ # See gh-12494
+ #
+ # Cause of error was the fact that pandas
+ # was not increasing the buffer size when
+ # the desired space would fill the buffer
+ # to capacity, which later would cause a
+ # buffer overflow error when checking the
+ # EOF terminator of the CSV stream
+ def test_empty_header_read(count):
+ s = StringIO(',' * count)
+ expected = DataFrame(columns=[
+ 'Unnamed: {i}'.format(i=i)
+ for i in range(count + 1)])
+ df = read_csv(s)
+ tm.assert_frame_equal(df, expected)
+
+ for count in range(1, 101):
+ test_empty_header_read(count)
+
class TestPythonParser(ParserTests, tm.TestCase):
diff --git a/pandas/rpy/__init__.py b/pandas/rpy/__init__.py
index 8c92ce5842e15..b771a3d8374a3 100644
--- a/pandas/rpy/__init__.py
+++ b/pandas/rpy/__init__.py
@@ -2,6 +2,8 @@
# GH9602
# deprecate rpy to instead directly use rpy2
+# flake8: noqa
+
import warnings
warnings.warn("The pandas.rpy module is deprecated and will be "
"removed in a future version. We refer to external packages "
diff --git a/pandas/rpy/base.py b/pandas/rpy/base.py
index 4cd86d3c3f4e3..ac339dd366b0b 100644
--- a/pandas/rpy/base.py
+++ b/pandas/rpy/base.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
import pandas.rpy.util as util
diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py
index 55adad3610816..95a072154dc68 100644
--- a/pandas/rpy/common.py
+++ b/pandas/rpy/common.py
@@ -2,6 +2,9 @@
Utilities for making working with rpy2 more user- and
developer-friendly.
"""
+
+# flake8: noqa
+
from __future__ import print_function
from distutils.version import LooseVersion
diff --git a/pandas/rpy/tests/test_common.py b/pandas/rpy/tests/test_common.py
index 4b579e9263742..c3f09e21b1545 100644
--- a/pandas/rpy/tests/test_common.py
+++ b/pandas/rpy/tests/test_common.py
@@ -2,6 +2,8 @@
Testing that functions from rpy work as expected
"""
+# flake8: noqa
+
import pandas as pd
import numpy as np
import unittest
diff --git a/pandas/rpy/vars.py b/pandas/rpy/vars.py
index 4756b2779224c..2073b47483141 100644
--- a/pandas/rpy/vars.py
+++ b/pandas/rpy/vars.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
import pandas.rpy.util as util
diff --git a/pandas/sandbox/qtpandas.py b/pandas/sandbox/qtpandas.py
index 2655aa5a452c8..4f4d77bcdf268 100644
--- a/pandas/sandbox/qtpandas.py
+++ b/pandas/sandbox/qtpandas.py
@@ -4,6 +4,8 @@
@author: Jev Kuznetsov
'''
+# flake8: noqa
+
# GH9615
import warnings
diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
index be4ce716a5a37..25b0e11448e97 100644
--- a/pandas/sparse/panel.py
+++ b/pandas/sparse/panel.py
@@ -393,7 +393,7 @@ def _combine(self, other, func, axis=0):
return self._combinePanel(other, func)
elif lib.isscalar(other):
new_frames = dict((k, func(v, other))
- for k, v in compat.iteritems(self))
+ for k, v in self.iteritems())
return self._new_like(new_frames)
def _combineFrame(self, other, func, axis=0):
@@ -470,7 +470,7 @@ def major_xs(self, key):
y : DataFrame
index -> minor axis, columns -> items
"""
- slices = dict((k, v.xs(key)) for k, v in compat.iteritems(self))
+ slices = dict((k, v.xs(key)) for k, v in self.iteritems())
return DataFrame(slices, index=self.minor_axis, columns=self.items)
def minor_xs(self, key):
@@ -487,7 +487,7 @@ def minor_xs(self, key):
y : SparseDataFrame
index -> major axis, columns -> items
"""
- slices = dict((k, v[key]) for k, v in compat.iteritems(self))
+ slices = dict((k, v[key]) for k, v in self.iteritems())
return SparseDataFrame(slices, index=self.major_axis,
columns=self.items,
default_fill_value=self.default_fill_value,
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index 24a73b3825a70..dc66e01ac3f78 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -104,7 +104,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True):
def assert_sp_panel_equal(left, right, exact_indices=True):
- for item, frame in compat.iteritems(left):
+ for item, frame in left.iteritems():
assert (item in right)
# trade-off?
assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices)
diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/src/datetime/np_datetime_strings.c
index 33ddc6c6e1f27..3a1d37f86cc28 100644
--- a/pandas/src/datetime/np_datetime_strings.c
+++ b/pandas/src/datetime/np_datetime_strings.c
@@ -355,6 +355,8 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
* + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
* + Accepts special values "NaT" (not a time), "Today", (current
* day according to local time) and "Now" (current time in UTC).
+ * + ':' separator between hours, minutes, and seconds is optional. When
+ * omitted, each component must be 2 digits if it appears. (GH-10041)
*
* 'str' must be a NULL-terminated string, and 'len' must be its length.
* 'unit' should contain -1 if the unit is unknown, or the unit
@@ -394,15 +396,21 @@ parse_iso_8601_datetime(char *str, int len,
char *substr, sublen;
PANDAS_DATETIMEUNIT bestunit;
- /* if date components in are separated by one of valid separators
- * months/days without leadings 0s will be parsed
+ /* If year-month-day are separated by a valid separator,
+ * months/days without leading zeroes will be parsed
* (though not iso8601). If the components aren't separated,
- * an error code will be retuned because the date is ambigous
+ * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
+ * forbidden here (but parsed as YYMMDD elsewhere).
*/
- int has_sep = 0;
- char sep = '\0';
- char valid_sep[] = {'-', '.', '/', '\\', ' '};
- int valid_sep_len = 5;
+ int has_ymd_sep = 0;
+ char ymd_sep = '\0';
+ char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '};
+ int valid_ymd_sep_len = sizeof(valid_ymd_sep);
+
+ /* hour-minute-second may or may not be separated by ':'. If not, then
+ * each component must be 2 digits. */
+ int has_hms_sep = 0;
+ int hour_was_2_digits = 0;
/* Initialize the output to all zeros */
memset(out, 0, sizeof(pandas_datetimestruct));
@@ -550,7 +558,7 @@ parse_iso_8601_datetime(char *str, int len,
/* Check whether it's a leap-year */
year_leap = is_leapyear(out->year);
- /* Next character must be a separator, start of month or end */
+ /* Next character must be a separator, start of month, or end of string */
if (sublen == 0) {
if (out_local != NULL) {
*out_local = 0;
@@ -558,59 +566,50 @@ parse_iso_8601_datetime(char *str, int len,
bestunit = PANDAS_FR_Y;
goto finish;
}
- else if (!isdigit(*substr)) {
- for (i = 0; i < valid_sep_len; ++i) {
- if (*substr == valid_sep[i]) {
- has_sep = 1;
- sep = valid_sep[i];
- ++substr;
- --sublen;
+
+ if (!isdigit(*substr)) {
+ for (i = 0; i < valid_ymd_sep_len; ++i) {
+ if (*substr == valid_ymd_sep[i]) {
break;
}
}
- if (i == valid_sep_len) {
+ if (i == valid_ymd_sep_len) {
goto parse_error;
}
- }
-
- /* Can't have a trailing sep */
- if (sublen == 0) {
- goto parse_error;
- }
-
-
- /* PARSE THE MONTH (2 digits) */
- if (has_sep && ((sublen >= 2 && isdigit(substr[0]) && !isdigit(substr[1]))
- || (sublen == 1 && isdigit(substr[0])))) {
- out->month = (substr[0] - '0');
-
- if (out->month < 1) {
- PyErr_Format(PyExc_ValueError,
- "Month out of range in datetime string \"%s\"", str);
- goto error;
- }
+ has_ymd_sep = 1;
+ ymd_sep = valid_ymd_sep[i];
++substr;
--sublen;
+ /* Cannot have trailing separator */
+ if (sublen == 0 || !isdigit(*substr)) {
+ goto parse_error;
+ }
}
- else if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->month = 10 * (substr[0] - '0') + (substr[1] - '0');
- if (out->month < 1 || out->month > 12) {
- PyErr_Format(PyExc_ValueError,
- "Month out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
+ /* PARSE THE MONTH */
+ /* First digit required */
+ out->month = (*substr - '0');
+ ++substr;
+ --sublen;
+ /* Second digit optional if there was a separator */
+ if (isdigit(*substr)) {
+ out->month = 10 * out->month + (*substr - '0');
+ ++substr;
+ --sublen;
}
- else {
+ else if (!has_ymd_sep) {
goto parse_error;
}
+ if (out->month < 1 || out->month > 12) {
+ PyErr_Format(PyExc_ValueError,
+ "Month out of range in datetime string \"%s\"", str);
+ goto error;
+ }
- /* Next character must be a '-' or the end of the string */
+ /* Next character must be the separator, start of day, or end of string */
if (sublen == 0) {
- /* dates of form YYYYMM are not valid */
- if (!has_sep) {
+ /* Forbid YYYYMM. Parsed instead as YYMMDD elsewhere. */
+ if (!has_ymd_sep) {
goto parse_error;
}
if (out_local != NULL) {
@@ -619,47 +618,40 @@ parse_iso_8601_datetime(char *str, int len,
bestunit = PANDAS_FR_M;
goto finish;
}
- else if (has_sep && *substr == sep) {
+
+ if (has_ymd_sep) {
+ /* Must have separator, but cannot be trailing */
+ if (*substr != ymd_sep || sublen == 1) {
+ goto parse_error;
+ }
++substr;
--sublen;
}
- else if (!isdigit(*substr)) {
- goto parse_error;
- }
- /* Can't have a trailing '-' */
- if (sublen == 0) {
- goto parse_error;
+ /* PARSE THE DAY */
+ /* First digit required */
+ if (!isdigit(*substr)) {
+ goto parse_error;
}
-
- /* PARSE THE DAY (2 digits) */
- if (has_sep && ((sublen >= 2 && isdigit(substr[0]) && !isdigit(substr[1]))
- || (sublen == 1 && isdigit(substr[0])))) {
- out->day = (substr[0] - '0');
-
- if (out->day < 1) {
- PyErr_Format(PyExc_ValueError,
- "Day out of range in datetime string \"%s\"", str);
- goto error;
- }
+ out->day = (*substr - '0');
+ ++substr;
+ --sublen;
+ /* Second digit optional if there was a separator */
+ if (isdigit(*substr)) {
+ out->day = 10 * out->day + (*substr - '0');
++substr;
--sublen;
}
- else if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->day = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->day < 1 ||
- out->day > days_per_month_table[year_leap][out->month-1]) {
- PyErr_Format(PyExc_ValueError,
- "Day out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
+ else if (!has_ymd_sep) {
goto parse_error;
}
+ if (out->day < 1 ||
+ out->day > days_per_month_table[year_leap][out->month-1])
+ {
+ PyErr_Format(PyExc_ValueError,
+ "Day out of range in datetime string \"%s\"", str);
+ goto error;
+ }
/* Next character must be a 'T', ' ', or end of string */
if (sublen == 0) {
@@ -669,104 +661,119 @@ parse_iso_8601_datetime(char *str, int len,
bestunit = PANDAS_FR_D;
goto finish;
}
- else if (*substr != 'T' && *substr != ' ') {
+
+ if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
goto parse_error;
}
- else {
+ ++substr;
+ --sublen;
+
+ /* PARSE THE HOURS */
+ /* First digit required */
+ if (!isdigit(*substr)) {
+ goto parse_error;
+ }
+ out->hour = (*substr - '0');
+ ++substr;
+ --sublen;
+ /* Second digit optional */
+ if (isdigit(*substr)) {
+ hour_was_2_digits = 1;
+ out->hour = 10 * out->hour + (*substr - '0');
++substr;
--sublen;
- }
-
- /* PARSE THE HOURS (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->hour = 10 * (substr[0] - '0') + (substr[1] - '0');
-
if (out->hour >= 24) {
PyErr_Format(PyExc_ValueError,
"Hours out of range in datetime string \"%s\"", str);
goto error;
}
- substr += 2;
- sublen -= 2;
- }
- else if (sublen >= 1 && isdigit(substr[0])) {
- out->hour = substr[0] - '0';
- ++substr;
- --sublen;
- }
- else {
- goto parse_error;
}
/* Next character must be a ':' or the end of the string */
- if (sublen > 0 && *substr == ':') {
+ if (sublen == 0) {
+ if (!hour_was_2_digits) {
+ goto parse_error;
+ }
+ bestunit = PANDAS_FR_h;
+ goto finish;
+ }
+
+ if (*substr == ':') {
+ has_hms_sep = 1;
++substr;
--sublen;
+ /* Cannot have a trailing separator */
+ if (sublen == 0 || !isdigit(*substr)) {
+ goto parse_error;
+ }
}
- else {
+ else if (!isdigit(*substr)) {
+ if (!hour_was_2_digits) {
+ goto parse_error;
+ }
bestunit = PANDAS_FR_h;
goto parse_timezone;
}
- /* Can't have a trailing ':' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE MINUTES (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->min = 10 * (substr[0] - '0') + (substr[1] - '0');
-
+ /* PARSE THE MINUTES */
+ /* First digit required */
+ out->min = (*substr - '0');
+ ++substr;
+ --sublen;
+ /* Second digit optional if there was a separator */
+ if (isdigit(*substr)) {
+ out->min = 10 * out->min + (*substr - '0');
+ ++substr;
+ --sublen;
if (out->min >= 60) {
PyErr_Format(PyExc_ValueError,
- "Minutes out of range in datetime string \"%s\"", str);
+ "Minutes out of range in datetime string \"%s\"", str);
goto error;
}
- substr += 2;
- sublen -= 2;
- }
- else if (sublen >= 1 && isdigit(substr[0])) {
- out->min = substr[0] - '0';
- ++substr;
- --sublen;
}
- else {
+ else if (!has_hms_sep) {
goto parse_error;
}
- /* Next character must be a ':' or the end of the string */
- if (sublen > 0 && *substr == ':') {
+ if (sublen == 0) {
+ bestunit = PANDAS_FR_m;
+ goto finish;
+ }
+
+ /* If we make it through this condition block, then the next
+ * character is a digit. */
+ if (has_hms_sep && *substr == ':') {
++substr;
--sublen;
+ /* Cannot have a trailing ':' */
+ if (sublen == 0 || !isdigit(*substr)) {
+ goto parse_error;
+ }
+ }
+ else if (!has_hms_sep && isdigit(*substr)) {
}
else {
bestunit = PANDAS_FR_m;
goto parse_timezone;
}
- /* Can't have a trailing ':' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE SECONDS (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->sec = 10 * (substr[0] - '0') + (substr[1] - '0');
-
+ /* PARSE THE SECONDS */
+ /* First digit required */
+ out->sec = (*substr - '0');
+ ++substr;
+ --sublen;
+ /* Second digit optional if there was a separator */
+ if (isdigit(*substr)) {
+ out->sec = 10 * out->sec + (*substr - '0');
+ ++substr;
+ --sublen;
if (out->sec >= 60) {
PyErr_Format(PyExc_ValueError,
- "Seconds out of range in datetime string \"%s\"", str);
+ "Seconds out of range in datetime string \"%s\"", str);
goto error;
}
- substr += 2;
- sublen -= 2;
- }
- else if (sublen >= 1 && isdigit(substr[0])) {
- out->sec = substr[0] - '0';
- ++substr;
- --sublen;
}
- else {
+ else if (!has_hms_sep) {
goto parse_error;
}
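A sketch of the separator rules above: with ':' present a trailing single-digit component is accepted, while the compact form requires two digits per component:

    import pandas as pd

    print(pd.Timestamp('2016-01-02 09:5'))  # ':' separator, 1-digit minute ok
    print(pd.Timestamp('2016-01-02 0905'))  # compact form, 2-digit components
    # '2016-01-02 095' is rejected by this parser: without ':' the minutes
    # must also be two digits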
diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
index a19930a5cef30..dae15215929b7 100644
--- a/pandas/src/parser/tokenizer.c
+++ b/pandas/src/parser/tokenizer.c
@@ -111,7 +111,7 @@ static void *grow_buffer(void *buffer, int length, int *capacity,
void *newbuffer = buffer;
// Can we fit potentially nbytes tokens (+ null terminators) in the stream?
- while ( (length + space > cap) && (newbuffer != NULL) ){
+ while ( (length + space >= cap) && (newbuffer != NULL) ){
cap = cap? cap << 1 : 2;
buffer = newbuffer;
newbuffer = safe_realloc(newbuffer, elsize * cap);
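The >= matters because the stream also needs room for a null terminator: when length + space exactly equals the capacity, the terminator would land one byte past the end. A Python model of the doubling loop (names illustrative):

    def grown_capacity(length, space, cap):
        # mirror of grow_buffer: double until the request strictly fits
        while length + space >= cap:  # with '>' the overflow bug returns
            cap = cap * 2 if cap else 2
        return cap

    print(grown_capacity(length=6, space=2, cap=8))  # 16, not 8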
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index c875a9d49039b..46d30ab7fe313 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -67,13 +67,21 @@
"""
-_ewm_kw = r"""com : float. optional
- Center of mass: :math:`\alpha = 1 / (1 + com)`,
+_ewm_kw = r"""com : float, optional
+ Specify decay in terms of center of mass,
+ :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`
span : float, optional
- Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)`
+ Specify decay in terms of span,
+ :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`
halflife : float, optional
- Specify decay in terms of halflife,
- :math:`\alpha = 1 - exp(log(0.5) / halflife)`
+ Specify decay in terms of half-life,
+ :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`
+alpha : float, optional
+ Specify smoothing factor :math:`\alpha` directly,
+ :math:`0 < \alpha \leq 1`
+
+ .. versionadded:: 0.18.0
+
min_periods : int, default 0
Minimum number of observations in window required to have a value
(otherwise result is NA).
@@ -92,16 +100,10 @@
_ewm_notes = r"""
Notes
-----
-Either center of mass, span or halflife must be specified
-
-EWMA is sometimes specified using a "span" parameter `s`, we have that the
-decay parameter :math:`\alpha` is related to the span as
-:math:`\alpha = 2 / (s + 1) = 1 / (1 + c)`
-
-where `c` is the center of mass. Given a span, the associated center of mass is
-:math:`c = (s - 1) / 2`
-
-So a "20-day EWMA" would have center 9.5.
+Exactly one of center of mass, span, half-life, and alpha must be provided.
+Allowed values and the relationship between the parameters are specified in the
+parameter descriptions above; see the link at the end of this section for
+a detailed explanation.
When adjust is True (default), weighted averages are calculated using weights
(1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.
@@ -121,7 +123,7 @@
True), and 1-alpha and alpha (if adjust is False).
More details can be found at
-http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions
+http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
"""
_expanding_kw = """min_periods : int, default None
@@ -323,14 +325,15 @@ def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs):
@Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw,
_type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
-def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
- adjust=True, how=None, ignore_na=False):
+def ewma(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0,
+ freq=None, adjust=True, how=None, ignore_na=False):
return ensure_compat('ewm',
'mean',
arg,
com=com,
span=span,
halflife=halflife,
+ alpha=alpha,
min_periods=min_periods,
freq=freq,
adjust=adjust,
@@ -341,14 +344,15 @@ def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
@Substitution("Exponentially-weighted moving variance", _unary_arg,
_ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
-def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
- freq=None, how=None, ignore_na=False, adjust=True):
+def ewmvar(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0,
+ bias=False, freq=None, how=None, ignore_na=False, adjust=True):
return ensure_compat('ewm',
'var',
arg,
com=com,
span=span,
halflife=halflife,
+ alpha=alpha,
min_periods=min_periods,
freq=freq,
adjust=adjust,
@@ -361,14 +365,15 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
@Substitution("Exponentially-weighted moving std", _unary_arg,
_ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
-def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
- freq=None, how=None, ignore_na=False, adjust=True):
+def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0,
+ bias=False, freq=None, how=None, ignore_na=False, adjust=True):
return ensure_compat('ewm',
'std',
arg,
com=com,
span=span,
halflife=halflife,
+ alpha=alpha,
min_periods=min_periods,
freq=freq,
adjust=adjust,
@@ -383,9 +388,9 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
@Substitution("Exponentially-weighted moving covariance", _binary_arg_flex,
_ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
-def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
- bias=False, freq=None, pairwise=None, how=None, ignore_na=False,
- adjust=True):
+def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None,
+ min_periods=0, bias=False, freq=None, pairwise=None, how=None,
+ ignore_na=False, adjust=True):
if arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise
@@ -401,6 +406,7 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
com=com,
span=span,
halflife=halflife,
+ alpha=alpha,
min_periods=min_periods,
bias=bias,
freq=freq,
@@ -414,8 +420,9 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
@Substitution("Exponentially-weighted moving correlation", _binary_arg_flex,
_ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
-def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
- freq=None, pairwise=None, how=None, ignore_na=False, adjust=True):
+def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None,
+ min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False,
+ adjust=True):
if arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise
@@ -430,6 +437,7 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
com=com,
span=span,
halflife=halflife,
+ alpha=alpha,
min_periods=min_periods,
freq=freq,
how=how,
diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py
index deff392d6a16c..2c69eb64fd61d 100644
--- a/pandas/stats/tests/test_fama_macbeth.py
+++ b/pandas/stats/tests/test_fama_macbeth.py
@@ -44,7 +44,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
end = index[i + window - 1]
x2 = {}
- for k, v in compat.iteritems(x):
+ for k, v in x.iteritems():
x2[k] = v.truncate(start, end)
y2 = y.truncate(start, end)
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py
index 175ad9dc33dc2..8e659d42bab25 100644
--- a/pandas/stats/tests/test_ols.py
+++ b/pandas/stats/tests/test_ols.py
@@ -573,7 +573,7 @@ def test_wls_panel(self):
stack_y = y.stack()
stack_x = DataFrame(dict((k, v.stack())
- for k, v in compat.iteritems(x)))
+ for k, v in x.iteritems()))
weights = x.std('items')
stack_weights = weights.stack()
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 4154c24f227f9..8d0ddc678a11f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -241,24 +241,21 @@ def test_bool_describe_in_mixed_frame(self):
'int_data': [10, 20, 30, 40, 50],
})
- # Boolean data and integer data is included in .describe() output,
- # string data isn't
- self.assert_numpy_array_equal(df.describe().columns, [
- 'bool_data', 'int_data'])
-
- bool_describe = df.describe()['bool_data']
-
- # Both the min and the max values should stay booleans
- self.assertEqual(bool_describe['min'].dtype, np.bool_)
- self.assertEqual(bool_describe['max'].dtype, np.bool_)
+ # Integer data are included in .describe() output,
+ # Boolean and string data are not.
+ result = df.describe()
+ expected = DataFrame({'int_data': [5, 30, df.int_data.std(),
+ 10, 20, 30, 40, 50]},
+ index=['count', 'mean', 'std', 'min', '25%',
+ '50%', '75%', 'max'])
+ assert_frame_equal(result, expected)
- self.assertFalse(bool_describe['min'])
- self.assertTrue(bool_describe['max'])
+ # Top value is a boolean value that is False
+ result = df.describe(include=['bool'])
- # For numeric operations, like mean or median, the values True/False
- # are cast to the integer values 1 and 0
- assert_almost_equal(bool_describe['mean'], 0.4)
- assert_almost_equal(bool_describe['50%'], 0)
+ expected = DataFrame({'bool_data': [5, 2, False, 3]},
+ index=['count', 'unique', 'top', 'freq'])
+ assert_frame_equal(result, expected)
def test_reduce_mixed_frame(self):
# GH 6806
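A sketch of the describe() split exercised above: numeric columns summarize by default, and boolean columns get a categorical-style summary via include:

    import pandas as pd

    df = pd.DataFrame({'bool_data': [True, False, True, False, False],
                       'int_data': [10, 20, 30, 40, 50]})
    print(df.describe())                  # int_data only
    print(df.describe(include=['bool']))  # count / unique / top / freq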
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index 264302866b023..2a3ee774af6e5 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -216,7 +216,7 @@ def test_getitem_boolean(self):
# we are producing a warning that since the passed boolean
# key is not the same as the given index, we will reindex
# not sure this is really necessary
- with tm.assert_produces_warning(UserWarning):
+ with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
indexer_obj = indexer_obj.reindex(self.tsframe.index[::-1])
subframe_obj = self.tsframe[indexer_obj]
assert_frame_equal(subframe_obj, subframe)
diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py
index 1b24e829088f2..77974718714f8 100644
--- a/pandas/tests/frame/test_nonunique_indexes.py
+++ b/pandas/tests/frame/test_nonunique_indexes.py
@@ -452,3 +452,19 @@ def test_as_matrix_duplicates(self):
dtype=object)
self.assertTrue(np.array_equal(result, expected))
+
+ def test_set_value_by_index(self):
+ # See gh-12344
+ df = DataFrame(np.arange(9).reshape(3, 3).T)
+ df.columns = list('AAA')
+ expected = df.iloc[:, 2]
+
+ df.iloc[:, 0] = 3
+ assert_series_equal(df.iloc[:, 2], expected)
+
+ df = DataFrame(np.arange(9).reshape(3, 3).T)
+ df.columns = [2, float(2), str(2)]
+ expected = df.iloc[:, 1]
+
+ df.iloc[:, 0] = 3
+ assert_series_equal(df.iloc[:, 1], expected)
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 6db507f0e4151..a52cb018c7bae 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -96,8 +96,8 @@ class TestDataFrameQueryWithMultiIndex(tm.TestCase):
def check_query_with_named_multiindex(self, parser, engine):
tm.skip_if_no_ne(engine)
- a = tm.choice(['red', 'green'], size=10)
- b = tm.choice(['eggs', 'ham'], size=10)
+ a = np.random.choice(['red', 'green'], size=10)
+ b = np.random.choice(['eggs', 'ham'], size=10)
index = MultiIndex.from_arrays([a, b], names=['color', 'food'])
df = DataFrame(randn(10, 2), index=index)
ind = Series(df.index.get_level_values('color').values, index=index,
@@ -149,8 +149,8 @@ def test_query_with_named_multiindex(self):
def check_query_with_unnamed_multiindex(self, parser, engine):
tm.skip_if_no_ne(engine)
- a = tm.choice(['red', 'green'], size=10)
- b = tm.choice(['eggs', 'ham'], size=10)
+ a = np.random.choice(['red', 'green'], size=10)
+ b = np.random.choice(['eggs', 'ham'], size=10)
index = MultiIndex.from_arrays([a, b])
df = DataFrame(randn(10, 2), index=index)
ind = Series(df.index.get_level_values(0).values, index=index)
@@ -243,7 +243,7 @@ def test_query_with_unnamed_multiindex(self):
def check_query_with_partially_named_multiindex(self, parser, engine):
tm.skip_if_no_ne(engine)
- a = tm.choice(['red', 'green'], size=10)
+ a = np.random.choice(['red', 'green'], size=10)
b = np.arange(10)
index = MultiIndex.from_arrays([a, b])
index.names = [None, 'rating']
@@ -975,7 +975,7 @@ def check_query_lex_compare_strings(self, parser, engine):
tm.skip_if_no_ne(engine=engine)
import operator as opr
- a = Series(tm.choice(list('abcde'), 20))
+ a = Series(np.random.choice(list('abcde'), 20))
b = Series(np.arange(a.size))
df = DataFrame({'X': a, 'Y': b})
diff --git a/pandas/tests/indexing/__init__.py b/pandas/tests/indexing/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
new file mode 100644
index 0000000000000..4e31fb350f6ee
--- /dev/null
+++ b/pandas/tests/indexing/test_categorical.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+import numpy as np
+from pandas import Series, DataFrame
+from pandas.util.testing import assert_series_equal, assert_frame_equal
+from pandas.util import testing as tm
+
+
+class TestCategoricalIndex(tm.TestCase):
+
+ def setUp(self):
+
+ self.df = DataFrame({'A': np.arange(6, dtype='int64'),
+ 'B': Series(list('aabbca')).astype(
+ 'category', categories=list(
+ 'cab'))}).set_index('B')
+ self.df2 = DataFrame({'A': np.arange(6, dtype='int64'),
+ 'B': Series(list('aabbca')).astype(
+ 'category', categories=list(
+ 'cabe'))}).set_index('B')
+ self.df3 = DataFrame({'A': np.arange(6, dtype='int64'),
+ 'B': (Series([1, 1, 2, 1, 3, 2])
+ .astype('category', categories=[3, 2, 1],
+ ordered=True))}).set_index('B')
+ self.df4 = DataFrame({'A': np.arange(6, dtype='int64'),
+ 'B': (Series([1, 1, 2, 1, 3, 2])
+ .astype('category', categories=[3, 2, 1],
+ ordered=False))}).set_index('B')
+
+ def test_loc_scalar(self):
+ result = self.df.loc['a']
+ expected = (DataFrame({'A': [0, 1, 5],
+ 'B': (Series(list('aaa'))
+ .astype('category',
+ categories=list('cab')))})
+ .set_index('B'))
+ assert_frame_equal(result, expected)
+
+ df = self.df.copy()
+ df.loc['a'] = 20
+ expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20],
+ 'B': (Series(list('aabbca'))
+ .astype('category',
+ categories=list('cab')))})
+ .set_index('B'))
+ assert_frame_equal(df, expected)
+
+ # value not in the categories
+ self.assertRaises(KeyError, lambda: df.loc['d'])
+
+ def f():
+ df.loc['d'] = 10
+
+ self.assertRaises(TypeError, f)
+
+ def f():
+ df.loc['d', 'A'] = 10
+
+ self.assertRaises(TypeError, f)
+
+ def f():
+ df.loc['d', 'C'] = 10
+
+ self.assertRaises(TypeError, f)
+
+ def test_loc_listlike(self):
+
+ # list of labels
+ result = self.df.loc[['c', 'a']]
+ expected = self.df.iloc[[4, 0, 1, 5]]
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.loc[['a', 'b', 'e']]
+ exp_index = pd.CategoricalIndex(
+ list('aaabbe'), categories=list('cabe'), name='B')
+ expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ # element in the categories but not in the values
+ self.assertRaises(KeyError, lambda: self.df2.loc['e'])
+
+ # assign is ok
+ df = self.df2.copy()
+ df.loc['e'] = 20
+ result = df.loc[['a', 'b', 'e']]
+ exp_index = pd.CategoricalIndex(
+ list('aaabbe'), categories=list('cabe'), name='B')
+ expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index)
+ assert_frame_equal(result, expected)
+
+ df = self.df2.copy()
+ result = df.loc[['a', 'b', 'e']]
+ exp_index = pd.CategoricalIndex(
+ list('aaabbe'), categories=list('cabe'), name='B')
+ expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ # not all labels in the categories
+ self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']])
+
+ def test_loc_listlike_dtypes(self):
+ # GH 11586
+
+ # unique categories and codes
+ index = pd.CategoricalIndex(['a', 'b', 'c'])
+ df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
+
+ # unique slice
+ res = df.loc[['a', 'b']]
+ exp = DataFrame({'A': [1, 2],
+ 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b']))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ # duplicated slice
+ res = df.loc[['a', 'a', 'b']]
+ exp = DataFrame({'A': [1, 1, 2],
+ 'B': [4, 4, 5]},
+ index=pd.CategoricalIndex(['a', 'a', 'b']))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ with tm.assertRaisesRegexp(
+ KeyError,
+ 'a list-indexer must only include values that are '
+ 'in the categories'):
+ df.loc[['a', 'x']]
+
+ # duplicated categories and codes
+ index = pd.CategoricalIndex(['a', 'b', 'a'])
+ df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
+
+ # unique slice
+ res = df.loc[['a', 'b']]
+ exp = DataFrame({'A': [1, 3, 2],
+ 'B': [4, 6, 5]},
+ index=pd.CategoricalIndex(['a', 'a', 'b']))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ # duplicated slice
+ res = df.loc[['a', 'a', 'b']]
+ exp = DataFrame(
+ {'A': [1, 3, 1, 3, 2],
+ 'B': [4, 6, 4, 6, 5
+ ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ with tm.assertRaisesRegexp(
+ KeyError,
+ 'a list-indexer must only include values '
+ 'that are in the categories'):
+ df.loc[['a', 'x']]
+
+ # contains unused category
+ index = pd.CategoricalIndex(
+ ['a', 'b', 'a', 'c'], categories=list('abcde'))
+ df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
+
+ res = df.loc[['a', 'b']]
+ exp = DataFrame({'A': [1, 3, 2],
+ 'B': [5, 7, 6]}, index=pd.CategoricalIndex(
+ ['a', 'a', 'b'], categories=list('abcde')))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ res = df.loc[['a', 'e']]
+ exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
+ index=pd.CategoricalIndex(['a', 'a', 'e'],
+ categories=list('abcde')))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ # duplicated slice
+ res = df.loc[['a', 'a', 'b']]
+ exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
+ index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
+ categories=list('abcde')))
+ tm.assert_frame_equal(res, exp, check_index_type=True)
+
+ with tm.assertRaisesRegexp(
+ KeyError,
+ 'a list-indexer must only include values '
+ 'that are in the categories'):
+ df.loc[['a', 'x']]
+
+ def test_read_only_source(self):
+ # GH 10043
+ rw_array = np.eye(10)
+ rw_df = DataFrame(rw_array)
+
+ ro_array = np.eye(10)
+ ro_array.setflags(write=False)
+ ro_df = DataFrame(ro_array)
+
+ assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
+ assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
+ assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
+ assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
+
+ assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
+ assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
+ assert_series_equal(rw_df.loc[1], ro_df.loc[1])
+ assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
+
+ def test_reindexing(self):
+
+ # reindexing
+ # convert to a regular index
+ result = self.df2.reindex(['a', 'b', 'e'])
+ expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
+ 'B': Series(list('aaabbe'))}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['a', 'b'])
+ expected = DataFrame({'A': [0, 1, 5, 2, 3],
+ 'B': Series(list('aaabb'))}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['e'])
+ expected = DataFrame({'A': [np.nan],
+ 'B': Series(['e'])}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['d'])
+ expected = DataFrame({'A': [np.nan],
+ 'B': Series(['d'])}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+        # since we are actually reindexing with a Categorical,
+        # we get a Categorical back
+ cats = list('cabe')
+
+ result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats))
+ expected = DataFrame({'A': [0, 1, 5, np.nan],
+ 'B': Series(list('aaad')).astype(
+ 'category', categories=cats)}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(pd.Categorical(['a'], categories=cats))
+ expected = DataFrame({'A': [0, 1, 5],
+ 'B': Series(list('aaa')).astype(
+ 'category', categories=cats)}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['a', 'b', 'e'])
+ expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
+ 'B': Series(list('aaabbe'))}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['a', 'b'])
+ expected = DataFrame({'A': [0, 1, 5, 2, 3],
+ 'B': Series(list('aaabb'))}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(['e'])
+ expected = DataFrame({'A': [np.nan],
+ 'B': Series(['e'])}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ # give back the type of categorical that we received
+ result = self.df2.reindex(pd.Categorical(
+ ['a', 'd'], categories=cats, ordered=True))
+ expected = DataFrame(
+ {'A': [0, 1, 5, np.nan],
+ 'B': Series(list('aaad')).astype('category', categories=cats,
+ ordered=True)}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ result = self.df2.reindex(pd.Categorical(
+ ['a', 'd'], categories=['a', 'd']))
+        expected = DataFrame({'A': [0, 1, 5, np.nan],
+                              'B': Series(list('aaad')).astype(
+                                  'category',
+                                  categories=['a', 'd'])}).set_index('B')
+ assert_frame_equal(result, expected, check_index_type=True)
+
+ # passed duplicate indexers are not allowed
+ self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a']))
+
+ # args NotImplemented ATM
+ self.assertRaises(NotImplementedError,
+ lambda: self.df2.reindex(['a'], method='ffill'))
+ self.assertRaises(NotImplementedError,
+ lambda: self.df2.reindex(['a'], level=1))
+ self.assertRaises(NotImplementedError,
+ lambda: self.df2.reindex(['a'], limit=2))
+
+ def test_loc_slice(self):
+ # slicing
+ # not implemented ATM
+ # GH9748
+
+ self.assertRaises(TypeError, lambda: self.df.loc[1:5])
+
+ # result = df.loc[1:5]
+ # expected = df.iloc[[1,2,3,4]]
+ # assert_frame_equal(result, expected)
+
+ def test_boolean_selection(self):
+
+ df3 = self.df3
+ df4 = self.df4
+
+ result = df3[df3.index == 'a']
+ expected = df3.iloc[[]]
+ assert_frame_equal(result, expected)
+
+ result = df4[df4.index == 'a']
+ expected = df4.iloc[[]]
+ assert_frame_equal(result, expected)
+
+ result = df3[df3.index == 1]
+ expected = df3.iloc[[0, 1, 3]]
+ assert_frame_equal(result, expected)
+
+ result = df4[df4.index == 1]
+ expected = df4.iloc[[0, 1, 3]]
+ assert_frame_equal(result, expected)
+
+ # since we have an ordered categorical
+
+ # CategoricalIndex([1, 1, 2, 1, 3, 2],
+ # categories=[3, 2, 1],
+ # ordered=True,
+ # name=u'B')
+ result = df3[df3.index < 2]
+ expected = df3.iloc[[4]]
+ assert_frame_equal(result, expected)
+
+ result = df3[df3.index > 1]
+ expected = df3.iloc[[]]
+ assert_frame_equal(result, expected)
+
+ # unordered
+ # cannot be compared
+
+ # CategoricalIndex([1, 1, 2, 1, 3, 2],
+ # categories=[3, 2, 1],
+ # ordered=False,
+ # name=u'B')
+ self.assertRaises(TypeError, lambda: df4[df4.index < 2])
+ self.assertRaises(TypeError, lambda: df4[df4.index > 1])
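
A note on the boolean-selection tests just above: inequality comparisons
against a CategoricalIndex compare by category order, so they are only
defined when the categorical is ordered. A minimal sketch of the behavior
being pinned down (constructing the index directly, and assuming the
pandas API of this era):

    import numpy as np
    import pandas as pd

    idx = pd.CategoricalIndex([1, 1, 2, 1, 3, 2],
                              categories=[3, 2, 1], ordered=True)
    df = pd.DataFrame({'A': np.arange(6)}, index=idx)

    # categories=[3, 2, 1] makes 3 the smallest category, so
    # "index < 2" matches only the single row labeled 3
    print(df[df.index < 2])

    # the same comparison on an ordered=False index raises TypeError,
    # which is what the df4 assertions above exercise
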
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
new file mode 100644
index 0000000000000..2a2f8678694de
--- /dev/null
+++ b/pandas/tests/indexing/test_floats.py
@@ -0,0 +1,676 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+from pandas import Series, DataFrame, Index, Float64Index
+from pandas.util.testing import assert_series_equal, assert_almost_equal
+import pandas.util.testing as tm
+
+
+class TestFloatIndexers(tm.TestCase):
+
+ def check(self, result, original, indexer, getitem):
+ """
+        comparator for results;
+        we need to take care whether we are indexing
+        on a Series or a DataFrame
+ """
+ if isinstance(original, Series):
+ expected = original.iloc[indexer]
+ else:
+ if getitem:
+ expected = original.iloc[:, indexer]
+ else:
+ expected = original.iloc[indexer]
+
+ assert_almost_equal(result, expected)
+
+ def test_scalar_error(self):
+
+ # GH 4892
+ # float_indexers should raise exceptions
+ # on appropriate Index types & accessors
+ # this duplicates the code below
+        # but is specifically testing for the error
+ # message
+
+ for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
+ tm.makeCategoricalIndex,
+ tm.makeDateIndex, tm.makeTimedeltaIndex,
+ tm.makePeriodIndex, tm.makeIntIndex,
+ tm.makeRangeIndex]:
+
+ i = index(5)
+
+ s = Series(np.arange(len(i)), index=i)
+
+ def f():
+ s.iloc[3.0]
+ self.assertRaisesRegexp(TypeError,
+ 'cannot do positional indexing',
+ f)
+
+ def f():
+ s.iloc[3.0] = 0
+ self.assertRaises(TypeError, f)
+
+ def test_scalar_non_numeric(self):
+
+ # GH 4892
+ # float_indexers should raise exceptions
+ # on appropriate Index types & accessors
+
+ for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
+ tm.makeCategoricalIndex,
+ tm.makeDateIndex, tm.makeTimedeltaIndex,
+ tm.makePeriodIndex]:
+
+ i = index(5)
+
+            for s in [Series(np.arange(len(i)), index=i),
+                      DataFrame(np.random.randn(len(i), len(i)),
+                                index=i,
+                                columns=i)]:
+
+ # getting
+ for idxr, getitem in [(lambda x: x.ix, False),
+ (lambda x: x.iloc, False),
+ (lambda x: x, True)]:
+
+ def f():
+ idxr(s)[3.0]
+
+                    # getitem on a DataFrame is a KeyError as it is indexing
+ # via labels on the columns
+ if getitem and isinstance(s, DataFrame):
+ error = KeyError
+ else:
+ error = TypeError
+ self.assertRaises(error, f)
+
+ # label based can be a TypeError or KeyError
+ def f():
+ s.loc[3.0]
+
+ if s.index.inferred_type in ['string', 'unicode', 'mixed']:
+ error = KeyError
+ else:
+ error = TypeError
+ self.assertRaises(error, f)
+
+ # contains
+ self.assertFalse(3.0 in s)
+
+ # setting with a float fails with iloc
+ def f():
+ s.iloc[3.0] = 0
+ self.assertRaises(TypeError, f)
+
+ # setting with an indexer
+ if s.index.inferred_type in ['categorical']:
+ # Value or Type Error
+ pass
+ elif s.index.inferred_type in ['datetime64', 'timedelta64',
+ 'period']:
+
+                # these should probably work
+                # and are inconsistent between Series/DataFrame ATM
+ # for idxr in [lambda x: x.ix,
+ # lambda x: x]:
+ # s2 = s.copy()
+ # def f():
+ # idxr(s2)[3.0] = 0
+ # self.assertRaises(TypeError, f)
+ pass
+
+ else:
+
+ s2 = s.copy()
+ s2.loc[3.0] = 10
+ self.assertTrue(s2.index.is_object())
+
+ for idxr in [lambda x: x.ix,
+ lambda x: x]:
+ s2 = s.copy()
+ idxr(s2)[3.0] = 0
+ self.assertTrue(s2.index.is_object())
+
+            # falls back to positional selection, Series only
+ s = Series(np.arange(len(i)), index=i)
+ s[3]
+ self.assertRaises(TypeError, lambda: s[3.0])
+
+ def test_scalar_with_mixed(self):
+
+ s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
+ s3 = Series([1, 2, 3], index=['a', 'b', 1.5])
+
+ # lookup in a pure string index
+ # with an invalid indexer
+ for idxr in [lambda x: x.ix,
+ lambda x: x,
+ lambda x: x.iloc]:
+
+ def f():
+ idxr(s2)[1.0]
+
+ self.assertRaises(TypeError, f)
+
+ self.assertRaises(KeyError, lambda: s2.loc[1.0])
+
+ result = s2.loc['b']
+ expected = 2
+ self.assertEqual(result, expected)
+
+ # mixed index so we have label
+ # indexing
+ for idxr in [lambda x: x.ix,
+ lambda x: x]:
+
+ def f():
+ idxr(s3)[1.0]
+
+ self.assertRaises(TypeError, f)
+
+ result = idxr(s3)[1]
+ expected = 2
+ self.assertEqual(result, expected)
+
+ self.assertRaises(TypeError, lambda: s3.iloc[1.0])
+ self.assertRaises(KeyError, lambda: s3.loc[1.0])
+
+ result = s3.loc[1.5]
+ expected = 3
+ self.assertEqual(result, expected)
+
+ def test_scalar_integer(self):
+
+ # test how scalar float indexers work on int indexes
+
+ # integer index
+ for index in [tm.makeIntIndex, tm.makeRangeIndex]:
+
+ i = index(5)
+ for s in [Series(np.arange(len(i))),
+ DataFrame(np.random.randn(len(i), len(i)),
+ index=i, columns=i)]:
+
+ # coerce to equal int
+ for idxr, getitem in [(lambda x: x.ix, False),
+ (lambda x: x.loc, False),
+ (lambda x: x, True)]:
+
+ result = idxr(s)[3.0]
+ self.check(result, s, 3, getitem)
+
+                # setting with a float coerces to equal int
+ for idxr, getitem in [(lambda x: x.ix, False),
+ (lambda x: x.loc, False),
+ (lambda x: x, True)]:
+
+ if isinstance(s, Series):
+ compare = self.assertEqual
+ expected = 100
+ else:
+ compare = tm.assert_series_equal
+ if getitem:
+ expected = Series(100,
+ index=range(len(s)), name=3)
+ else:
+ expected = Series(100.,
+ index=range(len(s)), name=3)
+
+ s2 = s.copy()
+ idxr(s2)[3.0] = 100
+
+ result = idxr(s2)[3.0]
+ compare(result, expected)
+
+ result = idxr(s2)[3]
+ compare(result, expected)
+
+ # contains
+ # coerce to equal int
+ self.assertTrue(3.0 in s)
+
+ def test_scalar_float(self):
+
+ # scalar float indexers work on a float index
+ index = Index(np.arange(5.))
+ for s in [Series(np.arange(len(index)), index=index),
+ DataFrame(np.random.randn(len(index), len(index)),
+ index=index, columns=index)]:
+
+ # assert all operations except for iloc are ok
+ indexer = index[3]
+ for idxr, getitem in [(lambda x: x.ix, False),
+ (lambda x: x.loc, False),
+ (lambda x: x, True)]:
+
+ # getting
+ result = idxr(s)[indexer]
+ self.check(result, s, 3, getitem)
+
+                # setting: write back the value we just read,
+                # then check the round-trip
+                s2 = s.copy()
+
+                idxr(s2)[indexer] = result
+                result = idxr(s2)[indexer]
+                self.check(result, s, 3, getitem)
+
+                # a float not present in the index is a KeyError
+ self.assertRaises(KeyError, lambda: idxr(s)[3.5])
+
+ # contains
+ self.assertTrue(3.0 in s)
+
+ # iloc succeeds with an integer
+ expected = s.iloc[3]
+ s2 = s.copy()
+
+ s2.iloc[3] = expected
+ result = s2.iloc[3]
+ self.check(result, s, 3, False)
+
+ # iloc raises with a float
+ self.assertRaises(TypeError, lambda: s.iloc[3.0])
+
+ def g():
+ s2.iloc[3.0] = 0
+ self.assertRaises(TypeError, g)
+
+ def test_slice_non_numeric(self):
+
+ # GH 4892
+ # float_indexers should raise exceptions
+ # on appropriate Index types & accessors
+
+ for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
+ tm.makeDateIndex, tm.makeTimedeltaIndex,
+ tm.makePeriodIndex]:
+
+ index = index(5)
+ for s in [Series(range(5), index=index),
+ DataFrame(np.random.randn(5, 2), index=index)]:
+
+ # getitem
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ def f():
+ s.iloc[l]
+ self.assertRaises(TypeError, f)
+
+ for idxr in [lambda x: x.ix,
+ lambda x: x.loc,
+ lambda x: x.iloc,
+ lambda x: x]:
+
+ def f():
+ idxr(s)[l]
+ self.assertRaises(TypeError, f)
+
+ # setitem
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ def f():
+ s.iloc[l] = 0
+ self.assertRaises(TypeError, f)
+
+ for idxr in [lambda x: x.ix,
+ lambda x: x.loc,
+ lambda x: x.iloc,
+ lambda x: x]:
+ def f():
+ idxr(s)[l] = 0
+ self.assertRaises(TypeError, f)
+
+ def test_slice_integer(self):
+
+ # same as above, but for Integer based indexes
+ # these coerce to a like integer
+        # oob indicates whether we are out of bounds
+        # for positional indexing
+ for index, oob in [(tm.makeIntIndex(5), False),
+ (tm.makeRangeIndex(5), False),
+ (tm.makeIntIndex(5) + 10, True)]:
+
+ # s is an in-range index
+ s = Series(range(5), index=index)
+
+ # getitem
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ for idxr in [lambda x: x.loc,
+ lambda x: x.ix]:
+
+ result = idxr(s)[l]
+
+                    # these are all label indexing
+                    # except getitem which is positional;
+                    # empty when out of bounds
+ if oob:
+ indexer = slice(0, 0)
+ else:
+ indexer = slice(3, 5)
+ self.check(result, s, indexer, False)
+
+ # positional indexing
+ def f():
+ s[l]
+
+ self.assertRaises(TypeError, f)
+
+ # getitem out-of-bounds
+ for l in [slice(-6, 6),
+ slice(-6.0, 6.0)]:
+
+ for idxr in [lambda x: x.loc,
+ lambda x: x.ix]:
+ result = idxr(s)[l]
+
+                    # these are all label indexing
+                    # except getitem which is positional;
+                    # empty when out of bounds
+ if oob:
+ indexer = slice(0, 0)
+ else:
+ indexer = slice(-6, 6)
+ self.check(result, s, indexer, False)
+
+ # positional indexing
+ def f():
+ s[slice(-6.0, 6.0)]
+
+ self.assertRaises(TypeError, f)
+
+ # getitem odd floats
+ for l, res1 in [(slice(2.5, 4), slice(3, 5)),
+ (slice(2, 3.5), slice(2, 4)),
+ (slice(2.5, 3.5), slice(3, 4))]:
+
+ for idxr in [lambda x: x.loc,
+ lambda x: x.ix]:
+
+ result = idxr(s)[l]
+ if oob:
+ res = slice(0, 0)
+ else:
+ res = res1
+
+ self.check(result, s, res, False)
+
+ # positional indexing
+ def f():
+ s[l]
+
+ self.assertRaises(TypeError, f)
+
+ # setitem
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ for idxr in [lambda x: x.loc,
+ lambda x: x.ix]:
+ sc = s.copy()
+ idxr(sc)[l] = 0
+ result = idxr(sc)[l].values.ravel()
+ self.assertTrue((result == 0).all())
+
+ # positional indexing
+ def f():
+ s[l] = 0
+
+ self.assertRaises(TypeError, f)
+
+ def test_integer_positional_indexing(self):
+        """ make sure that we raise on float-valued positional
+        indexing against an integer index """
+
+ s = Series(range(2, 6), index=range(2, 6))
+
+ result = s[2:4]
+ expected = s.iloc[2:4]
+ assert_series_equal(result, expected)
+
+ for idxr in [lambda x: x,
+ lambda x: x.iloc]:
+
+ for l in [slice(2, 4.0),
+ slice(2.0, 4),
+ slice(2.0, 4.0)]:
+
+ def f():
+ idxr(s)[l]
+
+ self.assertRaises(TypeError, f)
+
+ def test_slice_integer_frame_getitem(self):
+
+ # similar to above, but on the getitem dim (of a DataFrame)
+ for index in [tm.makeIntIndex, tm.makeRangeIndex]:
+
+ index = index(5)
+ s = DataFrame(np.random.randn(5, 2), index=index)
+
+ for idxr in [lambda x: x.loc,
+ lambda x: x.ix]:
+
+ # getitem
+ for l in [slice(0.0, 1),
+ slice(0, 1.0),
+ slice(0.0, 1.0)]:
+
+ result = idxr(s)[l]
+ indexer = slice(0, 2)
+ self.check(result, s, indexer, False)
+
+ # positional indexing
+ def f():
+ s[l]
+
+ self.assertRaises(TypeError, f)
+
+ # getitem out-of-bounds
+ for l in [slice(-10, 10),
+ slice(-10.0, 10.0)]:
+
+ result = idxr(s)[l]
+ self.check(result, s, slice(-10, 10), True)
+
+ # positional indexing
+ def f():
+ s[slice(-10.0, 10.0)]
+
+ self.assertRaises(TypeError, f)
+
+ # getitem odd floats
+ for l, res in [(slice(0.5, 1), slice(1, 2)),
+ (slice(0, 0.5), slice(0, 1)),
+ (slice(0.5, 1.5), slice(1, 2))]:
+
+ result = idxr(s)[l]
+ self.check(result, s, res, False)
+
+ # positional indexing
+ def f():
+ s[l]
+
+ self.assertRaises(TypeError, f)
+
+ # setitem
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ sc = s.copy()
+ idxr(sc)[l] = 0
+ result = idxr(sc)[l].values.ravel()
+ self.assertTrue((result == 0).all())
+
+ # positional indexing
+ def f():
+ s[l] = 0
+
+ self.assertRaises(TypeError, f)
+
+ def test_slice_float(self):
+
+ # same as above, but for floats
+ index = Index(np.arange(5.)) + 0.1
+ for s in [Series(range(5), index=index),
+ DataFrame(np.random.randn(5, 2), index=index)]:
+
+ for l in [slice(3.0, 4),
+ slice(3, 4.0),
+ slice(3.0, 4.0)]:
+
+ expected = s.iloc[3:4]
+ for idxr in [lambda x: x.ix,
+ lambda x: x.loc,
+ lambda x: x]:
+
+ # getitem
+ result = idxr(s)[l]
+ self.assertTrue(result.equals(expected))
+
+ # setitem
+ s2 = s.copy()
+ idxr(s2)[l] = 0
+ result = idxr(s2)[l].values.ravel()
+ self.assertTrue((result == 0).all())
+
+ def test_floating_index_doc_example(self):
+
+ index = Index([1.5, 2, 3, 4.5, 5])
+ s = Series(range(5), index=index)
+ self.assertEqual(s[3], 2)
+ self.assertEqual(s.ix[3], 2)
+ self.assertEqual(s.loc[3], 2)
+ self.assertEqual(s.iloc[3], 3)
+
+ def test_floating_misc(self):
+
+        # related to GH 236
+ # scalar/slicing of a float index
+ s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
+
+ # label based slicing
+ result1 = s[1.0:3.0]
+ result2 = s.ix[1.0:3.0]
+ result3 = s.loc[1.0:3.0]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+
+ # exact indexing when found
+ result1 = s[5.0]
+ result2 = s.loc[5.0]
+ result3 = s.ix[5.0]
+ self.assertEqual(result1, result2)
+ self.assertEqual(result1, result3)
+
+ result1 = s[5]
+ result2 = s.loc[5]
+ result3 = s.ix[5]
+ self.assertEqual(result1, result2)
+ self.assertEqual(result1, result3)
+
+ self.assertEqual(s[5.0], s[5])
+
+        # value not found (and no fallback at all)
+
+ # scalar integers
+ self.assertRaises(KeyError, lambda: s.loc[4])
+ self.assertRaises(KeyError, lambda: s.ix[4])
+ self.assertRaises(KeyError, lambda: s[4])
+
+ # fancy floats/integers create the correct entry (as nan)
+ # fancy tests
+ expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
+ for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
+ assert_series_equal(s[fancy_idx], expected)
+ assert_series_equal(s.loc[fancy_idx], expected)
+ assert_series_equal(s.ix[fancy_idx], expected)
+
+ expected = Series([2, 0], index=Index([5, 0], dtype='int64'))
+ for fancy_idx in [[5, 0], np.array([5, 0])]: # int
+ assert_series_equal(s[fancy_idx], expected)
+ assert_series_equal(s.loc[fancy_idx], expected)
+ assert_series_equal(s.ix[fancy_idx], expected)
+
+ # all should return the same as we are slicing 'the same'
+ result1 = s.loc[2:5]
+ result2 = s.loc[2.0:5.0]
+ result3 = s.loc[2.0:5]
+ result4 = s.loc[2.1:5]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, result4)
+
+ # previously this did fallback indexing
+ result1 = s[2:5]
+ result2 = s[2.0:5.0]
+ result3 = s[2.0:5]
+ result4 = s[2.1:5]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, result4)
+
+ result1 = s.ix[2:5]
+ result2 = s.ix[2.0:5.0]
+ result3 = s.ix[2.0:5]
+ result4 = s.ix[2.1:5]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, result4)
+
+ # combined test
+ result1 = s.loc[2:5]
+ result2 = s.ix[2:5]
+ result3 = s[2:5]
+
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+
+ # list selection
+ result1 = s[[0.0, 5, 10]]
+ result2 = s.loc[[0.0, 5, 10]]
+ result3 = s.ix[[0.0, 5, 10]]
+ result4 = s.iloc[[0, 2, 4]]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, result4)
+
+ result1 = s[[1.6, 5, 10]]
+ result2 = s.loc[[1.6, 5, 10]]
+ result3 = s.ix[[1.6, 5, 10]]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, Series(
+ [np.nan, 2, 4], index=[1.6, 5, 10]))
+
+ result1 = s[[0, 1, 2]]
+ result2 = s.ix[[0, 1, 2]]
+ result3 = s.loc[[0, 1, 2]]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, Series(
+ [0.0, np.nan, np.nan], index=[0, 1, 2]))
+
+ result1 = s.loc[[2.5, 5]]
+ result2 = s.ix[[2.5, 5]]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))
+
+ result1 = s[[2.5]]
+ result2 = s.ix[[2.5]]
+ result3 = s.loc[[2.5]]
+ assert_series_equal(result1, result2)
+ assert_series_equal(result1, result3)
+ assert_series_equal(result1, Series([1], index=[2.5]))
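
A gloss on the rules this new file locks in, since they are easy to mix
up: on a float index, [] and .loc are label-based (integer indexers are
matched against the float labels), .iloc is strictly positional, and .ix
behaved like .loc here prior to its later deprecation. A minimal sketch
of the doc example above:

    import pandas as pd

    s = pd.Series(range(5), index=[1.5, 2, 3, 4.5, 5])

    s[3]       # label-based -> 2, the value stored at label 3.0
    s.loc[3]   # label-based -> 2
    s.iloc[3]  # positional  -> 3, the fourth element
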
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/indexing/test_indexing.py
similarity index 84%
rename from pandas/tests/test_indexing.py
rename to pandas/tests/indexing/test_indexing.py
index 6a904c67fffeb..89552ab776608 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -17,7 +17,7 @@
from pandas import option_context
from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice
from pandas.core.api import (DataFrame, Index, Series, Panel, isnull,
- MultiIndex, Float64Index, Timestamp, Timedelta)
+ MultiIndex, Timestamp, Timedelta)
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
assert_frame_equal, assert_panel_equal,
assert_attr_equal)
@@ -699,6 +699,29 @@ def test_iloc_setitem(self):
expected = Series([0, 1, 0], index=[4, 5, 6])
assert_series_equal(s, expected)
+ def test_loc_setitem_slice(self):
+ # GH10503
+
+ # assigning the same type should not change the type
+ df1 = DataFrame({'a': [0, 1, 1],
+ 'b': Series([100, 200, 300], dtype='uint32')})
+ ix = df1['a'] == 1
+ newb1 = df1.loc[ix, 'b'] + 1
+ df1.loc[ix, 'b'] = newb1
+ expected = DataFrame({'a': [0, 1, 1],
+ 'b': Series([100, 201, 301], dtype='uint32')})
+ assert_frame_equal(df1, expected)
+
+ # assigning a new type should get the inferred type
+ df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+ dtype='uint64')
+ ix = df1['a'] == 1
+ newb2 = df2.loc[ix, 'b']
+ df1.loc[ix, 'b'] = newb2
+ expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+ dtype='uint64')
+ assert_frame_equal(df2, expected)
+
def test_ix_loc_setitem_consistency(self):
# GH 5771
@@ -3256,12 +3279,12 @@ def test_multiindex_assignment(self):
df.ix[4, 'c'] = arr
assert_series_equal(df.ix[4, 'c'], Series(arr, index=[8, 10], name='c',
- dtype='int64'))
+ dtype='float64'))
# scalar ok
df.ix[4, 'c'] = 10
assert_series_equal(df.ix[4, 'c'], Series(10, index=[8, 10], name='c',
- dtype='int64'))
+ dtype='float64'))
# invalid assignments
def f():
@@ -3495,29 +3518,29 @@ def test_iloc_mask(self):
'integer type is not available'),
}
- warnings.filterwarnings(action='ignore', category=UserWarning)
- result = dict()
- for idx in [None, 'index', 'locs']:
- mask = (df.nums > 2).values
- if idx:
- mask = Series(mask, list(reversed(getattr(df, idx))))
- for method in ['', '.loc', '.iloc']:
- try:
- if method:
- accessor = getattr(df, method[1:])
- else:
- accessor = df
- ans = str(bin(accessor[mask]['nums'].sum()))
- except Exception as e:
- ans = str(e)
-
- key = tuple([idx, method])
- r = expected.get(key)
- if r != ans:
- raise AssertionError(
- "[%s] does not match [%s], received [%s]"
- % (key, ans, r))
- warnings.filterwarnings(action='always', category=UserWarning)
+ # UserWarnings from reindex of a boolean mask
+ with warnings.catch_warnings(record=True):
+ result = dict()
+ for idx in [None, 'index', 'locs']:
+ mask = (df.nums > 2).values
+ if idx:
+ mask = Series(mask, list(reversed(getattr(df, idx))))
+ for method in ['', '.loc', '.iloc']:
+ try:
+ if method:
+ accessor = getattr(df, method[1:])
+ else:
+ accessor = df
+ ans = str(bin(accessor[mask]['nums'].sum()))
+ except Exception as e:
+ ans = str(e)
+
+ key = tuple([idx, method])
+ r = expected.get(key)
+ if r != ans:
+ raise AssertionError(
+ "[%s] does not match [%s], received [%s]"
+ % (key, ans, r))
def test_ix_slicing_strings(self):
# GH3836
@@ -4956,324 +4979,6 @@ def test_float64index_slicing_bug(self):
result = s.value_counts()
str(result)
- def test_floating_index_doc_example(self):
-
- index = Index([1.5, 2, 3, 4.5, 5])
- s = Series(range(5), index=index)
- self.assertEqual(s[3], 2)
- self.assertEqual(s.ix[3], 2)
- self.assertEqual(s.loc[3], 2)
- self.assertEqual(s.iloc[3], 3)
-
- def test_floating_index(self):
-
- # related 236
- # scalar/slicing of a float index
- s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
-
- # label based slicing
- result1 = s[1.0:3.0]
- result2 = s.ix[1.0:3.0]
- result3 = s.loc[1.0:3.0]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
-
- # exact indexing when found
- result1 = s[5.0]
- result2 = s.loc[5.0]
- result3 = s.ix[5.0]
- self.assertEqual(result1, result2)
- self.assertEqual(result1, result3)
-
- result1 = s[5]
- result2 = s.loc[5]
- result3 = s.ix[5]
- self.assertEqual(result1, result2)
- self.assertEqual(result1, result3)
-
- self.assertEqual(s[5.0], s[5])
-
- # value not found (and no fallbacking at all)
-
- # scalar integers
- self.assertRaises(KeyError, lambda: s.loc[4])
- self.assertRaises(KeyError, lambda: s.ix[4])
- self.assertRaises(KeyError, lambda: s[4])
-
- # fancy floats/integers create the correct entry (as nan)
- # fancy tests
- expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
- for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
- assert_series_equal(s[fancy_idx], expected)
- assert_series_equal(s.loc[fancy_idx], expected)
- assert_series_equal(s.ix[fancy_idx], expected)
-
- expected = Series([2, 0], index=Index([5, 0], dtype='int64'))
- for fancy_idx in [[5, 0], np.array([5, 0])]: # int
- assert_series_equal(s[fancy_idx], expected)
- assert_series_equal(s.loc[fancy_idx], expected)
- assert_series_equal(s.ix[fancy_idx], expected)
-
- # all should return the same as we are slicing 'the same'
- result1 = s.loc[2:5]
- result2 = s.loc[2.0:5.0]
- result3 = s.loc[2.0:5]
- result4 = s.loc[2.1:5]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, result4)
-
- # previously this did fallback indexing
- result1 = s[2:5]
- result2 = s[2.0:5.0]
- result3 = s[2.0:5]
- result4 = s[2.1:5]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, result4)
-
- result1 = s.ix[2:5]
- result2 = s.ix[2.0:5.0]
- result3 = s.ix[2.0:5]
- result4 = s.ix[2.1:5]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, result4)
-
- # combined test
- result1 = s.loc[2:5]
- result2 = s.ix[2:5]
- result3 = s[2:5]
-
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
-
- # list selection
- result1 = s[[0.0, 5, 10]]
- result2 = s.loc[[0.0, 5, 10]]
- result3 = s.ix[[0.0, 5, 10]]
- result4 = s.iloc[[0, 2, 4]]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, result4)
-
- result1 = s[[1.6, 5, 10]]
- result2 = s.loc[[1.6, 5, 10]]
- result3 = s.ix[[1.6, 5, 10]]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, Series(
- [np.nan, 2, 4], index=[1.6, 5, 10]))
-
- result1 = s[[0, 1, 2]]
- result2 = s.ix[[0, 1, 2]]
- result3 = s.loc[[0, 1, 2]]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, Series(
- [0.0, np.nan, np.nan], index=[0, 1, 2]))
-
- result1 = s.loc[[2.5, 5]]
- result2 = s.ix[[2.5, 5]]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))
-
- result1 = s[[2.5]]
- result2 = s.ix[[2.5]]
- result3 = s.loc[[2.5]]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
- assert_series_equal(result1, Series([1], index=[2.5]))
-
- def test_scalar_indexer(self):
- # float indexing checked above
-
- def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None):
-
- # related 236/4850
- # trying to access with a float index
- s = Series(np.arange(len(index)), index=index)
-
- if iloc is None:
- iloc = TypeError
- self.assertRaises(iloc, lambda: s.iloc[3.5])
- if loc is None:
- loc = TypeError
- self.assertRaises(loc, lambda: s.loc[3.5])
- if ix is None:
- ix = TypeError
- self.assertRaises(ix, lambda: s.ix[3.5])
- if getitem is None:
- getitem = TypeError
- self.assertRaises(getitem, lambda: s[3.5])
-
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
- tm.makeIntIndex, tm.makeRangeIndex, tm.makeDateIndex,
- tm.makePeriodIndex]:
- check_invalid(index())
- check_invalid(Index(np.arange(5) * 2.5),
- loc=KeyError,
- ix=KeyError,
- getitem=KeyError)
-
- def check_index(index, error):
- index = index()
- s = Series(np.arange(len(index)), index=index)
-
- # positional selection
- result1 = s[5]
- self.assertRaises(TypeError, lambda: s[5.0])
- result3 = s.iloc[5]
- self.assertRaises(TypeError, lambda: s.iloc[5.0])
-
- # by value
- self.assertRaises(TypeError, lambda: s.loc[5])
- self.assertRaises(TypeError, lambda: s.loc[5.0])
-
- # this is fallback, so it works
- result5 = s.ix[5]
- self.assertRaises(error, lambda: s.ix[5.0])
-
- self.assertEqual(result1, result3)
- self.assertEqual(result1, result5)
-
- # string-like
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex]:
- check_index(index, TypeError)
-
- # datetimelike
- for index in [tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]:
- check_index(index, TypeError)
-
- # exact indexing when found on IntIndex
- s = Series(np.arange(10), dtype='int64')
-
- self.assertRaises(TypeError, lambda: s[5.0])
- self.assertRaises(TypeError, lambda: s.loc[5.0])
- self.assertRaises(TypeError, lambda: s.ix[5.0])
- result4 = s[5]
- result5 = s.loc[5]
- result6 = s.ix[5]
- self.assertEqual(result4, result5)
- self.assertEqual(result4, result6)
-
- def test_slice_indexer(self):
- def check_iloc_compat(s):
- # these are exceptions
- self.assertRaises(TypeError, lambda: s.iloc[6.0:8])
- self.assertRaises(TypeError, lambda: s.iloc[6.0:8.0])
- self.assertRaises(TypeError, lambda: s.iloc[6:8.0])
-
- def check_slicing_positional(index):
-
- s = Series(np.arange(len(index)) + 10, index=index)
-
- # these are all positional
- result1 = s[2:5]
- result2 = s.ix[2:5]
- result3 = s.iloc[2:5]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
-
- # loc will fail
- self.assertRaises(TypeError, lambda: s.loc[2:5])
-
- # make all float slicing fail
- self.assertRaises(TypeError, lambda: s[2.0:5])
- self.assertRaises(TypeError, lambda: s[2.0:5.0])
- self.assertRaises(TypeError, lambda: s[2:5.0])
-
- self.assertRaises(TypeError, lambda: s.ix[2.0:5])
- self.assertRaises(TypeError, lambda: s.ix[2.0:5.0])
- self.assertRaises(TypeError, lambda: s.ix[2:5.0])
-
- self.assertRaises(TypeError, lambda: s.loc[2.0:5])
- self.assertRaises(TypeError, lambda: s.loc[2.0:5.0])
- self.assertRaises(TypeError, lambda: s.loc[2:5.0])
-
- check_iloc_compat(s)
-
- # all index types except int, float
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
- tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]:
- check_slicing_positional(index())
-
- ############
- # IntIndex #
- ############
- for index in [tm.makeIntIndex(), tm.makeRangeIndex()]:
-
- s = Series(np.arange(len(index), dtype='int64') + 10, index + 5)
-
- # this is positional
- result1 = s[2:5]
- result4 = s.iloc[2:5]
- assert_series_equal(result1, result4)
-
- # these are all label based
- result2 = s.ix[2:5]
- result3 = s.loc[2:5]
- assert_series_equal(result2, result3)
-
- # float slicers on an int index with ix
- expected = Series([11, 12, 13], index=[6, 7, 8])
- result = s.ix[6.0:8.5]
- assert_series_equal(result, expected)
-
- result = s.ix[5.5:8.5]
- assert_series_equal(result, expected)
-
- result = s.ix[5.5:8.0]
- assert_series_equal(result, expected)
-
- for method in ['loc', 'iloc']:
- # make all float slicing fail for .loc with an int index
- self.assertRaises(TypeError,
- lambda: getattr(s, method)[6.0:8])
- self.assertRaises(TypeError,
- lambda: getattr(s, method)[6.0:8.0])
- self.assertRaises(TypeError,
- lambda: getattr(s, method)[6:8.0])
-
- # make all float slicing fail for [] with an int index
- self.assertRaises(TypeError, lambda: s[6.0:8])
- self.assertRaises(TypeError, lambda: s[6.0:8.0])
- self.assertRaises(TypeError, lambda: s[6:8.0])
-
- check_iloc_compat(s)
-
- ##############
- # FloatIndex #
- ##############
- s.index = s.index.astype('float64')
-
- # these are all value based
- result1 = s[6:8]
- result2 = s.ix[6:8]
- result3 = s.loc[6:8]
- assert_series_equal(result1, result2)
- assert_series_equal(result1, result3)
-
- # these are valid for all methods
- # these are treated like labels (e.g. the rhs IS included)
- def compare(slicers, expected):
- for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix]:
- for slices in slicers:
-
- result = method(s)[slices]
- assert_series_equal(result, expected)
-
- compare([slice(6.0, 8), slice(6.0, 8.0), slice(6, 8.0)],
- s[(s.index >= 6.0) & (s.index <= 8)])
- compare([slice(6.5, 8), slice(6.5, 8.5)],
- s[(s.index >= 6.5) & (s.index <= 8.5)])
- compare([slice(6, 8.5)], s[(s.index >= 6.0) & (s.index <= 8.5)])
- compare([slice(6.5, 6.5)], s[(s.index >= 6.5) & (s.index <= 6.5)])
-
- check_iloc_compat(s)
-
def test_set_ix_out_of_bounds_axis_0(self):
df = pd.DataFrame(
randn(2, 5), index=["row%s" % i for i in range(2)],
@@ -5339,347 +5044,46 @@ def test_index_type_coercion(self):
self.assertTrue(s.index.is_integer())
- for attr in ['ix', 'loc']:
+ for indexer in [lambda x: x.ix,
+ lambda x: x.loc,
+ lambda x: x]:
s2 = s.copy()
- getattr(s2, attr)[0.1] = 0
+ indexer(s2)[0.1] = 0
self.assertTrue(s2.index.is_floating())
- self.assertTrue(getattr(s2, attr)[0.1] == 0)
+ self.assertTrue(indexer(s2)[0.1] == 0)
s2 = s.copy()
- getattr(s2, attr)[0.0] = 0
+ indexer(s2)[0.0] = 0
exp = s.index
if 0 not in s:
exp = Index(s.index.tolist() + [0])
tm.assert_index_equal(s2.index, exp)
s2 = s.copy()
- getattr(s2, attr)['0'] = 0
+ indexer(s2)['0'] = 0
self.assertTrue(s2.index.is_object())
- # setitem
- s2 = s.copy()
- s2[0.1] = 0
- self.assertTrue(s2.index.is_floating())
- self.assertTrue(s2[0.1] == 0)
-
- s2 = s.copy()
- s2[0.0] = 0
- exp = s.index
- if 0 not in s:
- exp = Index(s.index.tolist() + [0])
- tm.assert_index_equal(s2.index, exp)
-
- s2 = s.copy()
- s2['0'] = 0
- self.assertTrue(s2.index.is_object())
-
for s in [Series(range(5), index=np.arange(5.))]:
self.assertTrue(s.index.is_floating())
- for attr in ['ix', 'loc']:
+ for idxr in [lambda x: x.ix,
+ lambda x: x.loc,
+ lambda x: x]:
s2 = s.copy()
- getattr(s2, attr)[0.1] = 0
+ idxr(s2)[0.1] = 0
self.assertTrue(s2.index.is_floating())
- self.assertTrue(getattr(s2, attr)[0.1] == 0)
+ self.assertTrue(idxr(s2)[0.1] == 0)
s2 = s.copy()
- getattr(s2, attr)[0.0] = 0
+ idxr(s2)[0.0] = 0
tm.assert_index_equal(s2.index, s.index)
s2 = s.copy()
- getattr(s2, attr)['0'] = 0
+ idxr(s2)['0'] = 0
self.assertTrue(s2.index.is_object())
- # setitem
- s2 = s.copy()
- s2[0.1] = 0
- self.assertTrue(s2.index.is_floating())
- self.assertTrue(s2[0.1] == 0)
-
- s2 = s.copy()
- s2[0.0] = 0
- tm.assert_index_equal(s2.index, s.index)
-
- s2 = s.copy()
- s2['0'] = 0
- self.assertTrue(s2.index.is_object())
-
- def test_invalid_scalar_float_indexers_error(self):
-
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
- tm.makeCategoricalIndex,
- tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]:
-
- i = index(5)
-
- s = Series(np.arange(len(i)), index=i)
-
- def f():
- s.iloc[3.0]
- self.assertRaisesRegexp(TypeError,
- 'cannot do positional indexing',
- f)
-
- def test_invalid_scalar_float_indexers(self):
-
- # GH 4892
- # float_indexers should raise exceptions
- # on appropriate Index types & accessors
-
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
- tm.makeCategoricalIndex,
- tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]:
-
- i = index(5)
-
- for s in [Series(
- np.arange(len(i)), index=i), DataFrame(
- np.random.randn(
- len(i), len(i)), index=i, columns=i)]:
-
- for attr in ['iloc', 'loc', 'ix', '__getitem__']:
- def f():
- getattr(s, attr)()[3.0]
- self.assertRaises(TypeError, f)
-
- # setting only fails with iloc as
- # the others expand the index
- def f():
- s.iloc[3.0] = 0
- self.assertRaises(TypeError, f)
-
- # fallsback to position selection ,series only
- s = Series(np.arange(len(i)), index=i)
- s[3]
- self.assertRaises(TypeError, lambda: s[3.0])
-
- # integer index
- for index in [tm.makeIntIndex, tm.makeRangeIndex]:
-
- i = index(5)
- for s in [Series(np.arange(len(i))),
- DataFrame(np.random.randn(len(i), len(i)),
- index=i, columns=i)]:
-
- # any kind of get access should fail
- for attr in ['iloc', 'loc', 'ix']:
- def f():
- getattr(s, attr)[3.0]
- self.assertRaises(TypeError, f)
- error = KeyError if isinstance(s, DataFrame) else TypeError
- self.assertRaises(error, lambda: s[3.0])
-
- # setting only fails with iloc as
- def f():
- s.iloc[3.0] = 0
- self.assertRaises(TypeError, f)
-
- # other indexers will coerce to an object index
- # tested explicity in: test_invalid_scalar_float_indexers
- # above
-
- # floats index
- index = tm.makeFloatIndex(5)
- for s in [Series(np.arange(len(index)), index=index),
- DataFrame(np.random.randn(len(index), len(index)),
- index=index, columns=index)]:
-
- # assert all operations except for iloc are ok
- indexer = index[3]
- expected = s.iloc[3]
-
- if isinstance(s, Series):
- compare = self.assertEqual
- else:
- compare = tm.assert_series_equal
-
- for attr in ['loc', 'ix']:
-
- # getting
- result = getattr(s, attr)[indexer]
- compare(result, expected)
-
- # setting
- s2 = s.copy()
-
- def f():
- getattr(s2, attr)[indexer] = expected
- result = getattr(s2, attr)[indexer]
- compare(result, expected)
-
- # random integer is a KeyError
- self.assertRaises(KeyError, lambda: getattr(s, attr)[3])
-
- # iloc succeeds with an integer
- result = s.iloc[3]
- compare(result, expected)
-
- s2 = s.copy()
-
- def f():
- s2.iloc[3] = expected
- result = s2.iloc[3]
- compare(result, expected)
-
- # iloc raises with a float
- self.assertRaises(TypeError, lambda: s.iloc[3.0])
-
- def f():
- s.iloc[3.0] = 0
- self.assertRaises(TypeError, f)
-
- # getitem
-
- # getting
- if isinstance(s, DataFrame):
- expected = s.iloc[:, 3]
- result = s[indexer]
- compare(result, expected)
-
- # setting
- s2 = s.copy()
-
- def f():
- s2[indexer] = expected
- result = s2[indexer]
- compare(result, expected)
-
- # random integer is a KeyError
- result = self.assertRaises(KeyError, lambda: s[3])
-
- def test_invalid_slice_float_indexers(self):
-
- # GH 4892
- # float_indexers should raise exceptions
- # on appropriate Index types & accessors
-
- for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
- tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]:
-
- index = index(5)
- for s in [Series(range(5), index=index),
- DataFrame(np.random.randn(5, 2), index=index)]:
-
- # getitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- def f():
- s.iloc[l]
- self.assertRaises(TypeError, f)
-
- def f():
- s.loc[l]
- self.assertRaises(TypeError, f)
-
- def f():
- s[l]
- self.assertRaises(TypeError, f)
-
- def f():
- s.ix[l]
- self.assertRaises(TypeError, f)
-
- # setitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- def f():
- s.iloc[l] = 0
- self.assertRaises(TypeError, f)
-
- def f():
- s.loc[l] = 0
- self.assertRaises(TypeError, f)
-
- def f():
- s[l] = 0
- self.assertRaises(TypeError, f)
-
- def f():
- s.ix[l] = 0
- self.assertRaises(TypeError, f)
-
- # same as above, but for Integer based indexes
- for index in [tm.makeIntIndex, tm.makeRangeIndex]:
-
- index = index(5)
- for s in [Series(range(5), index=index),
- DataFrame(np.random.randn(5, 2), index=index)]:
-
- # getitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- def f():
- s.iloc[l]
- self.assertRaises(TypeError, f)
-
- def f():
- s.loc[l]
- self.assertRaises(TypeError, f)
-
- def f():
- s[l]
- self.assertRaises(TypeError, f)
-
- # ix allows float slicing
- s.ix[l]
-
- # setitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- def f():
- s.iloc[l] = 0
- self.assertRaises(TypeError, f)
-
- def f():
- s.loc[l] = 0
- self.assertRaises(TypeError, f)
-
- def f():
- s[l] = 0
- self.assertRaises(TypeError, f)
-
- # ix allows float slicing
- s.ix[l] = 0
-
- # same as above, but for floats
- index = tm.makeFloatIndex(5)
- for s in [Series(range(5), index=index),
- DataFrame(np.random.randn(5, 2), index=index)]:
-
- # getitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- # ix is ok
- result1 = s.ix[3:4]
- result2 = s.ix[3.0:4]
- result3 = s.ix[3.0:4.0]
- result4 = s.ix[3:4.0]
- self.assertTrue(result1.equals(result2))
- self.assertTrue(result1.equals(result3))
- self.assertTrue(result1.equals(result4))
-
- # setitem
- for l in [slice(3.0, 4),
- slice(3, 4.0),
- slice(3.0, 4.0)]:
-
- pass
-
def test_float_index_to_mixed(self):
df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
df['a'] = 10
@@ -5906,338 +5310,6 @@ def test_maybe_numeric_slice(self):
self.assertEqual(result, expected)
-class TestCategoricalIndex(tm.TestCase):
-
- def setUp(self):
-
- self.df = DataFrame({'A': np.arange(6, dtype='int64'),
- 'B': Series(list('aabbca')).astype(
- 'category', categories=list(
- 'cab'))}).set_index('B')
- self.df2 = DataFrame({'A': np.arange(6, dtype='int64'),
- 'B': Series(list('aabbca')).astype(
- 'category', categories=list(
- 'cabe'))}).set_index('B')
- self.df3 = DataFrame({'A': np.arange(6, dtype='int64'),
- 'B': (Series([1, 1, 2, 1, 3, 2])
- .astype('category', categories=[3, 2, 1],
- ordered=True))}).set_index('B')
- self.df4 = DataFrame({'A': np.arange(6, dtype='int64'),
- 'B': (Series([1, 1, 2, 1, 3, 2])
- .astype('category', categories=[3, 2, 1],
- ordered=False))}).set_index('B')
-
- def test_loc_scalar(self):
- result = self.df.loc['a']
- expected = (DataFrame({'A': [0, 1, 5],
- 'B': (Series(list('aaa'))
- .astype('category',
- categories=list('cab')))})
- .set_index('B'))
- assert_frame_equal(result, expected)
-
- df = self.df.copy()
- df.loc['a'] = 20
- expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20],
- 'B': (Series(list('aabbca'))
- .astype('category',
- categories=list('cab')))})
- .set_index('B'))
- assert_frame_equal(df, expected)
-
- # value not in the categories
- self.assertRaises(KeyError, lambda: df.loc['d'])
-
- def f():
- df.loc['d'] = 10
-
- self.assertRaises(TypeError, f)
-
- def f():
- df.loc['d', 'A'] = 10
-
- self.assertRaises(TypeError, f)
-
- def f():
- df.loc['d', 'C'] = 10
-
- self.assertRaises(TypeError, f)
-
- def test_loc_listlike(self):
-
- # list of labels
- result = self.df.loc[['c', 'a']]
- expected = self.df.iloc[[4, 0, 1, 5]]
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.loc[['a', 'b', 'e']]
- exp_index = pd.CategoricalIndex(
- list('aaabbe'), categories=list('cabe'), name='B')
- expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
- assert_frame_equal(result, expected, check_index_type=True)
-
- # element in the categories but not in the values
- self.assertRaises(KeyError, lambda: self.df2.loc['e'])
-
- # assign is ok
- df = self.df2.copy()
- df.loc['e'] = 20
- result = df.loc[['a', 'b', 'e']]
- exp_index = pd.CategoricalIndex(
- list('aaabbe'), categories=list('cabe'), name='B')
- expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index)
- assert_frame_equal(result, expected)
-
- df = self.df2.copy()
- result = df.loc[['a', 'b', 'e']]
- exp_index = pd.CategoricalIndex(
- list('aaabbe'), categories=list('cabe'), name='B')
- expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
- assert_frame_equal(result, expected, check_index_type=True)
-
- # not all labels in the categories
- self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']])
-
- def test_loc_listlike_dtypes(self):
- # GH 11586
-
- # unique categories and codes
- index = pd.CategoricalIndex(['a', 'b', 'c'])
- df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
-
- # unique slice
- res = df.loc[['a', 'b']]
- exp = DataFrame({'A': [1, 2],
- 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b']))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- # duplicated slice
- res = df.loc[['a', 'a', 'b']]
- exp = DataFrame({'A': [1, 1, 2],
- 'B': [4, 4, 5]},
- index=pd.CategoricalIndex(['a', 'a', 'b']))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- with tm.assertRaisesRegexp(
- KeyError,
- 'a list-indexer must only include values that are '
- 'in the categories'):
- df.loc[['a', 'x']]
-
- # duplicated categories and codes
- index = pd.CategoricalIndex(['a', 'b', 'a'])
- df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
-
- # unique slice
- res = df.loc[['a', 'b']]
- exp = DataFrame({'A': [1, 3, 2],
- 'B': [4, 6, 5]},
- index=pd.CategoricalIndex(['a', 'a', 'b']))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- # duplicated slice
- res = df.loc[['a', 'a', 'b']]
- exp = DataFrame(
- {'A': [1, 3, 1, 3, 2],
- 'B': [4, 6, 4, 6, 5
- ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- with tm.assertRaisesRegexp(
- KeyError,
- 'a list-indexer must only include values '
- 'that are in the categories'):
- df.loc[['a', 'x']]
-
- # contains unused category
- index = pd.CategoricalIndex(
- ['a', 'b', 'a', 'c'], categories=list('abcde'))
- df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
-
- res = df.loc[['a', 'b']]
- exp = DataFrame({'A': [1, 3, 2],
- 'B': [5, 7, 6]}, index=pd.CategoricalIndex(
- ['a', 'a', 'b'], categories=list('abcde')))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- res = df.loc[['a', 'e']]
- exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
- index=pd.CategoricalIndex(['a', 'a', 'e'],
- categories=list('abcde')))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- # duplicated slice
- res = df.loc[['a', 'a', 'b']]
- exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
- index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
- categories=list('abcde')))
- tm.assert_frame_equal(res, exp, check_index_type=True)
-
- with tm.assertRaisesRegexp(
- KeyError,
- 'a list-indexer must only include values '
- 'that are in the categories'):
- df.loc[['a', 'x']]
-
- def test_read_only_source(self):
- # GH 10043
- rw_array = np.eye(10)
- rw_df = DataFrame(rw_array)
-
- ro_array = np.eye(10)
- ro_array.setflags(write=False)
- ro_df = DataFrame(ro_array)
-
- assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
- assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
- assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
- assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
-
- assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
- assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
- assert_series_equal(rw_df.loc[1], ro_df.loc[1])
- assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
-
- def test_reindexing(self):
-
- # reindexing
- # convert to a regular index
- result = self.df2.reindex(['a', 'b', 'e'])
- expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
- 'B': Series(list('aaabbe'))}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['a', 'b'])
- expected = DataFrame({'A': [0, 1, 5, 2, 3],
- 'B': Series(list('aaabb'))}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['e'])
- expected = DataFrame({'A': [np.nan],
- 'B': Series(['e'])}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['d'])
- expected = DataFrame({'A': [np.nan],
- 'B': Series(['d'])}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- # since we are actually reindexing with a Categorical
- # then return a Categorical
- cats = list('cabe')
-
- result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats))
- expected = DataFrame({'A': [0, 1, 5, np.nan],
- 'B': Series(list('aaad')).astype(
- 'category', categories=cats)}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(pd.Categorical(['a'], categories=cats))
- expected = DataFrame({'A': [0, 1, 5],
- 'B': Series(list('aaa')).astype(
- 'category', categories=cats)}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['a', 'b', 'e'])
- expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
- 'B': Series(list('aaabbe'))}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['a', 'b'])
- expected = DataFrame({'A': [0, 1, 5, 2, 3],
- 'B': Series(list('aaabb'))}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(['e'])
- expected = DataFrame({'A': [np.nan],
- 'B': Series(['e'])}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- # give back the type of categorical that we received
- result = self.df2.reindex(pd.Categorical(
- ['a', 'd'], categories=cats, ordered=True))
- expected = DataFrame(
- {'A': [0, 1, 5, np.nan],
- 'B': Series(list('aaad')).astype('category', categories=cats,
- ordered=True)}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- result = self.df2.reindex(pd.Categorical(
- ['a', 'd'], categories=['a', 'd']))
- expected = DataFrame({'A': [0, 1, 5, np.nan],
- 'B': Series(list('aaad')).astype(
- 'category', categories=['a', 'd'
- ])}).set_index('B')
- assert_frame_equal(result, expected, check_index_type=True)
-
- # passed duplicate indexers are not allowed
- self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a']))
-
- # args NotImplemented ATM
- self.assertRaises(NotImplementedError,
- lambda: self.df2.reindex(['a'], method='ffill'))
- self.assertRaises(NotImplementedError,
- lambda: self.df2.reindex(['a'], level=1))
- self.assertRaises(NotImplementedError,
- lambda: self.df2.reindex(['a'], limit=2))
-
- def test_loc_slice(self):
- # slicing
- # not implemented ATM
- # GH9748
-
- self.assertRaises(TypeError, lambda: self.df.loc[1:5])
-
- # result = df.loc[1:5]
- # expected = df.iloc[[1,2,3,4]]
- # assert_frame_equal(result, expected)
-
- def test_boolean_selection(self):
-
- df3 = self.df3
- df4 = self.df4
-
- result = df3[df3.index == 'a']
- expected = df3.iloc[[]]
- assert_frame_equal(result, expected)
-
- result = df4[df4.index == 'a']
- expected = df4.iloc[[]]
- assert_frame_equal(result, expected)
-
- result = df3[df3.index == 1]
- expected = df3.iloc[[0, 1, 3]]
- assert_frame_equal(result, expected)
-
- result = df4[df4.index == 1]
- expected = df4.iloc[[0, 1, 3]]
- assert_frame_equal(result, expected)
-
- # since we have an ordered categorical
-
- # CategoricalIndex([1, 1, 2, 1, 3, 2],
- # categories=[3, 2, 1],
- # ordered=True,
- # name=u'B')
- result = df3[df3.index < 2]
- expected = df3.iloc[[4]]
- assert_frame_equal(result, expected)
-
- result = df3[df3.index > 1]
- expected = df3.iloc[[]]
- assert_frame_equal(result, expected)
-
- # unordered
- # cannot be compared
-
- # CategoricalIndex([1, 1, 2, 1, 3, 2],
- # categories=[3, 2, 1],
- # ordered=False,
- # name=u'B')
- self.assertRaises(TypeError, lambda: df4[df4.index < 2])
- self.assertRaises(TypeError, lambda: df4[df4.index > 1])
-
-
class TestSeriesNoneCoercion(tm.TestCase):
EXPECTED_RESULTS = [
# For numeric series, we should coerce to NaN.
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index d4121fb86de79..6e9df1661d139 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -32,8 +32,8 @@ def test_dt_namespace_accessor(self):
'weekofyear', 'week', 'dayofweek', 'weekday',
'dayofyear', 'quarter', 'freq', 'days_in_month',
'daysinmonth']
- ok_for_period = ok_for_base + ['qyear']
- ok_for_period_methods = ['strftime']
+ ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time']
+ ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond',
'is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end',
diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py
index 2ea95b4e0b300..68c0b81eb18ce 100644
--- a/pandas/tests/test_compat.py
+++ b/pandas/tests/test_compat.py
@@ -4,7 +4,8 @@
"""
from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap,
- lfilter, builtins)
+ lfilter, builtins, iterkeys, itervalues, iteritems,
+ next)
import pandas.util.testing as tm
@@ -61,3 +62,8 @@ def test_zip(self):
expected = list(builtins.zip(*lst)),
lengths = 10,
self.check_result(actual, expected, lengths)
+
+ def test_dict_iterators(self):
+ self.assertEqual(next(itervalues({1: 2})), 2)
+ self.assertEqual(next(iterkeys({1: 2})), 1)
+ self.assertEqual(next(iteritems({1: 2})), (1, 2))
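
For readers unfamiliar with the compat layer being tested here: iterkeys,
itervalues and iteritems smooth over the dict-iteration split between
Python 2 (d.iterkeys() and friends) and Python 3 (d.keys() returning
views). A rough illustration of what such a shim typically looks like
(a sketch, not the actual pandas.compat source):

    import sys

    if sys.version_info[0] >= 3:
        def iteritems(d, **kwargs):
            return iter(d.items(**kwargs))
    else:
        def iteritems(d, **kwargs):
            return d.iteritems(**kwargs)

    next(iteritems({1: 2}))  # -> (1, 2), as the new test asserts
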
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index a309d88e62857..1198d6b194c60 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -956,7 +956,8 @@ def test_describe_objects(self):
s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a'])
result = s.describe()
expected = Series({'count': 7, 'unique': 4,
- 'top': 'a', 'freq': 3}, index=result.index)
+ 'top': 'a', 'freq': 3, 'second': 'b',
+ 'second_freq': 2}, index=result.index)
assert_series_equal(result, expected)
dt = list(self.ts.index)
@@ -1487,9 +1488,8 @@ def test_describe_typefiltering_category_bool(self):
'D_num': np.arange(24.) + .5,
'E_ts': tm.makeTimeSeries()[:24].index})
- # bool is considered numeric in describe, although not an np.number
desc = df.describe()
- expected_cols = ['C_bool', 'D_num']
+ expected_cols = ['D_num']
expected = DataFrame(dict((k, df[k].describe())
for k in expected_cols),
columns=expected_cols)
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index b339d25cd6c45..45d3fd0dad855 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -17,7 +17,9 @@
from pandas.util.decorators import cache_readonly
import pandas.core.common as com
import pandas.util.testing as tm
-from pandas.util.testing import ensure_clean
+from pandas.util.testing import (ensure_clean,
+ assert_is_valid_plot_return_object)
+
from pandas.core.config import set_option
import numpy as np
@@ -60,8 +62,8 @@ def setUp(self):
n = 100
with tm.RNGContext(42):
- gender = tm.choice(['Male', 'Female'], size=n)
- classroom = tm.choice(['A', 'B', 'C'], size=n)
+ gender = np.random.choice(['Male', 'Female'], size=n)
+ classroom = np.random.choice(['A', 'B', 'C'], size=n)
self.hist_df = DataFrame({'gender': gender,
'classroom': classroom,
@@ -3861,7 +3863,7 @@ def test_series_groupby_plotting_nominally_works(self):
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
- gender = tm.choice(['male', 'female'], size=n)
+ gender = np.random.choice(['male', 'female'], size=n)
weight.groupby(gender).plot()
tm.close()
@@ -3916,21 +3918,6 @@ def test_plot_kwargs(self):
self.assertEqual(len(res['a'].collections), 1)
-def assert_is_valid_plot_return_object(objs):
- import matplotlib.pyplot as plt
- if isinstance(objs, np.ndarray):
- for el in objs.flat:
- assert isinstance(el, plt.Axes), ('one of \'objs\' is not a '
- 'matplotlib Axes instance, '
- 'type encountered {0!r}'
- ''.format(el.__class__.__name__))
- else:
- assert isinstance(objs, (plt.Artist, tuple, dict)), \
- ('objs is neither an ndarray of Artist instances nor a '
- 'single Artist instance, tuple, or dict, "objs" is a {0!r} '
- ''.format(objs.__class__.__name__))
-
-
def _check_plot_works(f, filterwarnings='always', **kwargs):
import matplotlib.pyplot as plt
ret = None
diff --git a/pandas/tests/test_graphics_others.py b/pandas/tests/test_graphics_others.py
index 983d0c310f71d..b032ce196c113 100644
--- a/pandas/tests/test_graphics_others.py
+++ b/pandas/tests/test_graphics_others.py
@@ -641,7 +641,7 @@ def test_grouped_plot_fignums(self):
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
- gender = tm.choice(['male', 'female'], size=n)
+ gender = np.random.choice(['male', 'female'], size=n)
df = DataFrame({'height': height, 'weight': weight, 'gender': gender})
gb = df.groupby('gender')
@@ -715,7 +715,7 @@ def test_grouped_hist_legacy2(self):
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
- gender_int = tm.choice([0, 1], size=n)
+ gender_int = np.random.choice([0, 1], size=n)
df_int = DataFrame({'height': height, 'weight': weight,
'gender': gender_int})
gb = df_int.groupby('gender')
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 7ee40a7758011..947daab2017d3 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3993,11 +3993,13 @@ def test_groupby_groups_datetimeindex_tz(self):
df['datetime'] = df['datetime'].apply(
lambda d: Timestamp(d, tz='US/Pacific'))
- exp_idx1 = pd.DatetimeIndex(
- ['2011-07-19 07:00:00', '2011-07-19 07:00:00',
- '2011-07-19 08:00:00', '2011-07-19 08:00:00',
- '2011-07-19 09:00:00', '2011-07-19 09:00:00'],
- tz='US/Pacific', name='datetime')
+ exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00',
+ '2011-07-19 07:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 09:00:00',
+ '2011-07-19 09:00:00'],
+ tz='US/Pacific', name='datetime')
exp_idx2 = Index(['a', 'b'] * 3, name='label')
exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
@@ -4013,9 +4015,9 @@ def test_groupby_groups_datetimeindex_tz(self):
'value2': [1, 2, 3, 1, 2, 3]},
index=didx)
- exp_idx = pd.DatetimeIndex(
- ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
- '2011-07-19 09:00:00'], tz='Asia/Tokyo')
+ exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 09:00:00'], tz='Asia/Tokyo')
expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
index=exp_idx, columns=['value1', 'value2'])
@@ -4032,8 +4034,8 @@ def test_groupby_multi_timezone(self):
3,2000-01-31 16:50:00,America/Chicago
4,2000-01-01 16:50:00,America/New_York"""
- df = pd.read_csv(
- StringIO(data), header=None, names=['value', 'date', 'tz'])
+ df = pd.read_csv(StringIO(data), header=None,
+ names=['value', 'date', 'tz'])
result = df.groupby('tz').date.apply(
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))
@@ -4051,14 +4053,54 @@ def test_groupby_multi_timezone(self):
assert_series_equal(result, expected)
tz = 'America/Chicago'
- result = pd.to_datetime(df.groupby('tz').date.get_group(
- tz)).dt.tz_localize(tz)
- expected = pd.to_datetime(Series(
- ['2000-01-28 16:47:00', '2000-01-29 16:48:00',
- '2000-01-31 16:50:00'], index=[0, 1, 3
- ], name='date')).dt.tz_localize(tz)
+ res_values = df.groupby('tz').date.get_group(tz)
+ result = pd.to_datetime(res_values).dt.tz_localize(tz)
+ exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00',
+ '2000-01-31 16:50:00'],
+ index=[0, 1, 3], name='date')
+ expected = pd.to_datetime(exp_values).dt.tz_localize(tz)
assert_series_equal(result, expected)
+ def test_groupby_groups_periods(self):
+ dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
+ '2011-07-19 09:00:00', '2011-07-19 07:00:00',
+ '2011-07-19 08:00:00', '2011-07-19 09:00:00']
+ df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
+ 'period': [pd.Period(d, freq='H') for d in dates],
+ 'value1': np.arange(6, dtype='int64'),
+ 'value2': [1, 2] * 3})
+
+ exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00',
+ '2011-07-19 07:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 09:00:00',
+ '2011-07-19 09:00:00'],
+ freq='H', name='period')
+ exp_idx2 = Index(['a', 'b'] * 3, name='label')
+ exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
+ expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
+ 'value2': [1, 2, 2, 1, 1, 2]},
+ index=exp_idx, columns=['value1', 'value2'])
+
+ result = df.groupby(['period', 'label']).sum()
+ assert_frame_equal(result, expected)
+
+ # by level
+ didx = pd.PeriodIndex(dates, freq='H')
+ df = DataFrame({'value1': np.arange(6, dtype='int64'),
+ 'value2': [1, 2, 3, 1, 2, 3]},
+ index=didx)
+
+ exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00',
+ '2011-07-19 08:00:00',
+ '2011-07-19 09:00:00'], freq='H')
+ expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
+ index=exp_idx, columns=['value1', 'value2'])
+
+ result = df.groupby(level=0).sum()
+ assert_frame_equal(result, expected)
+
def test_groupby_reindex_inside_function(self):
from pandas.tseries.api import DatetimeIndex
@@ -5568,7 +5610,8 @@ def test_tab_completion(self):
'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum',
'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take',
'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov',
- 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin'])
+ 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
+ 'ffill', 'bfill', 'pad', 'backfill'])
self.assertEqual(results, expected)
def test_lexsort_indexer(self):
@@ -6104,6 +6147,21 @@ def test_nunique_with_object(self):
expected = pd.Series([1] * 5, name='name', index=index)
tm.assert_series_equal(result, expected)
+ def test_transform_with_non_scalar_group(self):
+ # GH 10165
+ cols = pd.MultiIndex.from_tuples([
+ ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
+ ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
+ ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
+ ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
+ df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
+ columns=cols,
+ index=['A', 'C', 'G', 'T'])
+        msg = 'transform must return a scalar value for each group.*'
+        self.assertRaisesRegexp(ValueError, msg,
+                                df.groupby(axis=1, level=1).transform,
+                                lambda z: z.div(z.sum(axis=1), axis=0))
+
def assert_fp_equal(a, b):
assert (np.abs(a - b) < 1e-12).all()
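
The new test pins down the error path; the happy path, for contrast, is a
callable that reduces each group to one scalar (a sketch, not part of the
diff):

    # transform broadcasts one scalar per group back to the input shape
    import pandas as pd

    df = pd.DataFrame({'key': ['a', 'a', 'b'], 'val': [1., 2., 3.]})
    out = df.groupby('key')['val'].transform(lambda s: s.sum())
    # out is [3., 3., 3.] -- per-group sums aligned to the original rows;
    # returning a non-scalar per group (as in GH 10165) raises ValueError
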
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index dd7468723c9c7..0a1e15921dad7 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -318,10 +318,10 @@ def test_keys(self):
def test_iteritems(self):
        # Test panel.iteritems()
# just test that it works
- for k, v in compat.iteritems(self.panel):
+ for k, v in self.panel.iteritems():
pass
- self.assertEqual(len(list(compat.iteritems(self.panel))),
+ self.assertEqual(len(list(self.panel.iteritems())),
len(self.panel.items))
@ignore_sparse_panel_future_warning
@@ -1105,7 +1105,7 @@ def test_ctor_dict(self):
assert_panel_equal(result, expected)
def test_constructor_dict_mixed(self):
- data = dict((k, v.values) for k, v in compat.iteritems(self.panel))
+ data = dict((k, v.values) for k, v in self.panel.iteritems())
result = Panel(data)
exp_major = Index(np.arange(len(self.panel.major_axis)))
self.assertTrue(result.major_axis.equals(exp_major))
@@ -1872,7 +1872,7 @@ def test_shift(self):
# negative numbers, #2164
result = self.panel.shift(-1)
expected = Panel(dict((i, f.shift(-1)[:-1])
- for i, f in compat.iteritems(self.panel)))
+ for i, f in self.panel.iteritems()))
assert_panel_equal(result, expected)
# mixed dtypes #6959
@@ -2072,7 +2072,7 @@ def test_to_excel(self):
except ImportError:
raise nose.SkipTest("need xlwt xlrd openpyxl")
- for item, df in compat.iteritems(self.panel):
+ for item, df in self.panel.iteritems():
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)
@@ -2092,7 +2092,7 @@ def test_to_excel_xlsxwriter(self):
except ImportError as e:
raise nose.SkipTest("cannot write excel file: %s" % e)
- for item, df in compat.iteritems(self.panel):
+ for item, df in self.panel.iteritems():
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 6238f13864552..40447fffdebbd 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -12,7 +12,6 @@
from pandas.core.panel4d import Panel4D
from pandas.core.series import remove_na
import pandas.core.common as com
-from pandas import compat
from pandas.util.testing import (assert_panel_equal,
assert_panel4d_equal,
@@ -232,7 +231,7 @@ def test_keys(self):
def test_iteritems(self):
"""Test panel4d.iteritems()"""
- self.assertEqual(len(list(compat.iteritems(self.panel4d))),
+ self.assertEqual(len(list(self.panel4d.iteritems())),
len(self.panel4d.labels))
def test_combinePanel4d(self):
@@ -731,7 +730,7 @@ def test_ctor_dict(self):
# assert_panel_equal(result, expected)
def test_constructor_dict_mixed(self):
- data = dict((k, v.values) for k, v in compat.iteritems(self.panel4d))
+ data = dict((k, v.values) for k, v in self.panel4d.iteritems())
result = Panel4D(data)
exp_major = Index(np.arange(len(self.panel4d.major_axis)))
self.assertTrue(result.major_axis.equals(exp_major))
diff --git a/pandas/tests/test_style.py b/pandas/tests/test_style.py
index ef5a966d65545..bfabaab8ad2f5 100644
--- a/pandas/tests/test_style.py
+++ b/pandas/tests/test_style.py
@@ -17,9 +17,12 @@
if job_name == '27_slow_nnet_LOCALE':
raise SkipTest("No jinja")
try:
- from pandas.core.style import Styler
+        # Do try/except on just jinja2, so the only reason
+        # we skip is if jinja2 can't import, not something else
+ import jinja2 # noqa
except ImportError:
raise SkipTest("No Jinja2")
+from pandas.core.style import Styler # noqa
class TestStyler(TestCase):
@@ -129,6 +132,27 @@ def test_set_properties_subset(self):
expected = {(0, 0): ['color: white']}
self.assertEqual(result, expected)
+ def test_empty_index_name_doesnt_display(self):
+ # https://github.com/pydata/pandas/pull/12090#issuecomment-180695902
+ df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]})
+ result = df.style._translate()
+
+ expected = [[{'class': 'blank', 'type': 'th', 'value': ''},
+ {'class': 'col_heading level0 col0',
+ 'display_value': 'A',
+ 'type': 'th',
+ 'value': 'A'},
+ {'class': 'col_heading level0 col1',
+ 'display_value': 'B',
+ 'type': 'th',
+ 'value': 'B'},
+ {'class': 'col_heading level0 col2',
+ 'display_value': 'C',
+ 'type': 'th',
+ 'value': 'C'}]]
+
+ self.assertEqual(result['head'], expected)
+
def test_index_name(self):
# https://github.com/pydata/pandas/issues/11655
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]})
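
The import-guard change above is a general pattern worth noting: wrap only
the optional dependency in try/except, so an unrelated ImportError in the
module under test surfaces as a failure instead of a skip. A sketch
(assumes nose's SkipTest, as used elsewhere in this file):

    # guard only the optional dependency itself
    from nose import SkipTest

    try:
        import jinja2  # noqa
    except ImportError:
        raise SkipTest("No Jinja2")

    # a real bug in pandas.core.style now raises instead of skipping
    from pandas.core.style import Styler  # noqa
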
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index cc4a6ba61306d..d5647d1b5f822 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -3,6 +3,7 @@
import sys
import warnings
+from nose.tools import assert_raises
from datetime import datetime
from numpy.random import randn
from numpy.testing.decorators import slow
@@ -14,7 +15,7 @@
notnull, concat)
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
assert_frame_equal, assert_panel_equal,
- assert_index_equal)
+ assert_index_equal, assert_numpy_array_equal)
import pandas.core.datetools as datetools
import pandas.stats.moments as mom
import pandas.core.window as rwindow
@@ -98,19 +99,6 @@ def tests_skip_nuisance(self):
result = r.sum()
assert_frame_equal(result, expected)
- def test_timedeltas(self):
-
- df = DataFrame({'A': range(5),
- 'B': pd.timedelta_range('1 day', periods=5)})
- r = df.rolling(window=3)
- result = r.sum()
- expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9],
- 'B': pd.to_timedelta([pd.NaT, pd.NaT,
- '6 days', '9 days',
- '12 days'])},
- columns=list('AB'))
- assert_frame_equal(result, expected)
-
def test_agg(self):
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
@@ -289,6 +277,218 @@ def test_deprecations(self):
mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0)
+# GH #12373 : rolling functions error on float32 data
+# make sure rolling functions work for different dtypes
+#
+# NOTE that these are yielded tests and so _create_data is
+# explicitly called; these do not inherit from unittest.TestCase
+#
+# further note that we are only checking rolling for full dtype
+# compliance (though both expanding and ewm inherit)
+class Dtype(object):
+ window = 2
+
+ funcs = {
+ 'count': lambda v: v.count(),
+ 'max': lambda v: v.max(),
+ 'min': lambda v: v.min(),
+ 'sum': lambda v: v.sum(),
+ 'mean': lambda v: v.mean(),
+ 'std': lambda v: v.std(),
+ 'var': lambda v: v.var(),
+ 'median': lambda v: v.median()
+ }
+
+ def get_expects(self):
+ expects = {
+ 'sr1': {
+ 'count': Series([1, 2, 2, 2, 2], dtype='float64'),
+ 'max': Series([np.nan, 1, 2, 3, 4], dtype='float64'),
+ 'min': Series([np.nan, 0, 1, 2, 3], dtype='float64'),
+ 'sum': Series([np.nan, 1, 3, 5, 7], dtype='float64'),
+ 'mean': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64'),
+ 'std': Series([np.nan] + [np.sqrt(.5)] * 4, dtype='float64'),
+ 'var': Series([np.nan, .5, .5, .5, .5], dtype='float64'),
+ 'median': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64')
+ },
+ 'sr2': {
+ 'count': Series([1, 2, 2, 2, 2], dtype='float64'),
+ 'max': Series([np.nan, 10, 8, 6, 4], dtype='float64'),
+ 'min': Series([np.nan, 8, 6, 4, 2], dtype='float64'),
+ 'sum': Series([np.nan, 18, 14, 10, 6], dtype='float64'),
+ 'mean': Series([np.nan, 9, 7, 5, 3], dtype='float64'),
+ 'std': Series([np.nan] + [np.sqrt(2)] * 4, dtype='float64'),
+ 'var': Series([np.nan, 2, 2, 2, 2], dtype='float64'),
+ 'median': Series([np.nan, 9, 7, 5, 3], dtype='float64')
+ },
+ 'df': {
+ 'count': DataFrame({0: Series([1, 2, 2, 2, 2]),
+ 1: Series([1, 2, 2, 2, 2])},
+ dtype='float64'),
+ 'max': DataFrame({0: Series([np.nan, 2, 4, 6, 8]),
+ 1: Series([np.nan, 3, 5, 7, 9])},
+ dtype='float64'),
+ 'min': DataFrame({0: Series([np.nan, 0, 2, 4, 6]),
+ 1: Series([np.nan, 1, 3, 5, 7])},
+ dtype='float64'),
+ 'sum': DataFrame({0: Series([np.nan, 2, 6, 10, 14]),
+ 1: Series([np.nan, 4, 8, 12, 16])},
+ dtype='float64'),
+ 'mean': DataFrame({0: Series([np.nan, 1, 3, 5, 7]),
+ 1: Series([np.nan, 2, 4, 6, 8])},
+ dtype='float64'),
+ 'std': DataFrame({0: Series([np.nan] + [np.sqrt(2)] * 4),
+ 1: Series([np.nan] + [np.sqrt(2)] * 4)},
+ dtype='float64'),
+ 'var': DataFrame({0: Series([np.nan, 2, 2, 2, 2]),
+ 1: Series([np.nan, 2, 2, 2, 2])},
+ dtype='float64'),
+ 'median': DataFrame({0: Series([np.nan, 1, 3, 5, 7]),
+ 1: Series([np.nan, 2, 4, 6, 8])},
+ dtype='float64'),
+ }
+ }
+ return expects
+
+ def _create_dtype_data(self, dtype):
+ sr1 = Series(range(5), dtype=dtype)
+ sr2 = Series(range(10, 0, -2), dtype=dtype)
+ df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
+
+ data = {
+ 'sr1': sr1,
+ 'sr2': sr2,
+ 'df': df
+ }
+
+ return data
+
+ def _create_data(self):
+ self.data = self._create_dtype_data(self.dtype)
+ self.expects = self.get_expects()
+
+ def test_dtypes(self):
+ self._create_data()
+ for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
+ f = self.funcs[f_name]
+ d = self.data[d_name]
+ exp = self.expects[d_name][f_name]
+ yield self.check_dtypes, f, f_name, d, d_name, exp
+
+ def check_dtypes(self, f, f_name, d, d_name, exp):
+ roll = d.rolling(window=self.window)
+ result = f(roll)
+ assert_almost_equal(result, exp)
+
+
+class TestDtype_object(Dtype):
+ dtype = object
+
+
+class Dtype_integer(Dtype):
+ pass
+
+
+class TestDtype_int8(Dtype_integer):
+ dtype = np.int8
+
+
+class TestDtype_int16(Dtype_integer):
+ dtype = np.int16
+
+
+class TestDtype_int32(Dtype_integer):
+ dtype = np.int32
+
+
+class TestDtype_int64(Dtype_integer):
+ dtype = np.int64
+
+
+class Dtype_uinteger(Dtype):
+ pass
+
+
+class TestDtype_uint8(Dtype_uinteger):
+ dtype = np.uint8
+
+
+class TestDtype_uint16(Dtype_uinteger):
+ dtype = np.uint16
+
+
+class TestDtype_uint32(Dtype_uinteger):
+ dtype = np.uint32
+
+
+class TestDtype_uint64(Dtype_uinteger):
+ dtype = np.uint64
+
+
+class Dtype_float(Dtype):
+ pass
+
+
+class TestDtype_float16(Dtype_float):
+ dtype = np.float16
+
+
+class TestDtype_float32(Dtype_float):
+ dtype = np.float32
+
+
+class TestDtype_float64(Dtype_float):
+ dtype = np.float64
+
+
+class TestDtype_category(Dtype):
+ dtype = 'category'
+ include_df = False
+
+ def _create_dtype_data(self, dtype):
+ sr1 = Series(range(5), dtype=dtype)
+ sr2 = Series(range(10, 0, -2), dtype=dtype)
+
+ data = {
+ 'sr1': sr1,
+ 'sr2': sr2
+ }
+
+ return data
+
+
+class DatetimeLike(Dtype):
+
+ def check_dtypes(self, f, f_name, d, d_name, exp):
+
+ roll = d.rolling(window=self.window)
+
+ if f_name == 'count':
+ result = f(roll)
+ assert_almost_equal(result, exp)
+
+ else:
+
+            # other methods are not implemented ATM
+ assert_raises(NotImplementedError, f, roll)
+
+
+class TestDtype_timedelta(DatetimeLike):
+ dtype = np.dtype('m8[ns]')
+
+
+class TestDtype_datetime(DatetimeLike):
+ dtype = np.dtype('M8[ns]')
+
+
+class TestDtype_datetime64UTC(DatetimeLike):
+ dtype = 'datetime64[ns, UTC]'
+
+ def _create_data(self):
+ raise nose.SkipTest("direct creation of extension dtype "
+ "datetime64[ns, UTC] is not supported ATM")
+
+
class TestMoments(Base):
def setUp(self):
@@ -1049,8 +1249,8 @@ def test_ewma_span_com_args(self):
B = mom.ewma(self.arr, span=20)
assert_almost_equal(A, B)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
- self.assertRaises(Exception, mom.ewma, self.arr)
+ self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20)
+ self.assertRaises(ValueError, mom.ewma, self.arr)
def test_ewma_halflife_arg(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
@@ -1058,13 +1258,78 @@ def test_ewma_halflife_arg(self):
B = mom.ewma(self.arr, halflife=10.0)
assert_almost_equal(A, B)
- self.assertRaises(Exception, mom.ewma, self.arr, span=20,
+ self.assertRaises(ValueError, mom.ewma, self.arr, span=20,
halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5,
+ self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5,
halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20,
+ self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20,
halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr)
+ self.assertRaises(ValueError, mom.ewma, self.arr)
+
+ def test_ewma_alpha_old_api(self):
+ # GH 10789
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+ a = mom.ewma(self.arr, alpha=0.61722699889169674)
+ b = mom.ewma(self.arr, com=0.62014947789973052)
+ c = mom.ewma(self.arr, span=2.240298955799461)
+ d = mom.ewma(self.arr, halflife=0.721792864318)
+ assert_numpy_array_equal(a, b)
+ assert_numpy_array_equal(a, c)
+ assert_numpy_array_equal(a, d)
+
+ def test_ewma_alpha_arg_old_api(self):
+ # GH 10789
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+ self.assertRaises(ValueError, mom.ewma, self.arr)
+ self.assertRaises(ValueError, mom.ewma, self.arr,
+ com=10.0, alpha=0.5)
+ self.assertRaises(ValueError, mom.ewma, self.arr,
+ span=10.0, alpha=0.5)
+ self.assertRaises(ValueError, mom.ewma, self.arr,
+ halflife=10.0, alpha=0.5)
+
+ def test_ewm_alpha(self):
+ # GH 10789
+ s = Series(self.arr)
+ a = s.ewm(alpha=0.61722699889169674).mean()
+ b = s.ewm(com=0.62014947789973052).mean()
+ c = s.ewm(span=2.240298955799461).mean()
+ d = s.ewm(halflife=0.721792864318).mean()
+ assert_series_equal(a, b)
+ assert_series_equal(a, c)
+ assert_series_equal(a, d)
+
+ def test_ewm_alpha_arg(self):
+ # GH 10789
+ s = Series(self.arr)
+ self.assertRaises(ValueError, s.ewm)
+ self.assertRaises(ValueError, s.ewm, com=10.0, alpha=0.5)
+ self.assertRaises(ValueError, s.ewm, span=10.0, alpha=0.5)
+ self.assertRaises(ValueError, s.ewm, halflife=10.0, alpha=0.5)
+
+ def test_ewm_domain_checks(self):
+ # GH 12492
+ s = Series(self.arr)
+ # com must satisfy: com >= 0
+ self.assertRaises(ValueError, s.ewm, com=-0.1)
+ s.ewm(com=0.0)
+ s.ewm(com=0.1)
+ # span must satisfy: span >= 1
+ self.assertRaises(ValueError, s.ewm, span=-0.1)
+ self.assertRaises(ValueError, s.ewm, span=0.0)
+ self.assertRaises(ValueError, s.ewm, span=0.9)
+ s.ewm(span=1.0)
+ s.ewm(span=1.1)
+ # halflife must satisfy: halflife > 0
+ self.assertRaises(ValueError, s.ewm, halflife=-0.1)
+ self.assertRaises(ValueError, s.ewm, halflife=0.0)
+ s.ewm(halflife=0.1)
+ # alpha must satisfy: 0 < alpha <= 1
+ self.assertRaises(ValueError, s.ewm, alpha=-0.1)
+ self.assertRaises(ValueError, s.ewm, alpha=0.0)
+ s.ewm(alpha=0.1)
+ s.ewm(alpha=1.0)
+ self.assertRaises(ValueError, s.ewm, alpha=1.1)
def test_ew_empty_arrays(self):
arr = np.array([], dtype=np.float64)
@@ -2432,3 +2697,24 @@ def test_rolling_median_memory_error(self):
n = 20000
Series(np.random.randn(n)).rolling(window=2, center=False).median()
Series(np.random.randn(n)).rolling(window=2, center=False).median()
+
+ def test_rolling_min_max_numeric_types(self):
+ # GH12373
+ types_test = [np.dtype("f{}".format(width)) for width in [4, 8]]
+ types_test.extend([np.dtype("{}{}".format(sign, width))
+ for width in [1, 2, 4, 8] for sign in "ui"])
+ for data_type in types_test:
+ # Just testing that these don't throw exceptions and that
+ # the types match. Other tests will cover quantitative
+ # correctness
+ for convert_to_float in [True]:
+ if not convert_to_float:
+ expected_type = data_type
+ else:
+ expected_type = np.dtype(float)
+ result = (DataFrame(np.arange(20, dtype=data_type))
+ .rolling(window=5).max(as_float=convert_to_float))
+ self.assertEqual(result.dtypes[0], expected_type)
+ result = (DataFrame(np.arange(20, dtype=data_type))
+ .rolling(window=5).min(as_float=convert_to_float))
+ self.assertEqual(result.dtypes[0], expected_type)
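
The magic constants in test_ewma_alpha above are one decay rate expressed
four ways; the standard ewm parameter relations reproduce them (a
numpy-only check, not part of the diff):

    # alpha = 1/(1 + com) = 2/(span + 1) = 1 - exp(ln(0.5)/halflife)
    import numpy as np

    alpha = 0.61722699889169674
    com = (1 - alpha) / alpha                    # 0.62014947789973052
    span = 2.0 / alpha - 1                       # 2.240298955799461
    halflife = np.log(0.5) / np.log(1 - alpha)   # 0.721792864318

    assert np.isclose(com, 0.62014947789973052)
    assert np.isclose(span, 2.240298955799461)
    assert np.isclose(halflife, 0.721792864318)
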
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 046d2322165b5..d5ddfe624e240 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -236,27 +236,27 @@ def test_join_on(self):
def test_join_on_fails_with_different_right_index(self):
with tm.assertRaises(ValueError):
- df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+ df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
'b': np.random.randn(3)})
- df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+ df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
'b': np.random.randn(10)},
index=tm.makeCustomIndex(10, 2))
merge(df, df2, left_on='a', right_index=True)
def test_join_on_fails_with_different_left_index(self):
with tm.assertRaises(ValueError):
- df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+ df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
'b': np.random.randn(3)},
index=tm.makeCustomIndex(10, 2))
- df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+ df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
'b': np.random.randn(10)})
merge(df, df2, right_on='b', left_index=True)
def test_join_on_fails_with_different_column_counts(self):
with tm.assertRaises(ValueError):
- df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+ df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
'b': np.random.randn(3)})
- df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+ df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
'b': np.random.randn(10)},
index=tm.makeCustomIndex(10, 2))
merge(df, df2, right_on='a', left_on=['a', 'b'])
@@ -1031,6 +1031,36 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)
+ def test_merge_on_periods(self):
+ left = pd.DataFrame({'key': pd.period_range('20151010', periods=2,
+ freq='D'),
+ 'value': [1, 2]})
+ right = pd.DataFrame({'key': pd.period_range('20151011', periods=3,
+ freq='D'),
+ 'value': [1, 2, 3]})
+
+ expected = DataFrame({'key': pd.period_range('20151010', periods=4,
+ freq='D'),
+ 'value_x': [1, 2, np.nan, np.nan],
+ 'value_y': [np.nan, 1, 2, 3]})
+ result = pd.merge(left, right, on='key', how='outer')
+ assert_frame_equal(result, expected)
+
+ left = pd.DataFrame({'value': pd.period_range('20151010', periods=2,
+ freq='D'),
+ 'key': [1, 2]})
+ right = pd.DataFrame({'value': pd.period_range('20151011', periods=2,
+ freq='D'),
+ 'key': [2, 3]})
+
+ exp_x = pd.period_range('20151010', periods=2, freq='D')
+ exp_y = pd.period_range('20151011', periods=2, freq='D')
+ expected = DataFrame({'value_x': list(exp_x) + [pd.NaT],
+ 'value_y': [pd.NaT] + list(exp_y),
+ 'key': [1., 2, 3]})
+ result = pd.merge(left, right, on='key', how='outer')
+ assert_frame_equal(result, expected)
+
def test_concat_NaT_series(self):
# GH 11693
# test for merging NaT series with datetime series.
@@ -1131,6 +1161,39 @@ def test_concat_tz_series(self):
result = pd.concat([first, second])
self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
+ def test_concat_period_series(self):
+ x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+ y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
+ expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+ result = concat([x, y], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
+ # different freq
+ x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+ y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
+ expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+ result = concat([x, y], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
+ x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+ y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
+ expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+ result = concat([x, y], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
+ # non-period
+ x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+ y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
+ expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+ result = concat([x, y], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
+ x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+ y = Series(['A', 'B'])
+ expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+ result = concat([x, y], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1': [0, 1], 'col_left': [
@@ -2671,7 +2734,7 @@ def test_panel_join_many(self):
data_dict = {}
for p in panels:
- data_dict.update(compat.iteritems(p))
+ data_dict.update(p.iteritems())
joined = panels[0].join(panels[1:], how='inner')
expected = Panel.from_dict(data_dict, intersect=True)
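
In user-facing terms, test_merge_on_periods above asserts that an outer
merge on a Period-valued key aligns on the union of keys, like any other
key dtype (a minimal usage sketch):

    import pandas as pd

    left = pd.DataFrame({'key': pd.period_range('20151010', periods=2,
                                                freq='D'),
                         'value': [1, 2]})
    right = pd.DataFrame({'key': pd.period_range('20151011', periods=3,
                                                 freq='D'),
                          'value': [1, 2, 3]})
    # four rows; value_x/value_y are NaN where only one side has the key
    print(pd.merge(left, right, on='key', how='outer'))
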
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
index 845f50aa65d70..994269d36cd85 100644
--- a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -240,6 +240,39 @@ def test_pivot_with_tz(self):
pv = df.pivot(index='dt1', columns='dt2', values='data1')
tm.assert_frame_equal(pv, expected)
+ def test_pivot_periods(self):
+ df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
+ pd.Period('2013-01-02', 'D'),
+ pd.Period('2013-01-01', 'D'),
+ pd.Period('2013-01-02', 'D')],
+ 'p2': [pd.Period('2013-01', 'M'),
+ pd.Period('2013-01', 'M'),
+ pd.Period('2013-02', 'M'),
+ pd.Period('2013-02', 'M')],
+ 'data1': np.arange(4, dtype='int64'),
+ 'data2': np.arange(4, dtype='int64')})
+
+ exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
+ exp_col2 = pd.PeriodIndex(['2013-01', '2013-02'] * 2,
+ name='p2', freq='M')
+ exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
+ expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
+ index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
+ name='p1', freq='D'),
+ columns=exp_col)
+
+ pv = df.pivot(index='p1', columns='p2')
+ tm.assert_frame_equal(pv, expected)
+
+ expected = DataFrame([[0, 2], [1, 3]],
+ index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
+ name='p1', freq='D'),
+ columns=pd.PeriodIndex(['2013-01', '2013-02'],
+ name='p2', freq='M'))
+
+ pv = df.pivot(index='p1', columns='p2', values='data1')
+ tm.assert_frame_equal(pv, expected)
+
def test_margins(self):
def _check_output(result, values_col, index=['A', 'B'],
columns=['C'],
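
Distilled from test_pivot_periods above: pivoting on Period columns yields
PeriodIndex row and column labels (a sketch of the asserted behavior):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'p1': pd.PeriodIndex(['2013-01-01', '2013-01-02'] * 2,
                                            freq='D'),
                       'p2': pd.PeriodIndex(['2013-01'] * 2 + ['2013-02'] * 2,
                                            freq='M'),
                       'data1': np.arange(4, dtype='int64')})
    pv = df.pivot(index='p1', columns='p2', values='data1')
    # pv.index is a daily PeriodIndex named 'p1';
    # pv.columns is a monthly PeriodIndex named 'p2'
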
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 62a7ad078da70..7584b99dbdb97 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -453,12 +453,20 @@ def _convert_scalar_indexer(self, key, kind=None):
Parameters
----------
key : label of the slice bound
- kind : optional, type of the indexing operation (loc/ix/iloc/None)
+ kind : {'ix', 'loc', 'getitem', 'iloc'} or None
"""
- if (kind in ['loc'] and lib.isscalar(key) and
- (is_integer(key) or is_float(key))):
- self._invalid_indexer('index', key)
+ assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
+
+ # we don't allow integer/float indexing for loc
+ # we don't allow float indexing for ix/getitem
+ if lib.isscalar(key):
+ is_int = is_integer(key)
+ is_flt = is_float(key)
+ if kind in ['loc'] and (is_int or is_flt):
+ self._invalid_indexer('index', key)
+ elif kind in ['ix', 'getitem'] and is_flt:
+ self._invalid_indexer('index', key)
return (super(DatetimeIndexOpsMixin, self)
._convert_scalar_indexer(key, kind=kind))
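
Illustrating the rule just encoded (a hedged sketch; exact exception types
may vary by version): on a datetime-like index, .loc rejects integer and
float scalars, while plain [] rejects only floats.

    import pandas as pd

    s = pd.Series([1, 2, 3], index=pd.date_range('2016-01-01', periods=3))
    s.loc['2016-01-02']   # label-based lookup: fine
    # s.loc[1]    -> invalid indexer (integer not allowed for loc)
    # s.loc[1.0]  -> invalid indexer
    # s[1.0]      -> invalid indexer (float not allowed for getitem)
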
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index c745f1b2eddf9..b3b43e1a5babb 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -1443,7 +1443,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None
+ kind : {'ix', 'loc', 'getitem'}
Returns
-------
@@ -1454,6 +1454,8 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Value of `side` parameter should be validated in caller.
"""
+ assert kind in ['ix', 'loc', 'getitem', None]
+
if is_float(label) or isinstance(label, time) or is_integer(label):
self._invalid_indexer('slice', label)
@@ -1500,7 +1502,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
raise KeyError('Cannot mix time and non-time slice keys')
try:
- return Index.slice_indexer(self, start, end, step)
+ return Index.slice_indexer(self, start, end, step, kind=kind)
except KeyError:
# For historical reasons DatetimeIndex by default supports
# value-based partial (aka string) slices on non-monotonic arrays,
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index f34936f9c7b82..df04984bcb582 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -156,7 +156,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
_datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'dayofweek', 'weekday',
'dayofyear', 'quarter', 'qyear', 'freq',
- 'days_in_month', 'daysinmonth']
+ 'days_in_month', 'daysinmonth',
+ 'to_timestamp', 'asfreq', 'start_time', 'end_time']
_is_numeric_dtype = False
_infer_as_myclass = True
@@ -498,6 +499,14 @@ def to_datetime(self, dayfirst=False):
'days_in_month', 11, "The number of days in the month")
daysinmonth = days_in_month
+ @property
+ def start_time(self):
+ return self.to_timestamp(how='start')
+
+ @property
+ def end_time(self):
+ return self.to_timestamp(how='end')
+
def _get_object_array(self):
freq = self.freq
return np.array([Period._from_ordinal(ordinal=x, freq=freq)
@@ -683,7 +692,7 @@ def get_loc(self, key, method=None, tolerance=None):
except ValueError:
# we cannot construct the Period
# as we have an invalid type
- return self._invalid_indexer('label', key)
+ raise KeyError(key)
try:
return Index.get_loc(self, key.ordinal, method, tolerance)
except KeyError:
@@ -698,7 +707,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None
+ kind : {'ix', 'loc', 'getitem'}
Returns
-------
@@ -709,6 +718,8 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Value of `side` parameter should be validated in caller.
"""
+ assert kind in ['ix', 'loc', 'getitem']
+
if isinstance(label, datetime):
return Period(label, freq=self.freq)
elif isinstance(label, compat.string_types):
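
The new start_time/end_time properties added above are thin wrappers over
to_timestamp(how=...); a quick usage sketch:

    import pandas as pd

    pi = pd.period_range('2016-01', periods=3, freq='M')
    pi.start_time   # first instant of each month
    pi.end_time     # last instant of each month
    assert (pi.start_time == pi.to_timestamp(how='start')).all()
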
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index ba2eb3463d169..0ac10eb4fa15b 100644
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -102,7 +102,7 @@ def _typ(self):
def _deprecated(self):
warnings.warn(".resample() is now a deferred operation\n"
"use .resample(...).mean() instead of .resample(...)",
- FutureWarning, stacklevel=2)
+ FutureWarning, stacklevel=3)
return self.mean()
def _make_deprecated_binop(op):
@@ -154,9 +154,7 @@ def __getattr__(self, attr):
if attr in self._deprecated_invalids:
raise ValueError(".resample() is now a deferred operation\n"
"\tuse .resample(...).mean() instead of "
- ".resample(...)\n"
- "\tassignment will have no effect as you "
- "are working on a copy")
+ ".resample(...)")
if attr not in self._deprecated_valids:
self = self._deprecated()
return object.__getattribute__(self, attr)
@@ -167,6 +165,17 @@ def __setattr__(self, attr, value):
self.__class__.__name__))
object.__setattr__(self, attr, value)
+ def __getitem__(self, key):
+ try:
+ return super(Resampler, self).__getitem__(key)
+ except (KeyError, com.AbstractMethodError):
+
+ # compat for deprecated
+ if isinstance(self.obj, com.ABCSeries):
+ return self._deprecated()[key]
+
+ raise
+
def __setitem__(self, attr, value):
raise ValueError("cannot set items on {0}".format(
self.__class__.__name__))
@@ -208,6 +217,11 @@ def _assure_grouper(self):
""" make sure that we are creating our binner & grouper """
self._set_binner()
+ def plot(self, *args, **kwargs):
+ # for compat with prior versions, we want to
+ # have the warnings shown here and just have this work
+ return self._deprecated().plot(*args, **kwargs)
+
def aggregate(self, arg, *args, **kwargs):
"""
Apply aggregation function or functions to resampled groups, yielding
@@ -400,6 +414,8 @@ def backfill(self, limit=None):
def fillna(self, method, limit=None):
"""
+ Fill missing values
+
Parameters
----------
method : str, method of resampling ('ffill', 'bfill')
@@ -468,6 +484,52 @@ def f(self, _method=method):
setattr(Resampler, method, f)
+def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None):
+ """ potentially we might have a deprecation warning, show it
+ but call the appropriate methods anyhow """
+
+ if how is not None:
+
+ # .resample(..., how='sum')
+ if isinstance(how, compat.string_types):
+ method = "{0}()".format(how)
+
+ # .resample(..., how=lambda x: ....)
+ else:
+ method = ".apply()"
+
+ # if we have both a how and fill_method, then show
+ # the following warning
+ if fill_method is None:
+ warnings.warn("how in .resample() is deprecated\n"
+ "the new syntax is "
+ ".resample(...).{method}".format(
+ method=method),
+ FutureWarning, stacklevel=3)
+ r = r.aggregate(how)
+
+ if fill_method is not None:
+
+ # show the prior function call
+ method = '.' + method if how is not None else ''
+
+ args = "limit={0}".format(limit) if limit is not None else ""
+ warnings.warn("fill_method is deprecated to .resample()\n"
+ "the new syntax is .resample(...){method}"
+ ".{fill_method}({args})".format(
+ method=method,
+ fill_method=fill_method,
+ args=args),
+ FutureWarning, stacklevel=3)
+
+ if how is not None:
+ r = getattr(r, fill_method)(limit=limit)
+ else:
+ r = r.aggregate(fill_method, limit=limit)
+
+ return r
+
+
class DatetimeIndexResampler(Resampler):
def _get_binner_for_time(self):
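
In user-facing terms, _maybe_process_deprecations routes the old keyword
forms onto the new deferred API (a sketch of the mapping it implements):

    # old (deprecated)                        new
    # s.resample('H', how='sum')          ->  s.resample('H').sum()
    # s.resample('H', how=lambda x: ...)  ->  s.resample('H').apply(...)
    # s.resample('H', fill_method='ffill',
    #            limit=2)                 ->  s.resample('H').ffill(limit=2)
    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(4.),
                  index=pd.date_range('2016-01-01', periods=4, freq='2H'))
    s.resample('H').ffill(limit=1)
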
diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py
index 9759d13fe4632..bea2aeb508358 100644
--- a/pandas/tseries/tdi.py
+++ b/pandas/tseries/tdi.py
@@ -710,13 +710,15 @@ def _maybe_cast_slice_bound(self, label, side, kind):
----------
label : object
side : {'left', 'right'}
- kind : string / None
+ kind : {'ix', 'loc', 'getitem'}
Returns
-------
label : object
"""
+ assert kind in ['ix', 'loc', 'getitem', None]
+
if isinstance(label, compat.string_types):
parsed = _coerce_scalar_to_timedelta_type(label, box=True)
lbound = parsed.round(parsed.resolution)
diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
index 8a876272dfdef..95d84bba4b5db 100644
--- a/pandas/tseries/tests/test_period.py
+++ b/pandas/tseries/tests/test_period.py
@@ -2030,6 +2030,16 @@ def test_to_timestamp_pi_mult(self):
['2011-02-28', 'NaT', '2011-03-31'], name='idx')
self.assert_index_equal(result, expected)
+ def test_start_time(self):
+ index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31')
+ expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS')
+ self.assertTrue(index.start_time.equals(expected_index))
+
+ def test_end_time(self):
+ index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31')
+ expected_index = date_range('2016-01-01', end='2016-05-31', freq='M')
+ self.assertTrue(index.end_time.equals(expected_index))
+
def test_as_frame_columns(self):
rng = period_range('1/1/2000', periods=5)
df = DataFrame(randn(10, 5), columns=rng)
@@ -2867,6 +2877,17 @@ def test_union(self):
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
self.assertRaises(ValueError, index.join, index3)
+ def test_union_dataframe_index(self):
+ rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M')
+ s1 = pd.Series(np.random.randn(len(rng1)), rng1)
+
+ rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M')
+ s2 = pd.Series(np.random.randn(len(rng2)), rng2)
+ df = pd.DataFrame({'s1': s1, 's2': s2})
+
+ exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
+ self.assert_index_equal(df.index, exp)
+
def test_intersection(self):
index = period_range('1/1/2000', '1/20/2000', freq='D')
@@ -2887,6 +2908,63 @@ def test_intersection(self):
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
self.assertRaises(ValueError, index.intersection, index3)
+ def test_intersection_cases(self):
+ base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')
+
+ # if target has the same name, it is preserved
+ rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
+ expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
+ name='idx')
+
+ # if target name is different, it will be reset
+ rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
+ expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
+ name=None)
+
+ rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
+ expected4 = PeriodIndex([], name='idx', freq='D')
+
+ for (rng, expected) in [(rng2, expected2), (rng3, expected3),
+ (rng4, expected4)]:
+ result = base.intersection(rng)
+ self.assertTrue(result.equals(expected))
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+
+ # non-monotonic
+ base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
+ '2011-01-03'], freq='D', name='idx')
+
+ rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
+ '2011-02-02', '2011-02-03'],
+ freq='D', name='idx')
+ expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
+ name='idx')
+
+ rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
+ '2011-02-03'],
+ freq='D', name='other')
+ expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
+ name=None)
+
+ rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
+ expected4 = PeriodIndex([], freq='D', name='idx')
+
+ for (rng, expected) in [(rng2, expected2), (rng3, expected3),
+ (rng4, expected4)]:
+ result = base.intersection(rng)
+ self.assertTrue(result.equals(expected))
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, 'D')
+
+ # empty same freq
+ rng = date_range('6/1/2000', '6/15/2000', freq='T')
+ result = rng[0:0].intersection(rng)
+ self.assertEqual(len(result), 0)
+
+ result = rng.intersection(rng[0:0])
+ self.assertEqual(len(result), 0)
+
def test_fields(self):
# year, month, day, hour, minute
# second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
@@ -3724,6 +3802,86 @@ def test_pi_nat_comp(self):
idx1 == diff
+class TestSeriesPeriod(tm.TestCase):
+
+ def setUp(self):
+ self.series = Series(period_range('2000-01-01', periods=10, freq='D'))
+
+ def test_auto_conversion(self):
+ series = Series(list(period_range('2000-01-01', periods=10, freq='D')))
+ self.assertEqual(series.dtype, 'object')
+
+ def test_constructor_cant_cast_period(self):
+ with tm.assertRaises(TypeError):
+ Series(period_range('2000-01-01', periods=10, freq='D'),
+ dtype=float)
+
+ def test_series_comparison_scalars(self):
+ val = pd.Period('2000-01-04', freq='D')
+ result = self.series > val
+ expected = np.array([x > val for x in self.series])
+ self.assert_numpy_array_equal(result, expected)
+
+ val = self.series[5]
+ result = self.series > val
+ expected = np.array([x > val for x in self.series])
+ self.assert_numpy_array_equal(result, expected)
+
+ def test_between(self):
+ left, right = self.series[[2, 7]]
+ result = self.series.between(left, right)
+ expected = (self.series >= left) & (self.series <= right)
+ assert_series_equal(result, expected)
+
+ # ---------------------------------------------------------------------
+ # NaT support
+
+ """
+ # ToDo: Enable when support period dtype
+ def test_NaT_scalar(self):
+ series = Series([0, 1000, 2000, iNaT], dtype='period[D]')
+
+ val = series[3]
+ self.assertTrue(com.isnull(val))
+
+ series[2] = val
+ self.assertTrue(com.isnull(series[2]))
+
+ def test_NaT_cast(self):
+ result = Series([np.nan]).astype('period[D]')
+ expected = Series([NaT])
+ assert_series_equal(result, expected)
+ """
+
+ def test_set_none_nan(self):
+ # currently Period is stored as object dtype, not as NaT
+ self.series[3] = None
+ self.assertIs(self.series[3], None)
+
+ self.series[3:5] = None
+ self.assertIs(self.series[4], None)
+
+ self.series[5] = np.nan
+ self.assertTrue(np.isnan(self.series[5]))
+
+ self.series[5:7] = np.nan
+ self.assertTrue(np.isnan(self.series[6]))
+
+ def test_intercept_astype_object(self):
+ expected = self.series.astype('object')
+
+ df = DataFrame({'a': self.series,
+ 'b': np.random.randn(len(self.series))})
+
+ result = df.values.squeeze()
+ self.assertTrue((result[:, 0] == expected.values).all())
+
+ df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})
+
+ result = df.values.squeeze()
+ self.assertTrue((result[:, 0] == expected.values).all())
+
+
if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index b0e315ead2acb..4ddfc6ac573e4 100644
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -151,6 +151,74 @@ def f():
check_stacklevel=False):
self.assertIsInstance(getattr(r, op)(2), pd.Series)
+ # getitem compat
+ df = self.series.to_frame('foo')
+
+ # same as prior versions for DataFrame
+ self.assertRaises(KeyError, lambda: df.resample('H')[0])
+
+ # compat for Series
+ # but we cannot be sure that we need a warning here
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ result = self.series.resample('H')[0]
+ expected = self.series.resample('H').mean()[0]
+ self.assertEqual(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ result = self.series.resample('H')['2005-01-09 23:00:00']
+ expected = self.series.resample('H').mean()['2005-01-09 23:00:00']
+ self.assertEqual(result, expected)
+
+ def test_groupby_resample_api(self):
+
+ # GH 12448
+ # .groupby(...).resample(...) hitting warnings
+ # when appropriate
+ df = DataFrame({'date': pd.date_range(start='2016-01-01',
+ periods=4,
+ freq='W'),
+ 'group': [1, 1, 2, 2],
+ 'val': [5, 6, 7, 8]}).set_index('date')
+
+ # replication step
+ i = pd.date_range('2016-01-03', periods=8).tolist() + \
+ pd.date_range('2016-01-17', periods=8).tolist()
+ index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i],
+ names=['group', 'date'])
+ expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]},
+ index=index)
+ result = df.groupby('group').apply(
+ lambda x: x.resample('1D').ffill())[['val']]
+ assert_frame_equal(result, expected)
+
+ # deferred operations are currently disabled
+ # GH 12486
+ #
+ # with tm.assert_produces_warning(FutureWarning,
+ # check_stacklevel=False):
+ # result = df.groupby('group').resample('1D').ffill()
+ # assert_frame_equal(result, expected)
+
+ def test_plot_api(self):
+ tm._skip_if_no_mpl()
+
+ # .resample(....).plot(...)
+ # hitting warnings
+ # GH 12448
+ s = Series(np.random.randn(60),
+ index=date_range('2016-01-01', periods=60, freq='1min'))
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ result = s.resample('15min').plot()
+ tm.assert_is_valid_plot_return_object(result)
+
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ result = s.resample('15min', how='sum').plot()
+ tm.assert_is_valid_plot_return_object(result)
+
def test_getitem(self):
r = self.frame.resample('H')
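
The per-group resample pattern that test_groupby_resample_api pins down,
distilled (taken from the test body above):

    import pandas as pd

    df = pd.DataFrame({'date': pd.date_range('2016-01-01', periods=4,
                                             freq='W'),
                       'group': [1, 1, 2, 2],
                       'val': [5, 6, 7, 8]}).set_index('date')
    result = df.groupby('group').apply(
        lambda x: x.resample('1D').ffill())[['val']]
    # one upsampled-and-forward-filled block per group
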
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index b83c51b6a3ab6..6167edd9499ab 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -1325,6 +1325,47 @@ def test_date_range_negative_freq(self):
self.assert_index_equal(rng, exp)
self.assertEqual(rng.freq, '-2M')
+ def test_date_range_bms_bug(self):
+ # #1645
+ rng = date_range('1/1/2000', periods=10, freq='BMS')
+
+ ex_first = Timestamp('2000-01-03')
+ self.assertEqual(rng[0], ex_first)
+
+ def test_date_range_businesshour(self):
+ idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
+ '2014-07-04 11:00',
+ '2014-07-04 12:00', '2014-07-04 13:00',
+ '2014-07-04 14:00',
+ '2014-07-04 15:00', '2014-07-04 16:00'],
+ freq='BH')
+ rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH')
+ tm.assert_index_equal(idx, rng)
+
+ idx = DatetimeIndex(
+ ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH')
+ rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH')
+ tm.assert_index_equal(idx, rng)
+
+ idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
+ '2014-07-04 11:00',
+ '2014-07-04 12:00', '2014-07-04 13:00',
+ '2014-07-04 14:00',
+ '2014-07-04 15:00', '2014-07-04 16:00',
+ '2014-07-07 09:00', '2014-07-07 10:00',
+ '2014-07-07 11:00',
+ '2014-07-07 12:00', '2014-07-07 13:00',
+ '2014-07-07 14:00',
+ '2014-07-07 15:00', '2014-07-07 16:00',
+ '2014-07-08 09:00', '2014-07-08 10:00',
+ '2014-07-08 11:00',
+ '2014-07-08 12:00', '2014-07-08 13:00',
+ '2014-07-08 14:00',
+ '2014-07-08 15:00', '2014-07-08 16:00'],
+ freq='BH')
+ rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH')
+ tm.assert_index_equal(idx, rng)
+
def test_first_subset(self):
ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h')
result = ts.first('10d')
@@ -2716,6 +2757,26 @@ def test_union_bug_4564(self):
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
self.assertTrue(result.equals(exp))
+ def test_union_freq_both_none(self):
+ # GH11086
+ expected = bdate_range('20150101', periods=10)
+ expected.freq = None
+
+ result = expected.union(expected)
+ tm.assert_index_equal(result, expected)
+ self.assertIsNone(result.freq)
+
+ def test_union_dataframe_index(self):
+ rng1 = date_range('1/1/1999', '1/1/2012', freq='MS')
+ s1 = Series(np.random.randn(len(rng1)), rng1)
+
+ rng2 = date_range('1/1/1980', '12/1/2001', freq='MS')
+ s2 = Series(np.random.randn(len(rng2)), rng2)
+ df = DataFrame({'s1': s1, 's2': s2})
+
+ exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS')
+ self.assert_index_equal(df.index, exp)
+
def test_intersection_bug_1708(self):
from pandas import DateOffset
index_1 = date_range('1/1/2012', periods=4, freq='12H')
@@ -2724,14 +2785,80 @@ def test_intersection_bug_1708(self):
result = index_1 & index_2
self.assertEqual(len(result), 0)
- def test_union_freq_both_none(self):
- # GH11086
- expected = bdate_range('20150101', periods=10)
- expected.freq = None
+ def test_intersection(self):
+ # GH 4690 (with tz)
+ for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
+ base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')
- result = expected.union(expected)
- tm.assert_index_equal(result, expected)
- self.assertIsNone(result.freq)
+ # if target has the same name, it is preserved
+ rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx')
+ expected2 = date_range('6/1/2000', '6/20/2000', freq='D',
+ name='idx')
+
+ # if target name is different, it will be reset
+ rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other')
+ expected3 = date_range('6/1/2000', '6/20/2000', freq='D',
+ name=None)
+
+ rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx')
+ expected4 = DatetimeIndex([], name='idx')
+
+ for (rng, expected) in [(rng2, expected2), (rng3, expected3),
+ (rng4, expected4)]:
+ result = base.intersection(rng)
+ self.assertTrue(result.equals(expected))
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+ self.assertEqual(result.tz, expected.tz)
+
+ # non-monotonic
+ base = DatetimeIndex(['2011-01-05', '2011-01-04',
+ '2011-01-02', '2011-01-03'],
+ tz=tz, name='idx')
+
+ rng2 = DatetimeIndex(['2011-01-04', '2011-01-02',
+ '2011-02-02', '2011-02-03'],
+ tz=tz, name='idx')
+ expected2 = DatetimeIndex(
+ ['2011-01-04', '2011-01-02'], tz=tz, name='idx')
+
+ rng3 = DatetimeIndex(['2011-01-04', '2011-01-02',
+ '2011-02-02', '2011-02-03'],
+ tz=tz, name='other')
+ expected3 = DatetimeIndex(
+ ['2011-01-04', '2011-01-02'], tz=tz, name=None)
+
+ # GH 7880
+ rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz,
+ name='idx')
+ expected4 = DatetimeIndex([], tz=tz, name='idx')
+
+ for (rng, expected) in [(rng2, expected2), (rng3, expected3),
+ (rng4, expected4)]:
+ result = base.intersection(rng)
+ self.assertTrue(result.equals(expected))
+ self.assertEqual(result.name, expected.name)
+ self.assertIsNone(result.freq)
+ self.assertEqual(result.tz, expected.tz)
+
+ # empty same freq GH2129
+ rng = date_range('6/1/2000', '6/15/2000', freq='T')
+ result = rng[0:0].intersection(rng)
+ self.assertEqual(len(result), 0)
+
+ result = rng.intersection(rng[0:0])
+ self.assertEqual(len(result), 0)
+
+ def test_string_index_series_name_converted(self):
+ # #1644
+ df = DataFrame(np.random.randn(10, 4),
+ index=date_range('1/1/2000', periods=10))
+
+ result = df.ix['1/3/2000']
+ self.assertEqual(result.name, df.index[2])
+
+ result = df.T['1/3/2000']
+ self.assertEqual(result.name, df.index[2])
# GH 10699
def test_datetime64_with_DateOffset(self):
@@ -3823,131 +3950,6 @@ def test_intercept_astype_object(self):
result = df.values.squeeze()
self.assertTrue((result[:, 0] == expected.values).all())
- def test_union(self):
- rng1 = date_range('1/1/1999', '1/1/2012', freq='MS')
- s1 = Series(np.random.randn(len(rng1)), rng1)
-
- rng2 = date_range('1/1/1980', '12/1/2001', freq='MS')
- s2 = Series(np.random.randn(len(rng2)), rng2)
- df = DataFrame({'s1': s1, 's2': s2})
- self.assertEqual(df.index.values.dtype, np.dtype('M8[ns]'))
-
- def test_intersection(self):
- # GH 4690 (with tz)
- for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
- base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')
-
- # if target has the same name, it is preserved
- rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx')
- expected2 = date_range('6/1/2000', '6/20/2000', freq='D',
- name='idx')
-
- # if target name is different, it will be reset
- rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other')
- expected3 = date_range('6/1/2000', '6/20/2000', freq='D',
- name=None)
-
- rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx')
- expected4 = DatetimeIndex([], name='idx')
-
- for (rng, expected) in [(rng2, expected2), (rng3, expected3),
- (rng4, expected4)]:
- result = base.intersection(rng)
- self.assertTrue(result.equals(expected))
- self.assertEqual(result.name, expected.name)
- self.assertEqual(result.freq, expected.freq)
- self.assertEqual(result.tz, expected.tz)
-
- # non-monotonic
- base = DatetimeIndex(['2011-01-05', '2011-01-04',
- '2011-01-02', '2011-01-03'],
- tz=tz, name='idx')
-
- rng2 = DatetimeIndex(['2011-01-04', '2011-01-02',
- '2011-02-02', '2011-02-03'],
- tz=tz, name='idx')
- expected2 = DatetimeIndex(
- ['2011-01-04', '2011-01-02'], tz=tz, name='idx')
-
- rng3 = DatetimeIndex(['2011-01-04', '2011-01-02',
- '2011-02-02', '2011-02-03'],
- tz=tz, name='other')
- expected3 = DatetimeIndex(
- ['2011-01-04', '2011-01-02'], tz=tz, name=None)
-
- # GH 7880
- rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz,
- name='idx')
- expected4 = DatetimeIndex([], tz=tz, name='idx')
-
- for (rng, expected) in [(rng2, expected2), (rng3, expected3),
- (rng4, expected4)]:
- result = base.intersection(rng)
- self.assertTrue(result.equals(expected))
- self.assertEqual(result.name, expected.name)
- self.assertIsNone(result.freq)
- self.assertEqual(result.tz, expected.tz)
-
- # empty same freq GH2129
- rng = date_range('6/1/2000', '6/15/2000', freq='T')
- result = rng[0:0].intersection(rng)
- self.assertEqual(len(result), 0)
-
- result = rng.intersection(rng[0:0])
- self.assertEqual(len(result), 0)
-
- def test_date_range_bms_bug(self):
- # #1645
- rng = date_range('1/1/2000', periods=10, freq='BMS')
-
- ex_first = Timestamp('2000-01-03')
- self.assertEqual(rng[0], ex_first)
-
- def test_date_range_businesshour(self):
- idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
- '2014-07-04 11:00',
- '2014-07-04 12:00', '2014-07-04 13:00',
- '2014-07-04 14:00',
- '2014-07-04 15:00', '2014-07-04 16:00'],
- freq='BH')
- rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH')
- tm.assert_index_equal(idx, rng)
-
- idx = DatetimeIndex(
- ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH')
- rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH')
- tm.assert_index_equal(idx, rng)
-
- idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
- '2014-07-04 11:00',
- '2014-07-04 12:00', '2014-07-04 13:00',
- '2014-07-04 14:00',
- '2014-07-04 15:00', '2014-07-04 16:00',
- '2014-07-07 09:00', '2014-07-07 10:00',
- '2014-07-07 11:00',
- '2014-07-07 12:00', '2014-07-07 13:00',
- '2014-07-07 14:00',
- '2014-07-07 15:00', '2014-07-07 16:00',
- '2014-07-08 09:00', '2014-07-08 10:00',
- '2014-07-08 11:00',
- '2014-07-08 12:00', '2014-07-08 13:00',
- '2014-07-08 14:00',
- '2014-07-08 15:00', '2014-07-08 16:00'],
- freq='BH')
- rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH')
- tm.assert_index_equal(idx, rng)
-
- def test_string_index_series_name_converted(self):
- # #1644
- df = DataFrame(np.random.randn(10, 4),
- index=date_range('1/1/2000', periods=10))
-
- result = df.ix['1/3/2000']
- self.assertEqual(result.name, df.index[2])
-
- result = df.T['1/3/2000']
- self.assertEqual(result.name, df.index[2])
-
class TestTimestamp(tm.TestCase):
def test_class_ops_pytz(self):
diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py
index 381b106b17eb0..937a8fa340348 100644
--- a/pandas/tseries/tests/test_tslib.py
+++ b/pandas/tseries/tests/test_tslib.py
@@ -519,7 +519,12 @@ def test_parsers(self):
'2014-06': datetime.datetime(2014, 6, 1),
'06-2014': datetime.datetime(2014, 6, 1),
'2014-6': datetime.datetime(2014, 6, 1),
- '6-2014': datetime.datetime(2014, 6, 1), }
+ '6-2014': datetime.datetime(2014, 6, 1),
+
+ '20010101 12': datetime.datetime(2001, 1, 1, 12),
+ '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34),
+ '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56),
+ }
for date_str, expected in compat.iteritems(cases):
result1, _, _ = tools.parse_time_string(date_str)
@@ -713,11 +718,22 @@ def test_parsers_iso8601(self):
self.assertEqual(actual, exp)
        # separators must all match - YYYYMM not valid
- invalid_cases = ['2011-01/02', '2011^11^11', '201401',
- '201111', '200101']
+ invalid_cases = ['2011-01/02', '2011^11^11',
+ '201401', '201111', '200101',
+ # mixed separated and unseparated
+ '2005-0101', '200501-01',
+ '20010101 12:3456', '20010101 1234:56',
+ # HHMMSS must have two digits in each component
+ # if unseparated
+ '20010101 1', '20010101 123', '20010101 12345',
+ '20010101 12345Z',
+ # wrong separator for HHMMSS
+ '2001-01-01 12-34-56']
for date_str in invalid_cases:
with tm.assertRaises(ValueError):
tslib._test_parse_iso8601(date_str)
+                # If no ValueError raised, let me know which case failed.
+                raise Exception(date_str)
class TestArrayToDatetime(tm.TestCase):
@@ -881,6 +897,11 @@ def test_nanosecond_string_parsing(self):
self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000)
self.assertIn(expected_repr, repr(ts))
+ # GH 10041
+ ts = Timestamp('20130501T071545.123456789')
+ self.assertEqual(ts.value, expected_value)
+ self.assertIn(expected_repr, repr(ts))
+
def test_nanosecond_timestamp(self):
# GH 7610
expected = 1293840000000000005
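
The new parser cases above boil down to: unseparated time components are
accepted only in full two-digit pairs. A quick sketch (assumes these feed
through pd.to_datetime unchanged):

    import pandas as pd

    pd.to_datetime('20010101 12')      # 2001-01-01 12:00:00
    pd.to_datetime('20010101 1234')    # 2001-01-01 12:34:00
    pd.to_datetime('20010101 123456')  # 2001-01-01 12:34:56
    # '20010101 123' (odd digit count) is rejected by the ISO parser
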
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index 8f127e28e28a9..d92cfef5280fc 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -190,6 +190,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
----------
arg : string, datetime, list, tuple, 1-d array, or Series
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
+
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as NaT
- If 'ignore', then invalid parsing will return the input
@@ -201,10 +202,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
with day first (this is a known bug, based on dateutil behavior).
yearfirst : boolean, default False
Specify a date parse order if `arg` is str or its list-likes.
+
        - If True parses dates with the year first, e.g. 10/11/12 is parsed
          as 2010-11-12.
        - If both dayfirst and yearfirst are True, yearfirst takes precedence
          (same as dateutil).
+
Warning: yearfirst=True is not strict, but will prefer to parse
         with year first (this is a known bug, based on dateutil behavior).
@@ -214,14 +217,17 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
Return UTC DatetimeIndex if True (converting any tz-aware
datetime.datetime objects as well).
box : boolean, default True
+
- If True returns a DatetimeIndex
- If False returns ndarray of values.
format : string, default None
         strftime to parse time, e.g. "%d/%m/%Y"; note that "%f" will parse
all the way up to nanoseconds.
exact : boolean, True by default
+
- If True, require an exact format match.
- If False, allow the format to match anywhere in the target string.
+
     unit : unit of the arg (D, s, ms, us, ns); denotes the unit of an epoch
         value (e.g. a unix timestamp), given as an integer/float number.
infer_datetime_format : boolean, default False
@@ -273,6 +279,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
datetime.datetime(1300, 1, 1, 0, 0)
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
NaT
+
"""
return _to_datetime(arg, errors=errors, dayfirst=dayfirst,
yearfirst=yearfirst,
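# A short usage sketch of the options documented above, combining the
# docstring's own out-of-bounds example with one of the unseparated time
# forms added to the parser tests earlier in this patch:
import pandas as pd
pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')  # -> NaT
pd.to_datetime('20010101 1234')  # -> Timestamp('2001-01-01 12:34:00')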
diff --git a/pandas/util/clipboard.py b/pandas/util/clipboard.py
index 026f13aad0bf3..02da0d5b8159f 100644
--- a/pandas/util/clipboard.py
+++ b/pandas/util/clipboard.py
@@ -45,6 +45,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# flake8: noqa
import platform
import os
diff --git a/pandas/util/nosetester.py b/pandas/util/nosetester.py
index 445cb79978fc1..1bdaaff99fd50 100644
--- a/pandas/util/nosetester.py
+++ b/pandas/util/nosetester.py
@@ -122,6 +122,45 @@ def _get_custom_doctester(self):
"""
return None
+ def _test_argv(self, label, verbose, extra_argv):
+ """
+ Generate argv for nosetest command
+
+ Parameters
+ ----------
+ label : {'fast', 'full', '', attribute identifier}, optional
+ see ``test`` docstring
+ verbose : int, optional
+ Verbosity value for test outputs, in the range 1-10. Default is 1.
+ extra_argv : list, optional
+ List with any extra arguments to pass to nosetests.
+
+ Returns
+ -------
+ argv : list
+ command line arguments that will be passed to nose
+ """
+
+ argv = [__file__, self.package_path]
+ if label and label != 'full':
+ if not isinstance(label, string_types):
+ raise TypeError('Selection label should be a string')
+ if label == 'fast':
+ label = 'not slow and not network and not disabled'
+ argv += ['-A', label]
+ argv += ['--verbosity', str(verbose)]
+
+ # When installing with setuptools, and also in some other cases, the
+ # test_*.py files end up marked +x executable. Nose, by default, does
+ # not run files marked with +x as they might be scripts. However, in
+ # our case nose only looks for test_*.py files under the package
+ # directory, which should be safe.
+ argv += ['--exe']
+
+ if extra_argv:
+ argv += extra_argv
+ return argv
+
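# Worked example, read off the code above: with the defaults
# label='fast', verbose=1, extra_argv=None, the helper returns
#   [__file__, self.package_path,
#    '-A', 'not slow and not network and not disabled',
#    '--verbosity', '1', '--exe']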
def test(self, label='fast', verbose=1, extra_argv=None,
doctests=False, coverage=False, raise_warnings=None):
"""
@@ -133,6 +172,7 @@ def test(self, label='fast', verbose=1, extra_argv=None,
Identifies the tests to run. This can be a string to pass to
the nosetests executable with the '-A' option, or one of several
special values. Special values are:
+
* 'fast' - the default - which corresponds to the ``nosetests -A``
option of 'not slow'.
* 'full' - fast (as above) and slow tests as in the
@@ -140,6 +180,7 @@ def test(self, label='fast', verbose=1, extra_argv=None,
* None or '' - run all tests.
* attribute_identifier - string passed directly to nosetests
as '-A'.
+
verbose : int, optional
Verbosity value for test outputs, in the range 1-10. Default is 1.
extra_argv : list, optional
@@ -154,14 +195,15 @@ def test(self, label='fast', verbose=1, extra_argv=None,
This specifies which warnings to configure as 'raise' instead
of 'warn' during the test execution. Valid strings are:
- - "develop" : equals ``(DeprecationWarning, RuntimeWarning)``
- - "release" : equals ``()``, don't raise on any warnings.
+ - 'develop' : equals ``(DeprecationWarning, RuntimeWarning)``
+ - 'release' : equals ``()``, don't raise on any warnings.
Returns
-------
result : object
Returns the result of running the tests as a
``nose.result.TextTestResult`` object.
+
"""
# cap verbosity at 3 because nose becomes *very* verbose beyond that
diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py
index 80c10b53d37b5..c972caad5d74c 100644
--- a/pandas/util/print_versions.py
+++ b/pandas/util/print_versions.py
@@ -91,7 +91,8 @@ def show_versions(as_json=False):
("sqlalchemy", lambda mod: mod.__version__),
("pymysql", lambda mod: mod.__version__),
("psycopg2", lambda mod: mod.__version__),
- ("jinja2", lambda mod: mod.__version__)
+ ("jinja2", lambda mod: mod.__version__),
+ ("boto", lambda mod: mod.__version__)
]
deps_blob = list()
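# Hedged sketch of how entries such as ("boto", ...) are consumed: each
# optional dependency is imported and its version recorded, or None when
# the import fails.  The loop below is illustrative; the real body is not
# shown in this hunk.
import importlib
deps = [("jinja2", lambda mod: mod.__version__),
        ("boto", lambda mod: mod.__version__)]
blob = []
for modname, ver_f in deps:
    try:
        blob.append((modname, ver_f(importlib.import_module(modname))))
    except ImportError:
        blob.append((modname, None))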
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 35a615db444e9..ba869efbc5837 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -133,7 +133,7 @@ def randbool(size=(), p=0.5):
def rands_array(nchars, size, dtype='O'):
"""Generate an array of byte strings."""
- retval = (choice(RANDS_CHARS, size=nchars * np.prod(size))
+ retval = (np.random.choice(RANDS_CHARS, size=nchars * np.prod(size))
.view((np.str_, nchars)).reshape(size))
if dtype is None:
return retval
@@ -143,7 +143,7 @@ def rands_array(nchars, size, dtype='O'):
def randu_array(nchars, size, dtype='O'):
"""Generate an array of unicode strings."""
- retval = (choice(RANDU_CHARS, size=nchars * np.prod(size))
+ retval = (np.random.choice(RANDU_CHARS, size=nchars * np.prod(size))
.view((np.unicode_, nchars)).reshape(size))
if dtype is None:
return retval
@@ -158,7 +158,7 @@ def rands(nchars):
See `rands_array` if you want to create an array of random strings.
"""
- return ''.join(choice(RANDS_CHARS, nchars))
+ return ''.join(np.random.choice(RANDS_CHARS, nchars))
def randu(nchars):
@@ -171,14 +171,6 @@ def randu(nchars):
-    return ''.join(choice(RANDU_CHARS, nchars))
+    return ''.join(np.random.choice(RANDU_CHARS, nchars))
-def choice(x, size=10):
- """sample with replacement; uniform over the input"""
- try:
- return np.random.choice(x, size=size)
- except AttributeError:
- return np.random.randint(len(x), size=size).choose(x)
-
-
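# The deleted helper existed only to paper over NumPy builds that predate
# np.random.choice; for uniform sampling with replacement the fallback and
# the modern call are interchangeable.  Minimal sketch of the equivalence:
import numpy as np
x = np.array(list('ab'))
sample = np.random.choice(x, size=10)                    # modern path
fallback = np.random.randint(len(x), size=10).choose(x)  # old-NumPy fallback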
def close(fignum=None):
from matplotlib.pyplot import get_fignums, close as _close
@@ -209,6 +201,12 @@ def setUpClass(cls):
cls.setUpClass = setUpClass
return cls
+def _skip_if_no_mpl():
+ try:
+ import matplotlib
+ except ImportError:
+ import nose
+ raise nose.SkipTest("matplotlib not installed")
def _skip_if_mpl_1_5():
import matplotlib
@@ -217,7 +215,6 @@ def _skip_if_mpl_1_5():
import nose
raise nose.SkipTest("matplotlib 1.5")
-
def _skip_if_no_scipy():
try:
import scipy.stats
@@ -775,6 +772,21 @@ def assert_attr_equal(attr, left, right, obj='Attributes'):
left_attr, right_attr)
+def assert_is_valid_plot_return_object(objs):
+ import matplotlib.pyplot as plt
+ if isinstance(objs, np.ndarray):
+ for el in objs.flat:
+ assert isinstance(el, plt.Axes), ('one of \'objs\' is not a '
+ 'matplotlib Axes instance, '
+ 'type encountered {0!r}'
+ ''.format(el.__class__.__name__))
+ else:
+ assert isinstance(objs, (plt.Artist, tuple, dict)), \
+ ('objs is neither an ndarray of Artist instances nor a '
+ 'single Artist instance, tuple, or dict, "objs" is a {0!r} '
+ ''.format(objs.__class__.__name__))
+
+
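# Illustrative use of the new helper once this patch is applied (assumes
# matplotlib is installed): a single Axes satisfies the Artist branch, and
# DataFrame.plot(subplots=True) exercises the ndarray branch.
import numpy as np
import pandas as pd
import pandas.util.testing as tm
tm.assert_is_valid_plot_return_object(pd.Series(np.arange(10)).plot())
tm.assert_is_valid_plot_return_object(
    pd.DataFrame(np.random.randn(5, 2)).plot(subplots=True))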
def isiterable(obj):
return hasattr(obj, '__iter__')
diff --git a/setup.py b/setup.py
index f33b01b24c165..e3fb5a007aad3 100755
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,15 @@
import platform
from distutils.version import LooseVersion
+def is_platform_windows():
+ return sys.platform == 'win32' or sys.platform == 'cygwin'
+
+def is_platform_linux():
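+    # note: sys.platform is 'linux2' under Python 2; Python 3 reports 'linux'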
+ return sys.platform == 'linux2'
+
+def is_platform_mac():
+ return sys.platform == 'darwin'
+
# versioning
import versioneer
cmdclass = versioneer.get_cmdclass()
@@ -375,6 +384,11 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
def pxd(name):
return os.path.abspath(pjoin('pandas', name + '.pxd'))
+# compiler args used to suppress warnings during the build
+if is_platform_windows():
+    extra_compile_args = []
+else:
+    extra_compile_args = ['-Wno-unused-function']
lib_depends = lib_depends + ['pandas/src/numpy_helper.h',
'pandas/src/parse_helper.h']
@@ -386,7 +400,7 @@ def pxd(name):
# some linux distros require it
-libraries = ['m'] if 'win32' not in sys.platform else []
+libraries = ['m'] if not is_platform_windows() else []
ext_data = dict(
lib={'pyxfile': 'lib',
@@ -439,7 +453,8 @@ def pxd(name):
obj = Extension('pandas.%s' % name,
sources=sources,
depends=data.get('depends', []),
- include_dirs=include)
+ include_dirs=include,
+ extra_compile_args=extra_compile_args)
extensions.append(obj)
@@ -447,14 +462,16 @@ def pxd(name):
sparse_ext = Extension('pandas._sparse',
sources=[srcpath('sparse', suffix=suffix)],
include_dirs=[],
- libraries=libraries)
+ libraries=libraries,
+ extra_compile_args=extra_compile_args)
extensions.extend([sparse_ext])
testing_ext = Extension('pandas._testing',
sources=[srcpath('testing', suffix=suffix)],
include_dirs=[],
- libraries=libraries)
+ libraries=libraries,
+ extra_compile_args=extra_compile_args)
extensions.extend([testing_ext])
@@ -474,7 +491,8 @@ def pxd(name):
subdir='msgpack')],
language='c++',
include_dirs=['pandas/src/msgpack'] + common_include,
- define_macros=macros)
+ define_macros=macros,
+ extra_compile_args=extra_compile_args)
unpacker_ext = Extension('pandas.msgpack._unpacker',
depends=['pandas/src/msgpack/unpack.h',
'pandas/src/msgpack/unpack_define.h',
@@ -484,7 +502,8 @@ def pxd(name):
subdir='msgpack')],
language='c++',
include_dirs=['pandas/src/msgpack'] + common_include,
- define_macros=macros)
+ define_macros=macros,
+ extra_compile_args=extra_compile_args)
extensions.append(packer_ext)
extensions.append(unpacker_ext)
@@ -508,7 +527,7 @@ def pxd(name):
include_dirs=['pandas/src/ujson/python',
'pandas/src/ujson/lib',
'pandas/src/datetime'] + common_include,
- extra_compile_args=['-D_GNU_SOURCE'])
+ extra_compile_args=['-D_GNU_SOURCE'] + extra_compile_args)
extensions.append(ujson_ext)
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
index bc21372225322..268d71f864823 100644
--- a/vb_suite/groupby.py
+++ b/vb_suite/groupby.py
@@ -143,7 +143,7 @@ def f():
value2 = np.random.randn(n)
value2[np.random.rand(n) > 0.5] = np.nan
-obj = tm.choice(list('ab'), size=n).astype(object)
+obj = np.random.choice(list('ab'), size=n).astype(object)
obj[np.random.randn(n) > 0.5] = np.nan
df = DataFrame({'key1': np.random.randint(0, 500, size=n),