Skip to content

Commit 5576c0f

Browse files
author
Yoav Ram
committed
1 parent 8cd7f90 commit 5576c0f

File tree

1 file changed

+287
-0
lines changed

1 file changed

+287
-0
lines changed

pandas duplicate column bug.ipynb

+287
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,287 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {
7+
"collapsed": true
8+
},
9+
"outputs": [],
10+
"source": [
11+
"import pandas as pd\n",
12+
"from StringIO import StringIO"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 15,
18+
"metadata": {
19+
"collapsed": false
20+
},
21+
"outputs": [],
22+
"source": [
23+
"data = \"\"\"A,A,B,B,B\n",
24+
" 1,2,3,4,5\n",
25+
" 6,7,8,9,10\n",
26+
" 11,12,13,14,15\"\"\"\n",
27+
"\n",
28+
"# check default behaviour\n",
29+
"df = pd.read_table(StringIO(data), sep=',')\n",
30+
"assert (list(df.columns) == ['A', 'A.1', 'B', 'B.1', 'B.2'])\n",
31+
"\n",
32+
"df = pd.read_table(StringIO(data), sep=',', mangle_dupe_cols=False)\n",
33+
"assert (list(df.columns)==['A', 'A', 'B', 'B', 'B'])\n",
34+
"\n",
35+
"df = pd.read_table(StringIO(data), sep=',', mangle_dupe_cols=True)\n",
36+
"assert (list(df.columns)== ['A', 'A.1', 'B', 'B.1', 'B.2'])"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": 30,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [
46+
{
47+
"data": {
48+
"text/html": [
49+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
50+
"<table border=\"1\" class=\"dataframe\">\n",
51+
" <thead>\n",
52+
" <tr style=\"text-align: right;\">\n",
53+
" <th></th>\n",
54+
" <th>A</th>\n",
55+
" <th>A.1</th>\n",
56+
" <th>B</th>\n",
57+
" <th>B.1</th>\n",
58+
" <th>B.2</th>\n",
59+
" </tr>\n",
60+
" </thead>\n",
61+
" <tbody>\n",
62+
" <tr>\n",
63+
" <th>0</th>\n",
64+
" <td>1</td>\n",
65+
" <td>2</td>\n",
66+
" <td>3</td>\n",
67+
" <td>4</td>\n",
68+
" <td>5</td>\n",
69+
" </tr>\n",
70+
" <tr>\n",
71+
" <th>1</th>\n",
72+
" <td>6</td>\n",
73+
" <td>7</td>\n",
74+
" <td>8</td>\n",
75+
" <td>9</td>\n",
76+
" <td>10</td>\n",
77+
" </tr>\n",
78+
" <tr>\n",
79+
" <th>2</th>\n",
80+
" <td>11</td>\n",
81+
" <td>12</td>\n",
82+
" <td>13</td>\n",
83+
" <td>14</td>\n",
84+
" <td>15</td>\n",
85+
" </tr>\n",
86+
" </tbody>\n",
87+
"</table>\n",
88+
"</div>"
89+
],
90+
"text/plain": [
91+
" A A.1 B B.1 B.2\n",
92+
"0 1 2 3 4 5\n",
93+
"1 6 7 8 9 10\n",
94+
"2 11 12 13 14 15"
95+
]
96+
},
97+
"execution_count": 30,
98+
"metadata": {},
99+
"output_type": "execute_result"
100+
}
101+
],
102+
"source": [
103+
"df = pd.read_table(StringIO(data), sep=',', mangle_dupe_cols=True)\n",
104+
"df"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": 31,
110+
"metadata": {
111+
"collapsed": false
112+
},
113+
"outputs": [
114+
{
115+
"name": "stdout",
116+
"output_type": "stream",
117+
"text": [
118+
"| | A | A.1 | B | B.1 | B.2 |\n",
119+
"|---:|----:|------:|----:|------:|------:|\n",
120+
"| 0 | 1 | 2 | 3 | 4 | 5 |\n",
121+
"| 1 | 6 | 7 | 8 | 9 | 10 |\n",
122+
"| 2 | 11 | 12 | 13 | 14 | 15 |\n"
123+
]
124+
}
125+
],
126+
"source": [
127+
"import tabulate\n",
128+
"print tabulate.tabulate(df, headers=df.columns.tolist(), tablefmt='pipe')"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": 25,
134+
"metadata": {
135+
"collapsed": false
136+
},
137+
"outputs": [
138+
{
139+
"data": {
140+
"text/html": [
141+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
142+
"<table border=\"1\" class=\"dataframe\">\n",
143+
" <thead>\n",
144+
" <tr style=\"text-align: right;\">\n",
145+
" <th></th>\n",
146+
" <th>A</th>\n",
147+
" <th>A</th>\n",
148+
" <th>B</th>\n",
149+
" <th>B</th>\n",
150+
" <th>B</th>\n",
151+
" </tr>\n",
152+
" </thead>\n",
153+
" <tbody>\n",
154+
" <tr>\n",
155+
" <th>0</th>\n",
156+
" <td>2</td>\n",
157+
" <td>2</td>\n",
158+
" <td>5</td>\n",
159+
" <td>5</td>\n",
160+
" <td>5</td>\n",
161+
" </tr>\n",
162+
" <tr>\n",
163+
" <th>1</th>\n",
164+
" <td>7</td>\n",
165+
" <td>7</td>\n",
166+
" <td>10</td>\n",
167+
" <td>10</td>\n",
168+
" <td>10</td>\n",
169+
" </tr>\n",
170+
" <tr>\n",
171+
" <th>2</th>\n",
172+
" <td>12</td>\n",
173+
" <td>12</td>\n",
174+
" <td>15</td>\n",
175+
" <td>15</td>\n",
176+
" <td>15</td>\n",
177+
" </tr>\n",
178+
" </tbody>\n",
179+
"</table>\n",
180+
"</div>"
181+
],
182+
"text/plain": [
183+
" A A B B B\n",
184+
"0 2 2 5 5 5\n",
185+
"1 7 7 10 10 10\n",
186+
"2 12 12 15 15 15"
187+
]
188+
},
189+
"execution_count": 25,
190+
"metadata": {},
191+
"output_type": "execute_result"
192+
}
193+
],
194+
"source": [
195+
"df = pd.read_table(StringIO(data), sep=',', mangle_dupe_cols=False)\n",
196+
"df"
197+
]
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": 29,
202+
"metadata": {
203+
"collapsed": false
204+
},
205+
"outputs": [
206+
{
207+
"name": "stdout",
208+
"output_type": "stream",
209+
"text": [
210+
"| | A | A | B | B | B |\n",
211+
"|---:|----:|----:|----:|----:|----:|\n",
212+
"| 0 | 2 | 2 | 5 | 5 | 5 |\n",
213+
"| 1 | 7 | 7 | 10 | 10 | 10 |\n",
214+
"| 2 | 12 | 12 | 15 | 15 | 15 |\n"
215+
]
216+
}
217+
],
218+
"source": [
219+
"print tabulate.tabulate(df, headers=df.columns.tolist(), tablefmt='pipe')"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": 27,
225+
"metadata": {
226+
"collapsed": false
227+
},
228+
"outputs": [
229+
{
230+
"data": {
231+
"text/plain": [
232+
"[u'fancy_grid',\n",
233+
" u'grid',\n",
234+
" u'html',\n",
235+
" u'latex',\n",
236+
" u'latex_booktabs',\n",
237+
" u'mediawiki',\n",
238+
" u'orgtbl',\n",
239+
" u'pipe',\n",
240+
" u'plain',\n",
241+
" u'psql',\n",
242+
" u'rst',\n",
243+
" u'simple',\n",
244+
" u'tsv']"
245+
]
246+
},
247+
"execution_count": 27,
248+
"metadata": {},
249+
"output_type": "execute_result"
250+
}
251+
],
252+
"source": [
253+
"tabulate.tabulate_formats"
254+
]
255+
},
256+
{
257+
"cell_type": "code",
258+
"execution_count": null,
259+
"metadata": {
260+
"collapsed": true
261+
},
262+
"outputs": [],
263+
"source": []
264+
}
265+
],
266+
"metadata": {
267+
"kernelspec": {
268+
"display_name": "Python 2",
269+
"language": "python",
270+
"name": "python2"
271+
},
272+
"language_info": {
273+
"codemirror_mode": {
274+
"name": "ipython",
275+
"version": 2
276+
},
277+
"file_extension": ".py",
278+
"mimetype": "text/x-python",
279+
"name": "python",
280+
"nbconvert_exporter": "python",
281+
"pygments_lexer": "ipython2",
282+
"version": "2.7.8"
283+
}
284+
},
285+
"nbformat": 4,
286+
"nbformat_minor": 0
287+
}

0 commit comments

Comments
 (0)