@@ -145,7 +145,10 @@ usecols : list-like or callable, default ``None``
145
145
146
146
.. ipython:: python
147
147
148
- data = ' col1,col2,col3\n a,b,1\n a,b,2\n c,d,3'
148
+ data = (' col1,col2,col3\n '
149
+ ' a,b,1\n '
150
+ ' a,b,2\n '
151
+ ' c,d,3' )
149
152
pd.read_csv(StringIO(data))
150
153
pd.read_csv(StringIO(data), usecols = lambda x : x.upper() in [' COL1' , ' COL3' ])
151
154
@@ -191,7 +194,10 @@ skiprows : list-like or integer, default ``None``
191
194
192
195
.. ipython:: python
193
196
194
- data = ' col1,col2,col3\n a,b,1\n a,b,2\n c,d,3'
197
+ data = (' col1,col2,col3\n '
198
+ ' a,b,1\n '
199
+ ' a,b,2\n '
200
+ ' c,d,3' )
195
201
pd.read_csv(StringIO(data))
196
202
pd.read_csv(StringIO(data), skiprows = lambda x : x % 2 != 0 )
197
203
@@ -366,7 +372,10 @@ columns:
366
372
367
373
.. ipython:: python
368
374
369
- data = ' a,b,c\n 1,2,3\n 4,5,6\n 7,8,9'
375
+ data = (' a,b,c\n '
376
+ ' 1,2,3\n '
377
+ ' 4,5,6\n '
378
+ ' 7,8,9' )
370
379
print (data)
371
380
372
381
df = pd.read_csv(StringIO(data), dtype = object )
@@ -387,7 +396,11 @@ of :func:`~pandas.read_csv`:
387
396
388
397
.. ipython:: python
389
398
390
- data = " col_1\n 1\n 2\n 'A'\n 4.22"
399
+ data = (" col_1\n "
400
+ " 1\n "
401
+ " 2\n "
402
+ " 'A'\n "
403
+ " 4.22" )
391
404
df = pd.read_csv(StringIO(data), converters = {' col_1' : str })
392
405
df
393
406
df[' col_1' ].apply(type ).value_counts()
@@ -455,7 +468,10 @@ Specifying Categorical dtype
455
468
456
469
.. ipython:: python
457
470
458
- data = ' col1,col2,col3\n a,b,1\n a,b,2\n c,d,3'
471
+ data = (' col1,col2,col3\n '
472
+ ' a,b,1\n '
473
+ ' a,b,2\n '
474
+ ' c,d,3' )
459
475
460
476
pd.read_csv(StringIO(data))
461
477
pd.read_csv(StringIO(data)).dtypes
@@ -524,7 +540,10 @@ used as the column names:
524
540
525
541
.. ipython:: python
526
542
527
- data = ' a,b,c\n 1,2,3\n 4,5,6\n 7,8,9'
543
+ data = (' a,b,c\n '
544
+ ' 1,2,3\n '
545
+ ' 4,5,6\n '
546
+ ' 7,8,9' )
528
547
print (data)
529
548
pd.read_csv(StringIO(data))
530
549
@@ -543,7 +562,11 @@ If the header is in a row other than the first, pass the row number to
543
562
544
563
.. ipython:: python
545
564
546
- data = ' skip this skip it\n a,b,c\n 1,2,3\n 4,5,6\n 7,8,9'
565
+ data = (' skip this skip it\n '
566
+ ' a,b,c\n '
567
+ ' 1,2,3\n '
568
+ ' 4,5,6\n '
569
+ ' 7,8,9' )
547
570
pd.read_csv(StringIO(data), header = 1 )
548
571
549
572
.. note::
@@ -564,7 +587,9 @@ distinguish between them so as to prevent overwriting data:
564
587
565
588
.. ipython:: python
566
589
567
- data = 'a,b,a\n0,1,2\n3,4,5'
590
+ data = ('a,b,a\n'
591
+ '0,1,2\n'
592
+ '3,4,5')
568
593
pd.read_csv(StringIO(data))
569
594
570
595
There is no more duplicate data because ``mangle_dupe_cols=True`` by default,
@@ -632,15 +657,26 @@ be ignored. By default, completely blank lines will be ignored as well.
632
657
633
658
.. ipython:: python
634
659
635
- data = ' \n a,b,c\n \n # commented line\n 1,2,3\n\n 4,5,6'
660
+ data = (' \n '
661
+ ' a,b,c\n '
662
+ ' \n '
663
+ ' # commented line\n '
664
+ ' 1,2,3\n '
665
+ ' \n '
666
+ ' 4,5,6' )
636
667
print (data)
637
668
pd.read_csv(StringIO(data), comment = ' #' )
638
669
639
670
If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines:
640
671
641
672
.. ipython:: python
642
673
643
- data = ' a,b,c\n\n 1,2,3\n\n\n 4,5,6'
674
+ data = (' a,b,c\n '
675
+ ' \n '
676
+ ' 1,2,3\n '
677
+ ' \n '
678
+ ' \n '
679
+ ' 4,5,6' )
644
680
pd.read_csv(StringIO(data), skip_blank_lines = False )
645
681
646
682
.. warning::
@@ -651,7 +687,10 @@ If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines:
651
687
652
688
.. ipython:: python
653
689
654
- data = ' #comment\n a,b,c\n A,B,C\n 1,2,3'
690
+ data = (' #comment\n '
691
+ ' a,b,c\n '
692
+ ' A,B,C\n '
693
+ ' 1,2,3' )
655
694
pd.read_csv(StringIO(data), comment = ' #' , header = 1 )
656
695
data = ' A,B,C\n #comment\n a,b,c\n 1,2,3'
657
696
pd.read_csv(StringIO(data), comment = ' #' , skiprows = 2 )
@@ -661,15 +700,14 @@ If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines:
661
700
662
701
.. ipython:: python
663
702
664
- data = ' \n ' .join([' # empty' ,
665
- ' # second empty line' ,
666
- ' # third empty' ,
667
- ' line' ,
668
- ' X,Y,Z' ,
669
- ' 1,2,3' ,
670
- ' A,B,C' ,
671
- ' 1,2.,4.' ,
672
- ' 5.,NaN,10.0' ])
703
+ data = (' # empty\n '
704
+ ' # second empty line\n '
705
+ ' # third emptyline\n '
706
+ ' X,Y,Z\n '
707
+ ' 1,2,3\n '
708
+ ' A,B,C\n '
709
+ ' 1,2.,4.\n '
710
+ ' 5.,NaN,10.0\n ' )
673
711
print (data)
674
712
pd.read_csv(StringIO(data), comment = ' #' , skiprows = 4 , header = 1 )
675
713
@@ -724,7 +762,9 @@ result in byte strings being decoded to unicode in the result:
724
762
725
763
.. ipython:: python
726
764
727
- data = b ' word,length\n Tr\xc3\xa4 umen,7\n Gr\xc3\xbc\xc3\x9f e,5'
765
+ data = (b ' word,length\n '
766
+ b ' Tr\xc3\xa4 umen,7\n '
767
+ b ' Gr\xc3\xbc\xc3\x9f e,5' )
728
768
data = data.decode(' utf8' ).encode(' latin-1' )
729
769
df = pd.read_csv(BytesIO(data), encoding = ' latin-1' )
730
770
df
@@ -745,12 +785,16 @@ first column will be used as the ``DataFrame``'s row names:
745
785
746
786
.. ipython:: python
747
787
748
- data = ' a,b,c\n 4,apple,bat,5.7\n 8,orange,cow,10'
788
+ data = (' a,b,c\n '
789
+ ' 4,apple,bat,5.7\n '
790
+ ' 8,orange,cow,10' )
749
791
pd.read_csv(StringIO(data))
750
792
751
793
.. ipython:: python
752
794
753
- data = ' index,a,b,c\n 4,apple,bat,5.7\n 8,orange,cow,10'
795
+ data = (' index,a,b,c\n '
796
+ ' 4,apple,bat,5.7\n '
797
+ ' 8,orange,cow,10' )
754
798
pd.read_csv(StringIO(data), index_col = 0 )
755
799
756
800
Ordinarily, you can achieve this behavior using the ``index_col`` option.
@@ -761,7 +805,9 @@ index column inference and discard the last column, pass ``index_col=False``:
761
805
762
806
.. ipython:: python
763
807
764
- data = ' a,b,c\n 4,apple,bat,\n 8,orange,cow,'
808
+ data = (' a,b,c\n '
809
+ ' 4,apple,bat,\n '
810
+ ' 8,orange,cow,' )
765
811
print (data)
766
812
pd.read_csv(StringIO(data))
767
813
pd.read_csv(StringIO(data), index_col = False )
@@ -771,7 +817,9 @@ If a subset of data is being parsed using the ``usecols`` option, the
771
817
772
818
.. ipython:: python
773
819
774
- data = ' a,b,c\n 4,apple,bat,\n 8,orange,cow,'
820
+ data = (' a,b,c\n '
821
+ ' 4,apple,bat,\n '
822
+ ' 8,orange,cow,' )
775
823
print (data)
776
824
pd.read_csv(StringIO(data), usecols = [' b' , ' c' ])
777
825
pd.read_csv(StringIO(data), usecols = [' b' , ' c' ], index_col = 0 )
@@ -5451,6 +5499,7 @@ And here's the code:
5451
5499
sz = 1000000
5452
5500
df = pd.DataFrame({' A' : randn(sz), ' B' : [1 ] * sz})
5453
5501
5502
+
5454
5503
def test_sql_write (df ):
5455
5504
if os.path.exists(' test.sql' ):
5456
5505
os.remove(' test.sql' )
0 commit comments