@@ -249,6 +249,114 @@ def test_conll_2003_to_dataframes_multi_field(self):
249
249
),
250
250
)
251
251
252
def test_conll_u_to_dataframes_split_by_sent(self):
    # Parses the CoNLL-U fixture with `separate_sentences_by_doc=True` and
    # compares the repr of the DataFrame at index 4 (minus its ID columns)
    # against the stored text below.
    self.maxDiff = None  # show the full diff when the comparison fails
    frames = conll_u_to_dataframes(
        "test_data/io/test_conll/conll_u_test1.txt",
        separate_sentences_by_doc=True,
    )
    trimmed = frames[4].drop(columns=['sentence_id', 'doc_id', 'paragraph_id'])
    actual = repr(trimmed)

    # Echo the actual repr so the expected text can be regenerated from the
    # test output.
    print(f"***{actual} ***")

    # NOTE: the line-continuation backslashes emitted by pandas are written
    # as escaped backslashes ("\\") below. Restore that escaping whenever
    # this string is regenerated.
    expected = textwrap.dedent("""\
        span lemma upostag xpostag \\
        0 [0, 4): 'Bush' Bush PROPN NNP
        1 [5, 9): 'also' also ADV RB
        2 [10, 19): 'nominated' nominate VERB VBD
        3 [20, 22): 'A.' A. PROPN NNP
        4 [23, 27): 'Noel' Noel PROPN NNP
        5 [28, 36): 'Anketell' Anketell PROPN NNP
        6 [37, 43): 'Kramer' Kramer PROPN NNP
        7 [44, 47): 'for' for ADP IN
        8 [48, 49): 'a' a DET DT
        9 [50, 52): '15' 15 NUM CD
        10 [52, 53): '-' - PUNCT HYPH
        11 [54, 58): 'year' year NOUN NN
        12 [59, 63): 'term' term NOUN NN
        13 [64, 66): 'as' as ADP IN
        14 [67, 76): 'associate' associate ADJ JJ
        15 [77, 82): 'judge' judge NOUN NN
        16 [83, 85): 'of' of ADP IN
        17 [86, 89): 'the' the DET DT
        18 [90, 98): 'District' District PROPN NNP
        19 [99, 101): 'of' of ADP IN
        20 [102, 110): 'Columbia' Columbia PROPN NNP
        21 [111, 116): 'Court' Court PROPN NNP
        22 [117, 119): 'of' of ADP IN
        23 [120, 127): 'Appeals' Appeal PROPN NNPS
        24 [127, 128): ',' , PUNCT ,
        25 [129, 138): 'replacing' replace VERB VBG
        26 [139, 143): 'John' John PROPN NNP
        27 [144, 152): 'Montague' Montague PROPN NNP
        28 [153, 161): 'Steadman' Steadman PROPN NNP
        29 [161, 162): '.' . PUNCT .

        features head deprel deps \\
        0 Number=Sing 2 nsubj 3:nsubj
        1 None 2 advmod 3:advmod
        2 Mood=Ind|Tense=Past|VerbForm=Fin <NA> root 0:root
        3 Number=Sing 2 obj 3:obj
        4 Number=Sing 3 flat 4:flat
        5 Number=Sing 3 flat 4:flat
        6 Number=Sing 3 flat 4:flat
        7 None 12 case 13:case
        8 Definite=Ind|PronType=Art 12 det 13:det
        9 NumType=Card 11 nummod 12:nummod
        10 None 11 punct 12:punct
        11 Number=Sing 12 compound 13:compound
        12 Number=Sing 2 obl 3:obl:for
        13 None 15 case 16:case
        14 Degree=Pos 15 amod 16:amod
        15 Number=Sing 12 nmod 13:nmod:as
        16 None 18 case 19:case
        17 Definite=Def|PronType=Art 18 det 19:det
        18 Number=Sing 15 nmod 16:nmod:of
        19 None 21 case 22:case
        20 Number=Sing 21 compound 22:compound
        21 Number=Sing 18 nmod 19:nmod:of
        22 None 23 case 24:case
        23 Number=Plur 21 nmod 22:nmod:of
        24 None 2 punct 3:punct
        25 VerbForm=Ger 2 advcl 3:advcl
        26 Number=Sing 25 obj 26:obj
        27 Number=Sing 26 flat 27:flat
        28 Number=Sing 26 flat 27:flat
        29 None 2 punct 3:punct

        misc sentence line_num
        0 None [0, 162): 'Bush also nominated A. Noel Anketel... 73
        1 None [0, 162): 'Bush also nominated A. Noel Anketel... 74
        2 None [0, 162): 'Bush also nominated A. Noel Anketel... 75
        3 None [0, 162): 'Bush also nominated A. Noel Anketel... 76
        4 None [0, 162): 'Bush also nominated A. Noel Anketel... 77
        5 None [0, 162): 'Bush also nominated A. Noel Anketel... 78
        6 None [0, 162): 'Bush also nominated A. Noel Anketel... 79
        7 None [0, 162): 'Bush also nominated A. Noel Anketel... 80
        8 None [0, 162): 'Bush also nominated A. Noel Anketel... 81
        9 SpaceAfter=No [0, 162): 'Bush also nominated A. Noel Anketel... 82
        10 SpaceAfter=No [0, 162): 'Bush also nominated A. Noel Anketel... 83
        11 None [0, 162): 'Bush also nominated A. Noel Anketel... 84
        12 None [0, 162): 'Bush also nominated A. Noel Anketel... 85
        13 None [0, 162): 'Bush also nominated A. Noel Anketel... 86
        14 None [0, 162): 'Bush also nominated A. Noel Anketel... 87
        15 None [0, 162): 'Bush also nominated A. Noel Anketel... 88
        16 None [0, 162): 'Bush also nominated A. Noel Anketel... 89
        17 None [0, 162): 'Bush also nominated A. Noel Anketel... 90
        18 None [0, 162): 'Bush also nominated A. Noel Anketel... 91
        19 None [0, 162): 'Bush also nominated A. Noel Anketel... 92
        20 None [0, 162): 'Bush also nominated A. Noel Anketel... 93
        21 None [0, 162): 'Bush also nominated A. Noel Anketel... 94
        22 None [0, 162): 'Bush also nominated A. Noel Anketel... 95
        23 SpaceAfter=No [0, 162): 'Bush also nominated A. Noel Anketel... 96
        24 None [0, 162): 'Bush also nominated A. Noel Anketel... 97
        25 None [0, 162): 'Bush also nominated A. Noel Anketel... 98
        26 None [0, 162): 'Bush also nominated A. Noel Anketel... 99
        27 None [0, 162): 'Bush also nominated A. Noel Anketel... 100
        28 SpaceAfter=No [0, 162): 'Bush also nominated A. Noel Anketel... 101
        29 None [0, 162): 'Bush also nominated A. Noel Anketel... 102 """)

    self.assertEqual(actual, expected)
357
+
358
+
359
+
252
360
def test_conll_u_to_dataframes (self ):
253
361
dfs = conll_u_to_dataframes ("test_data/io/test_conll/conll_u_test1.txt" )
254
362
self .maxDiff = None
0 commit comments