Skip to content

Commit 0f29e7b

Browse files
authored
1 parent 68d8c95 commit 0f29e7b

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

dask/dataframe/io/parquet.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,9 @@ def _read_fastparquet(fs, paths, myopen, columns=None, filters=None,
91 91
dtypes)
92 92

93 93
for cat in categories:
94-
meta[cat] = pd.Series(pd.Categorical([],
95-
categories=[UNKNOWN_CATEGORIES]))
94+
if cat in meta:
95+
meta[cat] = pd.Series(pd.Categorical([],
96+
categories=[UNKNOWN_CATEGORIES]))
96 97

97 98
if index_col:
98 99
meta = meta.set_index(index_col)

dask/dataframe/io/tests/test_parquet.py

+16
Original file line number | Diff line number | Diff line change
@@ -536,3 +536,19 @@ def test_drill_scheme(fn):
536 536
out = df.compute()
537 537
assert 'dir0' in out
538 538
assert (np.unique(out.dir0) == ['test_data1', 'test_data2']).all()
539+
540+
541+
def test_parquet_select_cats(fn):
542+
df = pd.DataFrame({
543+
'categories': pd.Series(
544+
np.random.choice(['a', 'b', 'c', 'd', 'e', 'f'], size=100),
545+
dtype='category'),
546+
'ints': pd.Series(list(range(0, 100)), dtype='int'),
547+
'floats': pd.Series(list(range(0, 100)), dtype='float')})
548+
549+
ddf = dd.from_pandas(df, 1)
550+
ddf.to_parquet(fn)
551+
rddf = dd.read_parquet(fn, columns=['ints'])
552+
assert list(rddf.columns) == ['ints']
553+
rddf = dd.read_parquet(fn)
554+
assert list(rddf.columns) == list(df)

0 commit comments

Comments (0)