Skip to content

Commit 7a24f63

Browse files
authored
fix: serialization of dataframes with NaN values and columns starting with digits (#486)
1 parent 4537d21 commit 7a24f63

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Bug Fixes
66
1. [#483](https://github.com/influxdata/influxdb-client-python/pull/483): Querying data if the `debug` is enabled
77
1. [#477](https://github.com/influxdata/influxdb-client-python/pull/477): Parsing date fails due to thread race
8+
1. [#486](https://github.com/influxdata/influxdb-client-python/pull/486): Fix serialization of DataFrames that contain NaN values and have columns whose names start with a digit
89

910
### Dependencies
1011
1. [#472](https://github.com/influxdata/influxdb-client-python/pull/472): Update `RxPY` to `4.0.4`

influxdb_client/client/write/dataframe_serializer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def serialize(self, chunk_idx: int = None):
265265
if self.first_field_maybe_null:
266266
# When the first field is null (None/NaN), we'll have
267267
# a spurious leading comma which needs to be removed.
268-
lp = (re.sub('^(( |[^ ])* ),([a-zA-Z])(.*)', '\\1\\3\\4', self.f(p))
268+
lp = (re.sub('^(( |[^ ])* ),([a-zA-Z0-9])(.*)', '\\1\\3\\4', self.f(p))
269269
for p in filter(lambda x: _any_not_nan(x, self.field_indexes), _itertuples(chunk)))
270270
return list(lp)
271271
else:

tests/test_WriteApiDataFrame.py

+33
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,39 @@ def test_specify_timezone_period_time_index(self):
511511
self.assertEqual('test value1=10i,value2=30i 1590307200000000000', points[0])
512512
self.assertEqual('test value1=20i,value2=40i 1590310800000000000', points[1])
513513

514+
def test_serialization_for_nan_in_columns_starting_with_digits(self):
515+
from influxdb_client.extras import pd
516+
from influxdb_client.extras import np
517+
data_frame = pd.DataFrame(data={
518+
'1value': [np.nan, 30.0, np.nan, 30.0, np.nan],
519+
'2value': [30.0, np.nan, np.nan, np.nan, np.nan],
520+
'3value': [30.0, 30.0, 30.0, np.nan, np.nan],
521+
'avalue': [30.0, 30.0, 30.0, 30.0, 30.0]
522+
}, index=pd.period_range('2020-05-24 10:00', freq='H', periods=5))
523+
524+
points = data_frame_to_list_of_points(data_frame,
525+
PointSettings(),
526+
data_frame_measurement_name='test')
527+
528+
self.assertEqual(5, len(points))
529+
self.assertEqual('test 2value=30.0,3value=30.0,avalue=30.0 1590314400000000000', points[0])
530+
self.assertEqual('test 1value=30.0,3value=30.0,avalue=30.0 1590318000000000000', points[1])
531+
self.assertEqual('test 3value=30.0,avalue=30.0 1590321600000000000', points[2])
532+
self.assertEqual('test 1value=30.0,avalue=30.0 1590325200000000000', points[3])
533+
self.assertEqual('test avalue=30.0 1590328800000000000', points[4])
534+
535+
data_frame = pd.DataFrame(data={
536+
'1value': [np.nan],
537+
'avalue': [30.0],
538+
'bvalue': [30.0]
539+
}, index=pd.period_range('2020-05-24 10:00', freq='H', periods=1))
540+
541+
points = data_frame_to_list_of_points(data_frame,
542+
PointSettings(),
543+
data_frame_measurement_name='test')
544+
self.assertEqual(1, len(points))
545+
self.assertEqual('test avalue=30.0,bvalue=30.0 1590314400000000000', points[0])
546+
514547

515548
class DataSerializerChunksTest(unittest.TestCase):
516549
def test_chunks(self):

0 commit comments

Comments
 (0)