
Commit b40f24b

feat(data_frame): add possibility to specify timestamp column
1 parent 562f4fc commit b40f24b

File tree

4 files changed: +40 −9 lines changed


influxdb_client/client/write/dataframe_serializer.py

Lines changed: 21 additions & 9 deletions
@@ -41,6 +41,7 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
         :param chunk_size: The size of chunk for serializing into chunks.
         :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
         :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
+        :key data_frame_timestamp_column: DataFrame column which contains timestamp
         """
         # This function is hard to understand but for good reason:
         # the approach used here is considerably more efficient
@@ -92,19 +93,25 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
         if data_frame_measurement_name is None:
             raise TypeError('"data_frame_measurement_name" is a Required Argument')

+        timestamp_column = kwargs.get('data_frame_timestamp_column', None)
         data_frame = data_frame.copy(deep=False)
-        if isinstance(data_frame.index, pd.PeriodIndex):
-            data_frame.index = data_frame.index.to_timestamp()
+        data_frame_timestamp = data_frame.index if timestamp_column is None else data_frame[timestamp_column]
+        if isinstance(data_frame_timestamp, pd.PeriodIndex):
+            data_frame_timestamp = data_frame_timestamp.to_timestamp()
         else:
             # TODO: this is almost certainly not what you want
             # when the index is the default RangeIndex.
             # Instead, it would probably be better to leave
             # out the timestamp unless a time column is explicitly
             # enabled.
-            data_frame.index = pd.to_datetime(data_frame.index, unit=precision)
+            data_frame_timestamp = pd.to_datetime(data_frame_timestamp, unit=precision)

-        if data_frame.index.tzinfo is None:
-            data_frame.index = data_frame.index.tz_localize('UTC')
+        if hasattr(data_frame_timestamp, 'tzinfo') and data_frame_timestamp.tzinfo is None:
+            data_frame_timestamp = data_frame_timestamp.tz_localize('UTC')
+        if timestamp_column is None:
+            data_frame.index = data_frame_timestamp
+        else:
+            data_frame[timestamp_column] = data_frame_timestamp

         data_frame_tag_columns = kwargs.get('data_frame_tag_columns')
         data_frame_tag_columns = set(data_frame_tag_columns or [])
@@ -141,6 +148,7 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
         # null_columns has a bool value for each column holding
         # whether that column contains any null (NaN or None) values.
         null_columns = data_frame.isnull().any()
+        timestamp_index = 0

         # Iterate through the columns building up the expression for each column.
         for index, (key, value) in columns:
@@ -164,6 +172,9 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
                     key_value = f',{key_format}={{str({val_format}).translate(_ESCAPE_KEY)}}'
                 tags.append(key_value)
                 continue
+            elif timestamp_column is not None and key in timestamp_column:
+                timestamp_index = field_index
+                continue

             # This column is a field column.
             # Note: no comma separator is needed for the first field.
@@ -195,13 +206,13 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION

         tags = ''.join(tags)
         fields = ''.join(fields)
-        timestamp = '{p[0].value}'
+        timestamp = '{p[%s].value}' % timestamp_index
         if precision == WritePrecision.US:
-            timestamp = '{int(p[0].value / 1e3)}'
+            timestamp = '{int(p[%s].value / 1e3)}' % timestamp_index
         elif precision == WritePrecision.MS:
-            timestamp = '{int(p[0].value / 1e6)}'
+            timestamp = '{int(p[%s].value / 1e6)}' % timestamp_index
         elif precision == WritePrecision.S:
-            timestamp = '{int(p[0].value / 1e9)}'
+            timestamp = '{int(p[%s].value / 1e9)}' % timestamp_index

         f = eval(f'lambda p: f"""{{measurement_name}}{tags} {fields} {timestamp}"""', {
             'measurement_name': measurement_name,
@@ -268,5 +279,6 @@ def data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_W
     :param precision: The precision for the unix timestamps within the body line-protocol.
     :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
     :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
+    :key data_frame_timestamp_column: DataFrame column which contains timestamps
     """
     return DataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()
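
The hunks above route the timestamp through a single data_frame_timestamp value: the named column when data_frame_timestamp_column is given, the DataFrame index otherwise, converted via pd.to_datetime and localized to UTC when naive. Below is a minimal standalone sketch of that resolution logic, not part of the commit; resolve_timestamps and the sample frame are illustrative only, and the real serializer additionally passes the write precision as the unit for pd.to_datetime.

# A minimal standalone sketch of the timestamp resolution shown in the diff above.
import pandas as pd

def resolve_timestamps(data_frame: pd.DataFrame, timestamp_column: str = None):
    # Use the named column when given, otherwise fall back to the index,
    # mirroring the data_frame_timestamp selection in the serializer.
    ts = data_frame.index if timestamp_column is None else data_frame[timestamp_column]
    if isinstance(ts, pd.PeriodIndex):
        ts = ts.to_timestamp()
    else:
        ts = pd.to_datetime(ts)
    # Only objects that expose tzinfo (e.g. a DatetimeIndex) are localized;
    # naive timestamps are interpreted as UTC, as in the serializer.
    if hasattr(ts, 'tzinfo') and ts.tzinfo is None:
        ts = ts.tz_localize('UTC')
    return ts

df = pd.DataFrame({'column_time': ['2020-04-05', '2020-05-05'], 'value1': [10, 20]})
print(resolve_timestamps(df, timestamp_column='column_time'))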

influxdb_client/client/write_api.py

Lines changed: 1 addition & 0 deletions
@@ -294,6 +294,7 @@ def write(self, bucket: str, org: str = None,
         :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame - ``DataFrame``
         :key data_frame_tag_columns: list of DataFrame columns which are tags,
                                      rest columns will be fields - ``DataFrame``
+        :key data_frame_timestamp_column: DataFrame column which contains timestamp - ``DataFrame``
         :key record_measurement_key: key of record with specified measurement -
                                      ``dictionary``, ``NamedTuple``, ``dataclass``
         :key record_measurement_name: static measurement name - ``dictionary``, ``NamedTuple``, ``dataclass``
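
With the docstring entry above, the new kwarg is passed to WriteApi.write alongside the existing data_frame_* options. A hedged usage sketch follows: the url, token, org and bucket values are placeholders, and the DataFrame mirrors the one used in the test further below.

# Usage sketch only; connection values are placeholders, not part of this commit.
import pandas as pd
from influxdb_client import InfluxDBClient
from influxdb_client.client.write_api import SYNCHRONOUS

df = pd.DataFrame({
    'column_time': ['2020-04-05', '2020-05-05'],
    'value1': [10, 20],
})

with InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org") as client:
    write_api = client.write_api(write_options=SYNCHRONOUS)
    write_api.write(
        bucket="my-bucket",
        record=df,
        data_frame_measurement_name="test",
        data_frame_timestamp_column="column_time",  # the kwarg introduced by this commit
    )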

influxdb_client/client/write_api_async.py

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ async def write(self, bucket: str, org: str = None,
         :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame - ``DataFrame``
         :key data_frame_tag_columns: list of DataFrame columns which are tags,
                                      rest columns will be fields - ``DataFrame``
+        :key data_frame_timestamp_column: DataFrame column which contains timestamp - ``DataFrame``
         :key record_measurement_key: key of record with specified measurement -
                                      ``dictionary``, ``NamedTuple``, ``dataclass``
         :key record_measurement_name: static measurement name - ``dictionary``, ``NamedTuple``, ``dataclass``
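
The async API documents the same kwarg. A corresponding sketch, assuming InfluxDBClientAsync as the entry point and again using placeholder connection values:

# Async usage sketch only; placeholders throughout, mirrors the sync example above.
import asyncio
import pandas as pd
from influxdb_client.client.influxdb_client_async import InfluxDBClientAsync

async def main():
    df = pd.DataFrame({'column_time': ['2020-04-05', '2020-05-05'], 'value1': [10, 20]})
    async with InfluxDBClientAsync(url="http://localhost:8086", token="my-token", org="my-org") as client:
        await client.write_api().write(
            bucket="my-bucket",
            record=df,
            data_frame_measurement_name="test",
            data_frame_timestamp_column="column_time",
        )

asyncio.run(main())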

tests/test_WriteApiDataFrame.py

Lines changed: 17 additions & 0 deletions
@@ -416,6 +416,23 @@ def test_without_tags_and_fields_with_nan(self):
         self.assertEqual("test a=1.0 1609459260000000000", points[1])
         self.assertEqual("test a=2.0,b=1.0 1609459320000000000", points[2])

+    def test_use_timestamp_from_specified_column(self):
+        from influxdb_client.extras import pd
+        data_frame = pd.DataFrame(data={
+            'column_time': ['2020-04-05', '2020-05-05'],
+            'value1': [10, 20],
+            'value2': [30, 40],
+        }, index=['A', 'B'])
+
+        points = data_frame_to_list_of_points(data_frame=data_frame,
+                                              data_frame_measurement_name="test",
+                                              data_frame_timestamp_column="column_time",
+                                              point_settings=PointSettings())
+
+        self.assertEqual(2, len(points))
+        self.assertEqual('test value1=10i,value2=30i 1586044800000000000', points[0])
+        self.assertEqual('test value1=20i,value2=40i 1588636800000000000', points[1])
+

 class DataSerializerChunksTest(unittest.TestCase):
     def test_chunks(self):
