43
43
44
44
from databricks .sql .types import Row , SSLOptions
45
45
from databricks .sql .auth .auth import get_python_sql_connector_auth_provider
46
- from databricks .sql .experimental .oauth_persistence import OAuthPersistence
47
-
46
+ from databricks .sql .telemetry .telemetry_client import (
47
+ telemetry_client ,
48
+ NoopTelemetryClient ,
49
+ )
48
50
from databricks .sql .thrift_api .TCLIService .ttypes import (
49
51
TSparkParameter ,
50
52
TOperationState ,
51
53
)
52
-
54
+ from databricks .sql .telemetry .latency_logger import log_latency
55
+ from databricks .sql .telemetry .models .enums import DriverVolumeOperationType
53
56
54
57
logger = logging .getLogger (__name__ )
55
58
@@ -238,6 +241,9 @@ def read(self) -> Optional[OAuthToken]:
238
241
self .telemetry_enabled = (
239
242
self .client_telemetry_enabled and self .server_telemetry_enabled
240
243
)
244
+ telemetry_batch_size = kwargs .get (
245
+ "telemetry_batch_size" , 100
246
+ ) # TODO: Decide on batch size
241
247
242
248
user_agent_entry = kwargs .get ("user_agent_entry" )
243
249
if user_agent_entry is None :
@@ -294,6 +300,25 @@ def read(self) -> Optional[OAuthToken]:
294
300
kwargs .get ("use_inline_params" , False )
295
301
)
296
302
303
+ if self .telemetry_enabled :
304
+ telemetry_client .initialize (
305
+ host = self .host ,
306
+ connection_uuid = self .get_session_id_hex (),
307
+ auth_provider = auth_provider ,
308
+ is_authenticated = True , # TODO: Add authentication logic later
309
+ batch_size = telemetry_batch_size ,
310
+ user_agent = useragent_header ,
311
+ )
312
+
313
+ telemetry_client .export_initial_telemetry_log (
314
+ http_path ,
315
+ self .port ,
316
+ kwargs .get ("_socket_timeout" , None ),
317
+ self .get_session_id_hex (),
318
+ )
319
+ else :
320
+ self .telemetry_client = NoopTelemetryClient ()
321
+
297
322
def _set_use_inline_params_with_warning (self , value : Union [bool , str ]):
298
323
"""Valid values are True, False, and "silent"
299
324
@@ -430,6 +455,9 @@ def _close(self, close_cursors=True) -> None:
430
455
431
456
self .open = False
432
457
458
+ if self .telemetry_enabled :
459
+ telemetry_client .close (self .get_session_id_hex ())
460
+
433
461
def commit (self ):
434
462
"""No-op because Databricks does not support transactions"""
435
463
pass
@@ -487,7 +515,10 @@ def __iter__(self):
487
515
for row in self .active_result_set :
488
516
yield row
489
517
else :
490
- raise Error ("There is no active result set" )
518
+ raise Error (
519
+ "There is no active result set" ,
520
+ connection_uuid = self .connection .get_session_id_hex (),
521
+ )
491
522
492
523
def _determine_parameter_approach (
493
524
self , params : Optional [TParameterCollection ]
@@ -624,7 +655,10 @@ def _close_and_clear_active_result_set(self):
624
655
625
656
def _check_not_closed (self ):
626
657
if not self .open :
627
- raise Error ("Attempting operation on closed cursor" )
658
+ raise Error (
659
+ "Attempting operation on closed cursor" ,
660
+ connection_uuid = self .connection .get_session_id_hex (),
661
+ )
628
662
629
663
def _handle_staging_operation (
630
664
self , staging_allowed_local_path : Union [None , str , List [str ]]
@@ -642,7 +676,8 @@ def _handle_staging_operation(
642
676
_staging_allowed_local_paths = staging_allowed_local_path
643
677
else :
644
678
raise Error (
645
- "You must provide at least one staging_allowed_local_path when initialising a connection to perform ingestion commands"
679
+ "You must provide at least one staging_allowed_local_path when initialising a connection to perform ingestion commands" ,
680
+ connection_uuid = self .connection .get_session_id_hex (),
646
681
)
647
682
648
683
abs_staging_allowed_local_paths = [
@@ -671,7 +706,8 @@ def _handle_staging_operation(
671
706
continue
672
707
if not allow_operation :
673
708
raise Error (
674
- "Local file operations are restricted to paths within the configured staging_allowed_local_path"
709
+ "Local file operations are restricted to paths within the configured staging_allowed_local_path" ,
710
+ connection_uuid = self .connection .get_session_id_hex (),
675
711
)
676
712
677
713
# May be real headers, or could be json string
@@ -701,9 +737,11 @@ def _handle_staging_operation(
701
737
else :
702
738
raise Error (
703
739
f"Operation { row .operation } is not supported. "
704
- + "Supported operations are GET, PUT, and REMOVE"
740
+ + "Supported operations are GET, PUT, and REMOVE" ,
741
+ connection_uuid = self .connection .get_session_id_hex (),
705
742
)
706
743
744
+ @log_latency ()
707
745
def _handle_staging_put (
708
746
self , presigned_url : str , local_file : str , headers : Optional [dict ] = None
709
747
):
@@ -713,7 +751,13 @@ def _handle_staging_put(
713
751
"""
714
752
715
753
if local_file is None :
716
- raise Error ("Cannot perform PUT without specifying a local_file" )
754
+ raise Error (
755
+ "Cannot perform PUT without specifying a local_file" ,
756
+ connection_uuid = self .connection .get_session_id_hex (),
757
+ )
758
+
759
+ self .volume_operation_type = DriverVolumeOperationType .PUT
760
+ self .volume_path = local_file
717
761
718
762
with open (local_file , "rb" ) as fh :
719
763
r = requests .put (url = presigned_url , data = fh , headers = headers )
@@ -730,7 +774,8 @@ def _handle_staging_put(
730
774
731
775
if r .status_code not in [OK , CREATED , NO_CONTENT , ACCEPTED ]:
732
776
raise Error (
733
- f"Staging operation over HTTP was unsuccessful: { r .status_code } -{ r .text } "
777
+ f"Staging operation over HTTP was unsuccessful: { r .status_code } -{ r .text } " ,
778
+ connection_uuid = self .connection .get_session_id_hex (),
734
779
)
735
780
736
781
if r .status_code == ACCEPTED :
@@ -739,6 +784,7 @@ def _handle_staging_put(
739
784
+ "but not yet applied on the server. It's possible this command may fail later."
740
785
)
741
786
787
+ @log_latency ()
742
788
def _handle_staging_get (
743
789
self , local_file : str , presigned_url : str , headers : Optional [dict ] = None
744
790
):
@@ -748,25 +794,38 @@ def _handle_staging_get(
748
794
"""
749
795
750
796
if local_file is None :
751
- raise Error ("Cannot perform GET without specifying a local_file" )
797
+ raise Error (
798
+ "Cannot perform GET without specifying a local_file" ,
799
+ connection_uuid = self .connection .get_session_id_hex (),
800
+ )
801
+
802
+ self .volume_operation_type = DriverVolumeOperationType .GET
803
+ self .volume_path = local_file
752
804
753
805
r = requests .get (url = presigned_url , headers = headers )
754
806
755
807
# response.ok verifies the status code is not between 400-600.
756
808
# Any 2xx or 3xx will evaluate r.ok == True
757
809
if not r .ok :
758
810
raise Error (
759
- f"Staging operation over HTTP was unsuccessful: { r .status_code } -{ r .text } "
811
+ f"Staging operation over HTTP was unsuccessful: { r .status_code } -{ r .text } " ,
812
+ connection_uuid = self .connection .get_session_id_hex (),
760
813
)
761
814
762
815
with open (local_file , "wb" ) as fp :
763
816
fp .write (r .content )
764
817
818
+ @log_latency ()
765
819
def _handle_staging_remove (
766
820
self , presigned_url : str , headers : Optional [dict ] = None
767
821
):
768
822
"""Make an HTTP DELETE request to the presigned_url"""
769
823
824
+ self .volume_operation_type = DriverVolumeOperationType .DELETE
825
+ self .volume_path = (
826
+ presigned_url # Using presigned URL as path since there's no local file
827
+ )
828
+
770
829
r = requests .delete (url = presigned_url , headers = headers )
771
830
772
831
if not r .ok :
@@ -970,7 +1029,8 @@ def get_async_execution_result(self):
970
1029
return self
971
1030
else :
972
1031
raise Error (
973
- f"get_execution_result failed with Operation status { operation_state } "
1032
+ f"get_execution_result failed with Operation status { operation_state } " ,
1033
+ connection_uuid = self .connection .get_session_id_hex (),
974
1034
)
975
1035
976
1036
def executemany (self , operation , seq_of_parameters ):
@@ -1120,7 +1180,10 @@ def fetchall(self) -> List[Row]:
1120
1180
if self .active_result_set :
1121
1181
return self .active_result_set .fetchall ()
1122
1182
else :
1123
- raise Error ("There is no active result set" )
1183
+ raise Error (
1184
+ "There is no active result set" ,
1185
+ connection_uuid = self .connection .get_session_id_hex (),
1186
+ )
1124
1187
1125
1188
def fetchone (self ) -> Optional [Row ]:
1126
1189
"""
@@ -1134,7 +1197,10 @@ def fetchone(self) -> Optional[Row]:
1134
1197
if self .active_result_set :
1135
1198
return self .active_result_set .fetchone ()
1136
1199
else :
1137
- raise Error ("There is no active result set" )
1200
+ raise Error (
1201
+ "There is no active result set" ,
1202
+ connection_uuid = self .connection .get_session_id_hex (),
1203
+ )
1138
1204
1139
1205
def fetchmany (self , size : int ) -> List [Row ]:
1140
1206
"""
@@ -1156,21 +1222,30 @@ def fetchmany(self, size: int) -> List[Row]:
1156
1222
if self .active_result_set :
1157
1223
return self .active_result_set .fetchmany (size )
1158
1224
else :
1159
- raise Error ("There is no active result set" )
1225
+ raise Error (
1226
+ "There is no active result set" ,
1227
+ connection_uuid = self .connection .get_session_id_hex (),
1228
+ )
1160
1229
1161
1230
def fetchall_arrow (self ) -> "pyarrow.Table" :
1162
1231
self ._check_not_closed ()
1163
1232
if self .active_result_set :
1164
1233
return self .active_result_set .fetchall_arrow ()
1165
1234
else :
1166
- raise Error ("There is no active result set" )
1235
+ raise Error (
1236
+ "There is no active result set" ,
1237
+ connection_uuid = self .connection .get_session_id_hex (),
1238
+ )
1167
1239
1168
1240
def fetchmany_arrow (self , size ) -> "pyarrow.Table" :
1169
1241
self ._check_not_closed ()
1170
1242
if self .active_result_set :
1171
1243
return self .active_result_set .fetchmany_arrow (size )
1172
1244
else :
1173
- raise Error ("There is no active result set" )
1245
+ raise Error (
1246
+ "There is no active result set" ,
1247
+ connection_uuid = self .connection .get_session_id_hex (),
1248
+ )
1174
1249
1175
1250
def cancel (self ) -> None :
1176
1251
"""
0 commit comments