Commit 335fc0c

Implemented ColumnQueue to test the fetchall without pyarrow
Removed token
1 parent b438c38 commit 335fc0c

File tree

12 files changed, +227 -7 lines changed


.idea/.gitignore

Lines changed: 8 additions & 0 deletions

.idea/codeStyles/Project.xml

Lines changed: 7 additions & 0 deletions

.idea/codeStyles/codeStyleConfig.xml

Lines changed: 5 additions & 0 deletions

.idea/databricks-sql-python.iml

Lines changed: 11 additions & 0 deletions

.idea/misc.xml

Lines changed: 9 additions & 0 deletions

.idea/modules.xml

Lines changed: 8 additions & 0 deletions

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions

check.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+import os
+import sys
+# import logging
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+#
+# # Get the parent directory of the current file
+# target_folder_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "databricks-sql-python", "src"))
+#
+# # Add the parent directory to sys.path
+# sys.path.append(target_folder_path)
+
+from src.databricks import sql
+
+# from dotenv import load_dotenv
+
+# export DATABRICKS_TOKEN=whatever
+
+
+# Load environment variables from .env file
+# load_dotenv()
+
+host = "e2-dogfood.staging.cloud.databricks.com"
+http_path = "/sql/1.0/warehouses/dd43ee29fedd958d"
+
+access_token = ""
+connection = sql.connect(
+    server_hostname=host,
+    http_path=http_path,
+    access_token=access_token)
+
+
+cursor = connection.cursor()
+cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
+# cursor.execute('SELECT 1')
+result = cursor.fetchall()
+for row in result:
+    print(row)
+
+cursor.close()
+connection.close()
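
For local runs of this script, a minimal sketch of the same smoke test that reads credentials from environment variables instead of hardcoding them. The DATABRICKS_HOST and DATABRICKS_HTTP_PATH names are assumptions for illustration; DATABRICKS_TOKEN echoes the commented-out export above.

import os

from src.databricks import sql

connection = sql.connect(
    server_hostname=os.environ["DATABRICKS_HOST"],    # assumed variable name
    http_path=os.environ["DATABRICKS_HTTP_PATH"],     # assumed variable name
    access_token=os.environ["DATABRICKS_TOKEN"],
)

cursor = connection.cursor()
cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
for row in cursor.fetchall():
    print(row)

cursor.close()
connection.close()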

src/databricks/sql/client.py

Lines changed: 25 additions & 0 deletions
@@ -777,6 +777,9 @@ def execute(
             use_cloud_fetch=self.connection.use_cloud_fetch,
             parameters=prepared_params,
         )
+
+        print("Line 781")
+        print(execute_response)
         self.active_result_set = ResultSet(
             self.connection,
             execute_response,
@@ -1129,6 +1132,20 @@ def _fill_results_buffer(self):
         self.results = results
         self.has_more_rows = has_more_rows
 
+    def _convert_columnar_table(self, table):
+        column_names = [c[0] for c in self.description]
+        ResultRow = Row(*column_names)
+
+        result = []
+        for row_index in range(len(table[0])):
+            curr_row = []
+            for col_index in range(len(table)-1, -1, -1):
+                curr_row.append(table[col_index][row_index])
+            result.append(ResultRow(*curr_row))
+
+        return result
+
+
     def _convert_arrow_table(self, table):
         column_names = [c[0] for c in self.description]
         ResultRow = Row(*column_names)
@@ -1209,6 +1226,11 @@ def fetchall_arrow(self) -> pyarrow.Table:
 
         return results
 
+    def fetchall_columnar(self):
+        results = self.results.remaining_rows()
+        self._next_row_index += len(results[0])
+        return results
+
     def fetchone(self) -> Optional[Row]:
         """
         Fetch the next row of a query result set, returning a single sequence,
@@ -1224,6 +1246,9 @@ def fetchall(self) -> List[Row]:
         """
         Fetch all (remaining) rows of a query result, returning them as a list of rows.
         """
+
+        return self._convert_columnar_table(self.fetchall_columnar())
+
         return self._convert_arrow_table(self.fetchall_arrow())
 
     def fetchmany(self, size: int) -> List[Row]:
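
For intuition, a small self-contained sketch of the idea behind _convert_columnar_table: transposing a column-oriented table into per-row records. namedtuple stands in for the client's Row factory and the toy data is made up, so this is illustrative rather than the committed implementation.

from collections import namedtuple

# One sequence per column, as the columnar fetch path returns them.
column_names = ["id", "name"]
column_table = [(1, 2, 3), ("a", "b", "c")]

# namedtuple stands in for the client's Row factory in this sketch.
ResultRow = namedtuple("ResultRow", column_names)

# Row i is built from the i-th element of every column.
rows = [ResultRow(*values) for values in zip(*column_table)]
for row in rows:
    print(row)
# ResultRow(id=1, name='a')
# ResultRow(id=2, name='b')
# ResultRow(id=3, name='c')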

src/databricks/sql/thrift_api/TCLIService/ttypes.py

Lines changed: 1 addition & 1 deletion

src/databricks/sql/thrift_backend.py

Lines changed: 16 additions & 3 deletions
@@ -37,6 +37,8 @@
     convert_column_based_set_to_arrow_table,
 )
 
+from src.databricks.sql.thrift_api.TCLIService.ttypes import TDBSqlResultFormat
+
 logger = logging.getLogger(__name__)
 
 unsafe_logger = logging.getLogger("databricks.sql.unsafe")
@@ -734,6 +736,7 @@ def _results_message_to_execute_response(self, resp, operation_state):
         else:
             t_result_set_metadata_resp = self._get_metadata_resp(resp.operationHandle)
 
+        print(f"Line 739 - {t_result_set_metadata_resp.resultFormat}")
         if t_result_set_metadata_resp.resultFormat not in [
             ttypes.TSparkRowSetType.ARROW_BASED_SET,
             ttypes.TSparkRowSetType.COLUMN_BASED_SET,
@@ -858,15 +861,25 @@ def execute_command(
             getDirectResults=ttypes.TSparkGetDirectResults(
                 maxRows=max_rows, maxBytes=max_bytes
             ),
-            canReadArrowResult=True,
+            canReadArrowResult=False,
             canDecompressLZ4Result=lz4_compression,
             canDownloadResult=use_cloud_fetch,
             confOverlay={
                 # We want to receive proper Timestamp arrow types.
                 "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
             },
-            useArrowNativeTypes=spark_arrow_types,
-            parameters=parameters,
+            # useArrowNativeTypes=spark_arrow_types,
+            # canReadArrowResult=True,
+            # # canDecompressLZ4Result=lz4_compression,
+            # canDecompressLZ4Result=False,
+            # canDownloadResult=False,
+            # # confOverlay={
+            # # # We want to receive proper Timestamp arrow types.
+            # # "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
+            # # },
+            # resultDataFormat=TDBSqlResultFormat(None,None,True),
+            # # useArrowNativeTypes=spark_arrow_types,
+            parameters=parameters,
         )
         resp = self.make_request(self._client.ExecuteStatement, req)
         return self._handle_execute_response(resp, cursor)
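
Setting canReadArrowResult=False tells the server the client will not consume Arrow batches, so results presumably come back as COLUMN_BASED_SET and flow through the new ColumnQueue path in utils.py. A hedged sketch of branching on the reported format, using the Thrift names that appear in this diff; the helper itself is hypothetical and only mirrors the debug print above.

from databricks.sql.thrift_api.TCLIService import ttypes

def describe_result_format(t_result_set_metadata_resp):
    # Same field the debug print above inspects.
    fmt = t_result_set_metadata_resp.resultFormat
    if fmt == ttypes.TSparkRowSetType.COLUMN_BASED_SET:
        return "columnar result, handled by ColumnQueue"
    if fmt == ttypes.TSparkRowSetType.ARROW_BASED_SET:
        return "arrow result, handled by ArrowQueue"
    return "other result format: {}".format(fmt)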

src/databricks/sql/utils.py

Lines changed: 89 additions & 3 deletions
@@ -1,5 +1,7 @@
 from __future__ import annotations
-
+import json
+from thrift.protocol import TJSONProtocol
+from thrift.transport import TTransport
 import datetime
 import decimal
 from abc import ABC, abstractmethod
@@ -33,15 +35,17 @@
 
 class ResultSetQueue(ABC):
     @abstractmethod
-    def next_n_rows(self, num_rows: int) -> pyarrow.Table:
+    def next_n_rows(self, num_rows: int):
         pass
 
     @abstractmethod
-    def remaining_rows(self) -> pyarrow.Table:
+    def remaining_rows(self):
         pass
 
 
 class ResultSetQueueFactory(ABC):
+
+
     @staticmethod
     def build_queue(
         row_set_type: TSparkRowSetType,
@@ -67,6 +71,18 @@ def build_queue(
         Returns:
             ResultSetQueue
         """
+
+        def trow_to_json(trow):
+            # Step 1: Serialize TRow using Thrift's TJSONProtocol
+            transport = TTransport.TMemoryBuffer()
+            protocol = TJSONProtocol.TJSONProtocol(transport)
+            trow.write(protocol)
+
+            # Step 2: Extract JSON string from the transport
+            json_str = transport.getvalue().decode('utf-8')
+
+            return json_str
+
         if row_set_type == TSparkRowSetType.ARROW_BASED_SET:
             arrow_table, n_valid_rows = convert_arrow_based_set_to_arrow_table(
                 t_row_set.arrowBatches, lz4_compressed, arrow_schema_bytes
@@ -76,6 +92,23 @@ def build_queue(
             )
             return ArrowQueue(converted_arrow_table, n_valid_rows)
         elif row_set_type == TSparkRowSetType.COLUMN_BASED_SET:
+            print("Lin 79 ")
+            print(type(t_row_set))
+            print(t_row_set)
+            json_str = json.loads(trow_to_json(t_row_set))
+            pretty_json = json.dumps(json_str, indent=2)
+            print(pretty_json)
+
+            converted_column_table, column_names = convert_column_based_set_to_column_table(
+                t_row_set.columns,
+                description)
+            print(converted_column_table, column_names)
+
+            return ColumnQueue(converted_column_table, column_names)
+
+            print(columnQueue.next_n_rows(2))
+            print(columnQueue.next_n_rows(2))
+            print(columnQueue.remaining_rows())
             arrow_table, n_valid_rows = convert_column_based_set_to_arrow_table(
                 t_row_set.columns, description
             )
@@ -97,6 +130,28 @@ def build_queue(
             raise AssertionError("Row set type is not valid")
 
 
+class ColumnQueue(ResultSetQueue):
+    def __init__(
+        self,
+        columnar_table, column_names):
+        self.columnar_table = columnar_table
+        self.cur_row_index = 0
+        self.n_valid_rows = len(columnar_table[0])
+        self.column_names = column_names
+
+    def next_n_rows(self, num_rows):
+        length = min(num_rows, self.n_valid_rows - self.cur_row_index)
+        # Slicing using the default python slice
+        next_data = [column[self.cur_row_index:self.cur_row_index+length] for column in self.columnar_table]
+        self.cur_row_index += length
+        return next_data
+
+    def remaining_rows(self):
+        next_data = [column[self.cur_row_index:] for column in self.columnar_table]
+        self.cur_row_index += len(next_data[0])
+        return next_data
+
+
 class ArrowQueue(ResultSetQueue):
     def __init__(
         self,
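
As a quick sanity check, a minimal sketch of how ColumnQueue slices a column-oriented table. The toy table below is made up; this simply exercises the class defined above outside of the Thrift plumbing.

# Two columns ("id", "name"), three rows, stored column-wise.
columns = [(1, 2, 3), ("a", "b", "c")]
queue = ColumnQueue(columns, column_names=["id", "name"])

print(queue.next_n_rows(2))    # [(1, 2), ('a', 'b')]
print(queue.remaining_rows())  # [(3,), ('c',)]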
@@ -570,6 +625,13 @@ def convert_column_based_set_to_arrow_table(columns, description):
     )
     return arrow_table, arrow_table.num_rows
 
+def convert_column_based_set_to_column_table(columns, description):
+    column_names = [c[0] for c in description]
+    column_table = [_covert_column_to_list(c) for c in columns]
+
+    return column_table, column_names
+
+
 
 def _convert_column_to_arrow_array(t_col):
     """
@@ -594,6 +656,15 @@ def _convert_column_to_arrow_array(t_col):
 
     raise OperationalError("Empty TColumn instance {}".format(t_col))
 
+def _covert_column_to_list(t_col):
+    supported_field_types = ("boolVal", "byteVal", "i16Val", "i32Val", "i64Val", "doubleVal", "stringVal", "binaryVal")
+
+    for field in supported_field_types:
+        wrapper = getattr(t_col, field)
+        if wrapper:
+            return _create_python_tuple(wrapper)
+
+    raise OperationalError("Empty TColumn instance {}".format(t_col))
 
 def _create_arrow_array(t_col_value_wrapper, arrow_type):
     result = t_col_value_wrapper.values
@@ -609,3 +680,18 @@ def _create_arrow_array(t_col_value_wrapper, arrow_type):
             result[i] = None
 
     return pyarrow.array(result, type=arrow_type)
+
+def _create_python_tuple(t_col_value_wrapper):
+    result = t_col_value_wrapper.values
+    nulls = t_col_value_wrapper.nulls  # bitfield describing which values are null
+    assert isinstance(nulls, bytes)
+
+    # The number of bits in nulls can be both larger or smaller than the number of
+    # elements in result, so take the minimum of both to iterate over.
+    length = min(len(result), len(nulls) * 8)
+
+    for i in range(length):
+        if nulls[i >> 3] & BIT_MASKS[i & 0x7]:
+            result[i] = None
+
+    return tuple(result)
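
To make the null decoding concrete, a short worked example of the bitmask logic _create_python_tuple relies on. BIT_MASKS is assumed to be the low-bit-first table [1, 2, 4, ..., 128] already defined in utils.py, and mask_nulls is a hypothetical standalone copy of the loop above, not part of the commit.

BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]  # assumed definition

def mask_nulls(values, nulls):
    # Bit i of the nulls bitfield marks values[i] as NULL.
    out = list(values)
    length = min(len(out), len(nulls) * 8)
    for i in range(length):
        if nulls[i >> 3] & BIT_MASKS[i & 0x7]:
            out[i] = None
    return tuple(out)

# 0b00000101 sets bits 0 and 2, so rows 0 and 2 become None.
print(mask_nulls(["a", "b", "c", "d"], bytes([0b00000101])))
# (None, 'b', None, 'd')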
