
Commit bdcdf04

some
1 parent 37bd89f commit bdcdf04

8 files changed: +593 −120 lines changed


main.py

+26 −84

@@ -1,92 +1,34 @@
-import base64
-import json
-import os
-import time
+import re

-import requests
-from dotenv import load_dotenv
-from requests import RequestException
+def get_column_name_and_type(ddl):
+    # Regular expression to match column name and type
+    column_pattern = re.compile(r'(\w+)\s+([\w\(\)]+(?: PRIMARY KEY)?)')

-load_dotenv()
+    # Extract the part of the DDL inside the parentheses
+    columns_section = re.search(r'\((.*)\);', ddl, re.DOTALL).group(1)

-ORG_USERNAME = os.environ.get("ORG_USERNAME")
-ORG_PASSWORD = os.environ.get("ORG_PASSWORD")
-ORG_GRANT_TYPE = os.environ.get("ORG_GRANT_TYPE")
-ORG_TOKEN_URL = os.environ.get("ORG_TOKEN_URL")
-ORG_CONSUMER_KEY = os.environ.get("ORG_CONSUMER_KEY")
-ORG_CONSUMER_SECRET = os.environ.get("ORG_CONSUMER_SECRET")
-ORG_COMPANY_URL = os.environ.get("ORG_COMPANY_URL")
+    # Find all matches in the columns section
+    columns = column_pattern.findall(columns_section)

-cache = {}
+    # Create a dictionary with column names as keys and types as values
+    column_dict = {column[0]: column[1].split('(')[0] for column in columns}

+    return column_dict

-def get_token() -> dict | str:
-    """Get authentication token and cache it."""
-    if "org_token" in cache and cache["org_token"]["expires_at"] > time.time():
-        return cache["org_token"]["token"]
+# Example DDL statement
+ddl_statement = """
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(100),
+    hire_date DATE,
+    salary NUMERIC(10, 2)
+);
+"""

-    payload = {
-        "grant_type": ORG_GRANT_TYPE,
-        "username": ORG_USERNAME,
-        "password": ORG_PASSWORD,
-    }
-    auth_str = f"{ORG_CONSUMER_KEY}:{ORG_CONSUMER_SECRET}"
-    auth_base64 = base64.b64encode(auth_str.encode("utf-8")).decode("utf-8")
+# Get column names and types as a dictionary
+d = get_column_name_and_type(ddl_statement)

-    headers = {
-        "Content-Type": "application/x-www-form-urlencoded",
-        "Authorization": f"Basic {auth_base64}",
-    }
-    try:
-        response = requests.post(ORG_TOKEN_URL, headers=headers, data=payload)
-        response.raise_for_status()
-        token = response.json().get("access_token")
-        expires_in = response.json().get("expires_in", 3600)  # Default: 1 hour
-
-        if token:
-            cache["org_token"] = {
-                "token": token,
-                "expires_at": time.time() + expires_in  # Token expiry time
-            }
-        return token
-    except RequestException as e:
-        return {"error": "Failed to obtain token", "details": str(e)}
-
-
-def get_company(inn: int | str) -> dict:
-    """Fetch company details by INN from an external service."""
-    payload = json.dumps({"tin": str(inn)})
-
-    token = get_token()
-    if isinstance(token, dict):
-        return token
-
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {token}",
-    }
-
-    try:
-        response = requests.post(ORG_COMPANY_URL, headers=headers, data=payload)
-        if response.status_code == 401:
-            token = get_token()
-            if isinstance(token, dict):
-                return token
-
-            headers["Authorization"] = f"Bearer {token}"
-            response = requests.post(ORG_COMPANY_URL, headers=headers, data=payload)
-
-        response.raise_for_status()
-        response_js = response.json()
-        return {
-            "name": response_js.get("name"),
-            "address": response_js.get("address"),
-            "oked": response_js.get("nc6Code"),
-            "ns_code": response_js.get("ns10Code"),
-        }
-
-    except RequestException as e:
-        return {"error": "Failed to fetch company details", "details": str(e)}
-
-
-print(get_company("200524845"))
+# Print the result
+print(d)
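
For context on the new main.py: below is the dictionary the rewritten parser should produce for the bundled example DDL. This is a sketch based on a reading of the regex above, not output captured from the commit.

# Expected result of get_column_name_and_type(ddl_statement) for the example DDL,
# inferred from the regex rather than from a recorded run: precision such as
# (50) or (10, 2) is stripped by split('('), while the optional " PRIMARY KEY"
# suffix stays attached to the reported type.
expected = {
    "id": "SERIAL PRIMARY KEY",
    "first_name": "VARCHAR",
    "last_name": "VARCHAR",
    "email": "VARCHAR",
    "hire_date": "DATE",
    "salary": "NUMERIC",
}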

main1.py

+77

@@ -0,0 +1,77 @@
+import re
+
+
+def postgres_to_clickhouse_type(postgres_type):
+    type_mapping = {
+        "SERIAL": "UInt32",
+        "VARCHAR": "String",
+        "CHAR": "String",
+        "TEXT": "String",
+        "INT": "Int32",
+        "INTEGER": "Int32",
+        "SMALLINT": "Int16",
+        "BIGINT": "Int64",
+        "NUMERIC": "Float64",  # Default mapping
+        "DECIMAL": "Float64",  # Default mapping
+        "REAL": "Float32",
+        "DOUBLE PRECISION": "Float64",
+        "DATE": "Date",
+        "TIMESTAMP": "DateTime",
+        "TIMESTAMPTZ": "DateTime",
+        "BOOLEAN": "UInt8",
+        "BOOL": "UInt8",
+        "JSON": "String",
+        "JSONB": "String",
+        "UUID": "UUID",
+    }
+
+    # Check for type precision (e.g., VARCHAR(50), NUMERIC(10, 2))
+    if '(' in postgres_type:
+        base_type = postgres_type.split('(')[0].strip()
+        if base_type in ["VARCHAR", "CHAR", "TEXT"]:
+            return "String"  # ClickHouse always maps string types to String
+        elif base_type in ["NUMERIC", "DECIMAL"]:
+            return "Float64"  # ClickHouse defaults to Float64 for numeric types with precision
+        else:
+            # Return the base type if it's numeric with precision or something unsupported
+            return type_mapping.get(base_type, "String")
+    else:
+        # No precision/size; simply return the mapped type
+        return type_mapping.get(postgres_type, "String")
+
+
+ddl_statement = """
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(100),
+    hire_date DATE,
+    salary NUMERIC(10, 2),
+    status BOOLEAN,
+    department_id INT,
+    join_date TIMESTAMP,
+    updated_at TIMESTAMPTZ,
+    profile JSONB,
+    unique_code UUID,
+    age SMALLINT,
+    bonus DOUBLE PRECISION
+);
+"""
+
+def get_column_name_and_type(ddl):
+    # Regex to capture column name and type
+    column_pattern = re.compile(r"(\w+)\s+([\w\(\)\s]+(?: PRIMARY KEY)?)")
+    columns_section = re.search(r"\((.*)\);", ddl, re.DOTALL).group(1)
+    columns = column_pattern.findall(columns_section)
+    return {column[0]: column[1] for column in columns}
+
+
+columns = get_column_name_and_type(ddl_statement)
+
+# Convert the column types to ClickHouse types
+clickhouse_columns = {
+    col: postgres_to_clickhouse_type(col_type) for col, col_type in columns.items()
+}
+
+print(clickhouse_columns)
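
A possible follow-up to main1.py, not part of this commit: rendering the mapped columns as a ClickHouse CREATE TABLE statement. The helper name, the MergeTree engine, and the ORDER BY key below are illustrative assumptions.

# Hypothetical helper (not in this commit): build ClickHouse DDL from the
# {column: clickhouse_type} mapping produced above. Engine and sort key are
# assumed for illustration, not taken from the repository.
def build_clickhouse_ddl(table_name, columns, order_by="id"):
    cols = ",\n    ".join(f"{name} {ch_type}" for name, ch_type in columns.items())
    return (
        f"CREATE TABLE {table_name} (\n"
        f"    {cols}\n"
        f") ENGINE = MergeTree()\n"
        f"ORDER BY {order_by};"
    )


print(build_clickhouse_ddl("employees", {"id": "UInt32", "first_name": "String"}))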

requirements.txt

+4 −1

@@ -1,3 +1,6 @@
 black==25.1.0
 requests==2.32.3
-python-dotenv==1.0.1
+python-dotenv==1.0.1
+psycopg2-binary==2.9.10
+clickhouse-connect==0.8.15
+confluent-kafka==2.8.0
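
The added pins suggest a Postgres-to-ClickHouse flow with Kafka in between. A minimal connectivity sketch under that assumption; hosts, ports, and credentials below are placeholders, not values from this repository:

# Minimal connectivity sketch for the newly pinned dependencies; all connection
# parameters are placeholders for illustration only.
import psycopg2                       # psycopg2-binary
import clickhouse_connect             # clickhouse-connect
from confluent_kafka import Producer  # confluent-kafka

pg_conn = psycopg2.connect(host="localhost", dbname="postgres",
                           user="postgres", password="postgres")
ch_client = clickhouse_connect.get_client(host="localhost", port=8123)
producer = Producer({"bootstrap.servers": "localhost:9092"})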
