
Commit bdcdf04

some
1 parent 37bd89f commit bdcdf04

8 files changed: +593 −120 lines changed


main.py

+26 −84

@@ -1,92 +1,34 @@
-import base64
-import json
-import os
-import time
+import re

-import requests
-from dotenv import load_dotenv
-from requests import RequestException
+def get_column_name_and_type(ddl):
+    # Regular expression to match column name and type
+    column_pattern = re.compile(r'(\w+)\s+([\w\(\)]+(?: PRIMARY KEY)?)')

-load_dotenv()
+    # Extract the part of the DDL inside the parentheses
+    columns_section = re.search(r'\((.*)\);', ddl, re.DOTALL).group(1)

-ORG_USERNAME = os.environ.get("ORG_USERNAME")
-ORG_PASSWORD = os.environ.get("ORG_PASSWORD")
-ORG_GRANT_TYPE = os.environ.get("ORG_GRANT_TYPE")
-ORG_TOKEN_URL = os.environ.get("ORG_TOKEN_URL")
-ORG_CONSUMER_KEY = os.environ.get("ORG_CONSUMER_KEY")
-ORG_CONSUMER_SECRET = os.environ.get("ORG_CONSUMER_SECRET")
-ORG_COMPANY_URL = os.environ.get("ORG_COMPANY_URL")
+    # Find all matches in the columns section
+    columns = column_pattern.findall(columns_section)

-cache = {}
+    # Create a dictionary with column names as keys and types as values
+    column_dict = {column[0]: column[1].split('(')[0] for column in columns}

+    return column_dict

-def get_token() -> dict | str:
-    """Get authentication token and cache it."""
-    if "org_token" in cache and cache["org_token"]["expires_at"] > time.time():
-        return cache["org_token"]["token"]
+# Example DDL statement
+ddl_statement = """
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(100),
+    hire_date DATE,
+    salary NUMERIC(10, 2)
+);
+"""

-    payload = {
-        "grant_type": ORG_GRANT_TYPE,
-        "username": ORG_USERNAME,
-        "password": ORG_PASSWORD,
-    }
-    auth_str = f"{ORG_CONSUMER_KEY}:{ORG_CONSUMER_SECRET}"
-    auth_base64 = base64.b64encode(auth_str.encode("utf-8")).decode("utf-8")
+# Get column names and types as a dictionary
+d = get_column_name_and_type(ddl_statement)

-    headers = {
-        "Content-Type": "application/x-www-form-urlencoded",
-        "Authorization": f"Basic {auth_base64}",
-    }
-    try:
-        response = requests.post(ORG_TOKEN_URL, headers=headers, data=payload)
-        response.raise_for_status()
-        token = response.json().get("access_token")
-        expires_in = response.json().get("expires_in", 3600)  # Default: 1 hour
-
-        if token:
-            cache["org_token"] = {
-                "token": token,
-                "expires_at": time.time() + expires_in  # Token expiry time
-            }
-        return token
-    except RequestException as e:
-        return {"error": "Failed to obtain token", "details": str(e)}
-
-
-def get_company(inn: int | str) -> dict:
-    """Fetch company details by INN from an external service."""
-    payload = json.dumps({"tin": str(inn)})
-
-    token = get_token()
-    if isinstance(token, dict):
-        return token
-
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {token}",
-    }
-
-    try:
-        response = requests.post(ORG_COMPANY_URL, headers=headers, data=payload)
-        if response.status_code == 401:
-            token = get_token()
-            if isinstance(token, dict):
-                return token
-
-            headers["Authorization"] = f"Bearer {token}"
-            response = requests.post(ORG_COMPANY_URL, headers=headers, data=payload)
-
-        response.raise_for_status()
-        response_js = response.json()
-        return {
-            "name": response_js.get("name"),
-            "address": response_js.get("address"),
-            "oked": response_js.get("nc6Code"),
-            "ns_code": response_js.get("ns10Code"),
-        }
-
-    except RequestException as e:
-        return {"error": "Failed to fetch company details", "details": str(e)}
-
-
-print(get_company("200524845"))
+# Print the result
+print(d)
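
For context on the new main.py: below is the dictionary the rewritten parser should produce for the bundled example DDL. This is a sketch based on a reading of the regex above, not output captured from the commit.

# Expected result of get_column_name_and_type(ddl_statement) for the example DDL,
# inferred from the regex rather than from a recorded run: precision such as
# (50) or (10, 2) is stripped by split('('), while the optional " PRIMARY KEY"
# suffix stays attached to the reported type.
expected = {
    "id": "SERIAL PRIMARY KEY",
    "first_name": "VARCHAR",
    "last_name": "VARCHAR",
    "email": "VARCHAR",
    "hire_date": "DATE",
    "salary": "NUMERIC",
}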

main1.py

+77

@@ -0,0 +1,77 @@
+import re
+
+
+def postgres_to_clickhouse_type(postgres_type):
+    type_mapping = {
+        "SERIAL": "UInt32",
+        "VARCHAR": "String",
+        "CHAR": "String",
+        "TEXT": "String",
+        "INT": "Int32",
+        "INTEGER": "Int32",
+        "SMALLINT": "Int16",
+        "BIGINT": "Int64",
+        "NUMERIC": "Float64",  # Default mapping
+        "DECIMAL": "Float64",  # Default mapping
+        "REAL": "Float32",
+        "DOUBLE PRECISION": "Float64",
+        "DATE": "Date",
+        "TIMESTAMP": "DateTime",
+        "TIMESTAMPTZ": "DateTime",
+        "BOOLEAN": "UInt8",
+        "BOOL": "UInt8",
+        "JSON": "String",
+        "JSONB": "String",
+        "UUID": "UUID",
+    }
+
+    # Check for type precision (e.g., VARCHAR(50), NUMERIC(10, 2))
+    if '(' in postgres_type:
+        base_type = postgres_type.split('(')[0].strip()
+        if base_type in ["VARCHAR", "CHAR", "TEXT"]:
+            return "String"  # ClickHouse always maps string types to String
+        elif base_type in ["NUMERIC", "DECIMAL"]:
+            return "Float64"  # ClickHouse defaults to Float64 for numeric types with precision
+        else:
+            # Return the base type if it's numeric with precision or something unsupported
+            return type_mapping.get(base_type, "String")
+    else:
+        # No precision/size; simply return the mapped type
+        return type_mapping.get(postgres_type, "String")
+
+
+ddl_statement = """
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(100),
+    hire_date DATE,
+    salary NUMERIC(10, 2),
+    status BOOLEAN,
+    department_id INT,
+    join_date TIMESTAMP,
+    updated_at TIMESTAMPTZ,
+    profile JSONB,
+    unique_code UUID,
+    age SMALLINT,
+    bonus DOUBLE PRECISION
+);
+"""
+
+def get_column_name_and_type(ddl):
+    # Regex to capture column name and type
+    column_pattern = re.compile(r"(\w+)\s+([\w\(\)\s]+(?: PRIMARY KEY)?)")
+    columns_section = re.search(r"\((.*)\);", ddl, re.DOTALL).group(1)
+    columns = column_pattern.findall(columns_section)
+    return {column[0]: column[1] for column in columns}
+
+
+columns = get_column_name_and_type(ddl_statement)
+
+# Convert the column types to ClickHouse types
+clickhouse_columns = {
+    col: postgres_to_clickhouse_type(col_type) for col, col_type in columns.items()
+}
+
+print(clickhouse_columns)
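
A possible follow-up to main1.py, not part of this commit: rendering the mapped columns as a ClickHouse CREATE TABLE statement. The helper name, the MergeTree engine, and the ORDER BY key below are illustrative assumptions.

# Hypothetical helper (not in this commit): build ClickHouse DDL from the
# {column: clickhouse_type} mapping produced above. Engine and sort key are
# assumed for illustration, not taken from the repository.
def build_clickhouse_ddl(table_name, columns, order_by="id"):
    cols = ",\n    ".join(f"{name} {ch_type}" for name, ch_type in columns.items())
    return (
        f"CREATE TABLE {table_name} (\n"
        f"    {cols}\n"
        f") ENGINE = MergeTree()\n"
        f"ORDER BY {order_by};"
    )


print(build_clickhouse_ddl("employees", {"id": "UInt32", "first_name": "String"}))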

requirements.txt

+4 −1

@@ -1,3 +1,6 @@
 black==25.1.0
 requests==2.32.3
-python-dotenv==1.0.1
+python-dotenv==1.0.1
+psycopg2-binary==2.9.10
+clickhouse-connect==0.8.15
+confluent-kafka==2.8.0
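
The added pins suggest a Postgres-to-ClickHouse flow with Kafka in between. A minimal connectivity sketch under that assumption; hosts, ports, and credentials below are placeholders, not values from this repository:

# Minimal connectivity sketch for the newly pinned dependencies; all connection
# parameters are placeholders for illustration only.
import psycopg2                       # psycopg2-binary
import clickhouse_connect             # clickhouse-connect
from confluent_kafka import Producer  # confluent-kafka

pg_conn = psycopg2.connect(host="localhost", dbname="postgres",
                           user="postgres", password="postgres")
ch_client = clickhouse_connect.get_client(host="localhost", port=8123)
producer = Producer({"bootstrap.servers": "localhost:9092"})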
