From d2f74d11a86b8c9dd0e190bd2e04dd13b26d1908 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Tue, 21 Nov 2017 17:16:29 +0300 Subject: [PATCH 1/2] docs polishing --- README.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2078fea..7fa9cc4 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ pip install clickhouse-driver Also the following (at least one of) MySQL privileges are required for this operation: `SUPER`, `REPLICATION CLIENT` -```sql +```mysql CREATE USER 'reader'@'localhost' IDENTIFIED BY 'qwerty'; CREATE USER 'reader'@'127.0.0.1' IDENTIFIED BY 'qwerty'; CREATE USER 'reader'@'*' IDENTIFIED BY 'qwerty'; @@ -65,7 +65,7 @@ Also the following MySQL config options are required: ```ini [mysqld] server-id = 1 -log_bin = /var/log/mysql/mysql-bin.log +log_bin = /var/lib/mysql/bin.log expire_logs_days = 10 max_binlog_size = 100M binlog-format = row #Very important if you want to receive write, update and delete row events @@ -293,7 +293,7 @@ We have to separate test table into several ones because of this error, produced ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs ``` -```sql +```mysql CREATE TABLE datatypes( bit_1 BIT(1), @@ -397,12 +397,12 @@ CREATE TABLE long_varbinary_datatypes( ``` -```sql +```mysql -- in order to be able to set timestamp = '1970-01-01 00:00:01' set time_zone='+00:00'; ``` -```sql +```mysql -- MIN values INSERT INTO datatypes SET @@ -500,7 +500,7 @@ INSERT INTO long_varbinary_datatypes SET ; ``` -```sql +```mysql -- MAX values INSERT INTO datatypes SET @@ -722,12 +722,24 @@ Main Steps #### airline.ontime Data Set in CSV files Run [download script](run_airline_ontime_data_download.sh) + You may want to adjust dirs where to keep `ZIP` and `CSV` file + In `run_airline_ontime_data_download.sh` edit these lines: ```bash ZIP_FILES_DIR="zip" CSV_FILES_DIR="csv" ``` +You may want to adjust number of files to download (In case downloading all it may take some time). + +Specify year and months range as you wish: +```bash +... +echo "Download files into $ZIP_FILES_DIR" +for year in `seq 1987 2017`; do + for month in `seq 1 12`; do +... +``` ```bash ./run_airline_ontime_data_download.sh @@ -737,7 +749,7 @@ Downloading can take some time. 
#### airline.ontime MySQL Table Create MySQL table of the following structure: -```sql +```mysql CREATE DATABASE IF NOT EXISTS `airline`; CREATE TABLE IF NOT EXISTS `airline`.`ontime` ( `Year` SMALLINT UNSIGNED, -- maps to UInt16, @@ -971,6 +983,7 @@ CREATE TABLE IF NOT EXISTS `airline`.`ontime` ( #### airline.ontime Data Reader Run [datareader script](run_airline_ontime_data_reader.sh) + You may want to adjust `PYTHON` path and source and target hosts and usernames ```bash PYTHON=python3.6 @@ -989,6 +1002,7 @@ PYTHON=/home/user/pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy #### airline.ontime Data Importer Run [data importer script](run_airline_ontime_import.sh) + You may want to adjust `CSV` files location, number of imported files and MySQL user/password used for import ```bash # looking for csv files in this dir From ca1139001f9ea07e85bc2fca832ef56196bb34b1 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Wed, 22 Nov 2017 01:15:31 +0300 Subject: [PATCH 2/2] docs clarifications --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 7fa9cc4..bdf471d 100644 --- a/README.md +++ b/README.md @@ -402,6 +402,8 @@ CREATE TABLE long_varbinary_datatypes( set time_zone='+00:00'; ``` +Insert minimum acceptable values into the test table: + ```mysql -- MIN values INSERT INTO datatypes SET @@ -500,6 +502,8 @@ INSERT INTO long_varbinary_datatypes SET ; ``` +Insert maximum acceptable values into the test table: + ```mysql -- MAX values INSERT INTO datatypes SET @@ -727,8 +731,10 @@ You may want to adjust dirs where to keep `ZIP` and `CSV` file In `run_airline_ontime_data_download.sh` edit these lines: ```bash +... ZIP_FILES_DIR="zip" CSV_FILES_DIR="csv" +... ``` You may want to adjust number of files to download (In case downloading all it may take some time). 
@@ -986,8 +992,10 @@ Run [datareader script](run_airline_ontime_data_reader.sh) You may want to adjust `PYTHON` path and source and target hosts and usernames ```bash +... PYTHON=python3.6 PYTHON=/home/user/pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy +... ``` ```bash ... @@ -1005,11 +1013,13 @@ Run [data importer script](run_airline_ontime_import.sh) You may want to adjust `CSV` files location, number of imported files and MySQL user/password used for import ```bash +... # looking for csv files in this dir FILES_TO_IMPORT_DIR="/mnt/nas/work/ontime" # limit import to this number of files FILES_TO_IMPORT_NUM=3 +... ``` ```bash ...