1
1
import contextlib
2
+ import logging
2
3
import os
4
+ import sys
3
5
from enum import Enum
4
6
from os import linesep
5
7
from pathlib import Path
6
- from typing import Dict , List , Optional , Set , TextIO
8
+ from typing import Dict , List , Optional , Set , TextIO , Union
7
9
from typing_extensions import Annotated
8
10
9
11
import pandas as pd
10
12
import typer
11
- import sqlalchemy as sa
12
13
from tabulate import tabulate
13
14
from functional import seq
14
15
24
25
25
26
class OutFmt (str , Enum ):
26
27
'''output format'''
28
+ AUTO = 'auto'
27
29
CSV = 'csv'
28
30
EXCEL = 'excel'
29
31
@@ -41,10 +43,23 @@ class OutFmt(str, Enum):
41
43
creating sharable output files. This shows which tables and columns are
42
44
selected, and how many rows each filter returns.'''
43
45
46
+ QUIET_DESC = 'Don\' t log to STDOUT.'
47
+
48
+ # default cli args
49
+ DEBUG_DEFAULT = False
50
+ ORGS_DEFAULT = []
51
+ OUTDIR_DEFAULT = './'
52
+ OUTFMT_DEFAULT = OutFmt .AUTO
53
+ QUIET_DEFAULT = False
44
54
45
55
app = typer .Typer (pretty_exceptions_show_locals = False )
46
56
47
57
58
def error(msg: str) -> None:
    '''reports an error message on STDERR and through the logging module'''
    # STDERR always gets the message, regardless of how logging is configured
    print(msg, file=sys.stderr)
    logging.log(logging.ERROR, msg)
61
+
62
+
48
63
def write_line (file : TextIO , text : str = '' ) -> None :
49
64
'''writes a line to STDOUT and file'''
50
65
print (text )
@@ -109,57 +124,76 @@ def get_tables(org_queries: sh.queries.OrgTableQueries) -> Set[str]:
109
124
return result
110
125
111
126
112
def gen_filename(in_name: str, org: str, table: str, ext: str) -> str:
    '''builds an output filename: <in_name>-<org>[-<table>].<ext>

    The table part is left out when it is empty or equal to the input name;
    the latter avoids duplicating the table name when both input and output
    are CSV.'''
    parts = [in_name, org]
    if table and table != in_name:
        parts.append(table)
    return '-'.join(parts) + '.' + ext
115
134
116
135
117
def get_debug_writer(debug: bool) -> Union[TextIO, contextlib.nullcontext]:
    '''returns a context manager for the debug output

    When `debug` is set this is the opened file `debug.txt`, otherwise a
    null context (which yields None).'''
    # XXX: this function only exists for brevity at the call site, so a
    # single `with` clause works for both cases
    return open('debug.txt', 'w') if debug else contextlib.nullcontext()
123
142
124
143
125
def get_excel_writer(in_name, debug: bool, org: str, outdir: str,
                     outfmt: OutFmt) -> Optional[pd.ExcelWriter]:
    '''returns an excel writer for the given org, or None

    A writer is only created when not in debug mode and the output format is
    EXCEL. The workbook is named by gen_filename (without a table part) and
    placed in `outdir`.'''
    if debug or outfmt != OutFmt.EXCEL:
        return None

    workbook_name = gen_filename(in_name, org, '', 'xlsx')
    logging.info('writing ' + workbook_name)
    return pd.ExcelWriter(os.path.join(outdir, workbook_name))
132
153
133
154
134
- @app .command ()
135
- def main (
136
- schema : str = typer .Argument (default = ..., help = SCHEMA_DESC ),
137
- input : str = typer .Argument (default = '' , help = INPUT_DESC ),
138
- orgs : List [str ] = typer .Option (default = [], help = ORGS_DESC ),
139
- outfmt : OutFmt = typer .Option (default = OutFmt .EXCEL , help = OUTFMT_DESC ),
140
- outdir : str = typer .Option (default = './' , help = OUTDIR_DESC ),
141
- debug : Annotated [bool , typer .Option ("-d" , "--debug" ,
142
- help = DEBUG_DESC )] = False ,
155
def infer_outfmt(path: str) -> Optional[OutFmt]:
    '''infers the output format from the file extension of `path`

    The extension is compared case-insensitively, so both `data.csv` and
    `data.CSV` give CSV, and both `data.xlsx` and `data.XLSX` give EXCEL.
    Returns None when the extension is not recognized (including when
    there is no extension at all).'''
    (_, ext) = os.path.splitext(path)
    # file extensions are commonly upper-cased on some platforms and by some
    # export tools, so normalize before comparing
    ext = ext.lower()
    if ext == '.csv':
        return OutFmt.CSV
    if ext == '.xlsx':
        return OutFmt.EXCEL
    return None
162
+
163
+
164
+ def share (
165
+ schema : str ,
166
+ input : str ,
167
+ orgs : List [str ] = ORGS_DEFAULT ,
168
+ outfmt : OutFmt = OUTFMT_DEFAULT ,
169
+ outdir : str = OUTDIR_DEFAULT ,
170
+ debug : bool = DEBUG_DEFAULT ,
143
171
) -> None :
144
172
schema_path = schema
145
- filename = Path (schema_path ).name
173
+ schema_filename = Path (schema_path ).name
174
+ in_name = Path (input ).stem
175
+
176
+ if outfmt == OutFmt .AUTO :
177
+ fmt = infer_outfmt (input )
178
+ if not fmt :
179
+ error ('unable to infer output format from input file' )
180
+ return
181
+ outfmt = fmt
146
182
147
- print (f'loading schema { qt (filename )} ' )
183
+ logging . info (f'loading schema { qt (schema_filename )} ' )
148
184
try :
149
185
ruleset = rules .load (schema_path )
150
- ruletree = trees .parse (ruleset , orgs , filename )
186
+ ruletree = trees .parse (ruleset , orgs , schema_filename )
151
187
org_queries = queries .generate (ruletree )
152
188
table_filter = get_tables (org_queries )
153
189
except rules .ParseError :
154
190
# XXX: error messages are already printed at this point
155
- exit ( 1 )
191
+ return
156
192
157
193
# XXX: only tables found in the schema are considered in the data source
158
- print (f'connecting to { qt (input )} ' )
194
+ logging . info (f'connecting to { qt (input )} ' )
159
195
con = sh .connect (input , table_filter )
160
196
161
- if debug :
162
- print ()
163
197
# one debug file per run
164
198
with get_debug_writer (debug ) as debug_file :
165
199
for org , table_queries in org_queries .items ():
@@ -172,26 +206,50 @@ def main(
172
206
org_data [table ] = sh .get_data (con , tq )
173
207
174
208
# one excel file per org
175
- excel_file = get_excel_writer (debug , org , outdir , outfmt )
209
+ excel_file = get_excel_writer (in_name , debug , org , outdir , outfmt )
176
210
try :
177
211
for table , data in org_data .items ():
178
212
if outfmt == OutFmt .CSV :
179
- filename = gen_filename (org , table , 'csv' )
180
- print ('writing ' + filename )
181
- data .to_csv (os .path .join (outdir , filename ))
213
+ filename = gen_filename (in_name , org , table , 'csv' )
214
+ logging .info ('writing ' + filename )
215
+ path = os .path .join (outdir , filename )
216
+ data .to_csv (path , index = False )
182
217
elif outfmt == OutFmt .EXCEL :
183
- print (f'- { qt (table )} ' )
218
+ logging . info (f'- { qt (table )} ' )
184
219
data .to_excel (excel_file , sheet_name = table )
185
220
else :
186
221
assert False , f'format { outfmt } not impl'
187
222
except IndexError :
188
223
# XXX: this is thrown from excel writer when nothing is written
189
- exit ('failed to write output, most likely due to empty input' )
224
+ error ('failed to write output, most likely due to empty input' )
225
+ return
190
226
finally :
191
227
if excel_file :
192
228
excel_file .close ()
193
- print ('done' )
229
+ logging . info ('done' )
194
230
195
231
196
- if __name__ == '__main__' :
232
@app.command()
def main_cli(
    schema: str = typer.Argument(default=..., help=SCHEMA_DESC),
    input: str = typer.Argument(default='', help=INPUT_DESC),
    orgs: List[str] = typer.Option(default=ORGS_DEFAULT, help=ORGS_DESC),
    outfmt: OutFmt = typer.Option(default=OUTFMT_DEFAULT, help=OUTFMT_DESC),
    outdir: str = typer.Option(default=OUTDIR_DEFAULT, help=OUTDIR_DESC),
    debug: Annotated[bool, typer.Option("-d", "--debug",
                                        help=DEBUG_DESC)] = DEBUG_DEFAULT,
    quiet: Annotated[bool, typer.Option("-q", "--quiet",
                                        help=QUIET_DESC)] = QUIET_DEFAULT,
) -> None:
    # command line front end for share(): sets up logging, then delegates.
    # (kept as a comment rather than a docstring, since typer would show a
    # docstring as the command's help text)
    log_to_stdout = not quiet
    if log_to_stdout:
        # without this call the logging module is left unconfigured, so the
        # info messages emitted by share() are not written to STDOUT
        logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
    share(schema=schema, input=input, orgs=orgs, outfmt=outfmt,
          outdir=outdir, debug=debug)
247
+
248
+
249
def main():
    '''script entry point; invokes the typer app, which runs main_cli'''
    app()
252
+
253
+
254
+ if __name__ == '__main__' :
255
+ main ()
0 commit comments