Big-Life-Lab
diff --git a/‎INSTALL.md
Lines changed: 0 additions & 33 deletions b/‎INSTALL.md
Lines changed: 0 additions & 33 deletions
diff --git a/‎README.md
Lines changed: 13 additions & 850 deletions b/‎README.md
Lines changed: 13 additions & 850 deletions
diff --git a/‎docs/manual/.gitignore
Lines changed: 10 additions & 0 deletions b/‎docs/manual/.gitignore
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/manual/_quarto.yml
Lines changed: 20 additions & 0 deletions b/‎docs/manual/_quarto.yml
Lines changed: 20 additions & 0 deletions
diff --git a/‎docs/manual/api.qmd
Lines changed: 111 additions & 0 deletions b/‎docs/manual/api.qmd
Lines changed: 111 additions & 0 deletions
diff --git a/‎docs/manual/assets/minimal/measures.csv
Lines changed: 5 additions & 0 deletions b/‎docs/manual/assets/minimal/measures.csv
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/manual/assets/minimal/schema.csv
Lines changed: 4 additions & 0 deletions b/‎docs/manual/assets/minimal/schema.csv
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/manual/assets/odm-logo.png
473 KB b/‎docs/manual/assets/odm-logo.png
473 KB
diff --git a/‎docs/manual/assets/schema-missing-headers.csv
Lines changed: 1 addition & 0 deletions b/‎docs/manual/assets/schema-missing-headers.csv
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/manual/assets/schema-missing-rules.csv
Lines changed: 1 addition & 0 deletions b/‎docs/manual/assets/schema-missing-rules.csv
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/manual/cli.qmd
Lines changed: 151 additions & 0 deletions b/‎docs/manual/cli.qmd
Lines changed: 151 additions & 0 deletions
diff --git a/‎docs/manual/common.py
Lines changed: 18 additions & 0 deletions b/‎docs/manual/common.py
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,10 @@
+# generated files
+/measures-OHRI.csv
+search.json
+site_libs
+debug.txt
+
+# generated dirs
+/.quarto/
+/_book/
+/build/
@@ -0,0 +1,20 @@
+project:
+  type: book
+  output-dir: build
+
+book:
+  title: 'PHES-ODM Sharing Library Manual'
+  author: 'OHRI'
+  chapters:
+    - index.qmd
+    - install.qmd
+    - getting-started.qmd
+    - cli.qmd
+    - api.qmd
+  appendices:
+    - schemas.qmd
+    - python.qmd
+    - sqlite.qmd
+
+pdf-engine: pdflatex
+toc: true
@@ -0,0 +1,111 @@
+# API {#sec-api}
+
+## Reference
+
+<!-- TODO generate this API reference from the source code automatically -->
+
+```python
+
+def parse(schema_path: str, orgs: List[str] = []) -> OrgTableQueries:
+    '''returns queries for each org and table, generated from the rules
+    specified in `schema_path`
+
+    :raises OSError, ParseError:
+    '''
+
+
+def connect(data_source: str, tables: Set[str] = set()) -> Connection:
+    '''returns a connection object that can be used together with a query
+    object to retrieve data from `data_source`
+
+    :raises DataSourceError:'''
+
+
+def get_data(c: Connection, tq: TableQuery) -> pd.DataFrame:
+    '''returns the data extracted from running query `tq` on data-source
+    connection `c`, as a pandas DataFrame
+
+    :raises DataSourceError:'''
+
+
+def get_counts(c: Connection, tq: TableQuery) -> Dict[RuleId, int]:
+    '''returns the row counts for each rule
+
+    :raises DataSourceError:'''
+
+def get_columns(c: Connection, tq: TableQuery
+                ) -> Tuple[RuleId, List[ColumnName]]:
+    '''returns the select-rule id together with the column names that would be
+    extracted when calling `get_data`
+
+    :raises DataSourceError:'''
+
+
+def extract(
+    schema_path: str,
+    data_source: str,
+    orgs: List[str] = [],
+) -> Dict[OrgName, Dict[TableName, pd.DataFrame]]:
+    '''returns a Pandas DataFrame per table per org
+
+    :param data_source: a file path or database url (in SQLAlchemy format)
+    :param schema_path: rule schema file path
+    :param orgs: orgs to share with, or all if empty
+
+    :raises DataSourceError:
+    '''
+```
+
+## Usage
+
+### Examples
+
+**Common definitions:**
+
+```{python}
+#| echo: false
+from common import DATA, SCHEMA, load_csv_md, print_file
+
+def my_processing_func(data):
+    # what a user-made function may look like
+    pass
+```
+
+```{python}
+import pandas as pd
+import odm_sharing.sharing as sh
+
+ORG = 'OHRI'
+ORGS = [ORG]
+```
+
+**High-level one-shot function:**
+
+```{python}
+results = sh.extract(SCHEMA, DATA, ORGS)
+for org, table_data in results.items():
+    for table_name, data_frame in table_data.items():
+        my_processing_func(data_frame)
+```
+
+**Low-level sample code:**
+
+```{python}
+def describe_table_query(con, table, query):
+    print(f'query table: {table}')
+
+    (select_rule_id, columns) = sh.get_columns(con, query)
+    print(f'query columns (from rule {select_rule_id}):')
+    print(','.join(columns))
+
+    print('query counts per rule:')
+    rule_counts = sh.get_counts(con, query)
+    for ruleId, count in rule_counts.items():
+        print(f'{ruleId} | {count}')
+
+con = sh.connect(DATA)
+table_queries = sh.parse(SCHEMA, ORGS)
+for table, query in table_queries[ORG].items():
+    describe_table_query(con, table, query)
+    my_processing_func(sh.get_data(con, query))
+```
@@ -0,0 +1,5 @@
+measureRepID,sampleID,measure,value,unit,aggregation
+o.08.08.20covN1,o.08.08.20,covN1,0.00036,gcPMMoV,meanNr
+o.08.08.20covN2,o.08.08.20,covN1,0.00003,gcPMMoV,sdNr
+o.08.08.20covN4,o.08.08.20,covN2,0.00002,gcPMMoV,meanNr
+o.08.08.20covN3,o.08.08.20,covN2,0.00004,gcPMMoV,sdNr
@@ -0,0 +1,4 @@
+ruleID,table,mode,key,operator,value,notes
+1,measures,select,NA,NA,all,"select all columns from the measures table"
+2,measures,filter,measure,=,covN1,"where measure equals covN1"
+3,NA,share,OHRI,NA,1;2,"use rule 1 & 2 for the OHRI organization"
@@ -0,0 +1 @@
+ruleID,mode,key,value,notes
@@ -0,0 +1 @@
+ruleID,table,mode,key,operator,value,notes
@@ -0,0 +1,151 @@
+```{python}
+#| echo: false
+from odm_sharing.tools.share import OutFmt, share
+
+from common import DATA, SCHEMA, load_csv_md, print_file
+```
+
+# CLI {#sec-cli}
+
+## Reference
+
+```bash
+odm-share [OPTION]... SCHEMA INPUT
+```
+
+Arguments:
+
+- SCHEMA
+
+  sharing schema file path
+
+- INPUT
+
+  spreadsheet file path or [SQLAlchemy database URL](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls)
+
+Options:
+
+- `--orgs=NAME[,...]`
+
+    comma separated list of organizations to output data for, defaults to all
+
+- `--outfmt=FORMAT`
+
+    output format (excel or csv), defaults to input format
+
+- `--outdir=PATH`
+
+    output file directory, defaults to the current directory
+
+- `-d`, `--debug`
+
+    output debug info to STDOUT (and ./debug.txt) instead of creating sharable
+    output files. This shows which tables and columns are selected, and how
+    many rows each filter returns.
+
+- `-q`, `--quiet`
+
+    don't log to STDOUT
+
+One or multiple sharable output files will be created in the chosen output
+directory according to the chosen output format and organization(s). Each
+output file will have the input name followed by the recipient org name.
+
+### Errors
+
+Error messages will be printed to the terminal (STDERR) when something is
+wrong. Each message starts by stating where the error originated, including
+the filename and line number or rule id. Here are a few examples:
+
+When headers are missing from the schema:
+
+```{python}
+#| echo: false
+share('assets/schema-missing-headers.csv', 'assets/measures.csv')
+```
+
+When no share-rules are contained in the schema:
+
+```{python}
+#| echo: false
+share('assets/schema-missing-rules.csv', 'assets/measures.csv')
+```
+
+## Usage
+
+### Examples
+
+#### Using a CSV file
+
+To share a single table `measures.csv`, using the sharing schema `schema.csv`,
+the following command can be used:
+
+```bash
+odm-share schema.csv measures.csv
+```
+
+It will make an output file called `measures-<org>.csv` for each organization
+specified in the schema, with filtered data that is ready to share.
+
+#### Using an Excel file
+
+Excel files can be used as input to share multiple tables at once:
+
+```bash
+odm-share schema.csv data.xlsx
+```
+
+It will make an output file called `<org>.xlsx` for each organization in the
+schema.
+
+#### Using a database
+
+To use a MySQL database as input (with the pymysql package):
+
+```bash
+odm-share schema.csv mysql+pymysql://user:pass@host/db
+```
+
+Same as above, using an MS SQL Server database through ODBC (with the pyodbc
+package):
+
+```bash
+odm-share schema.csv mssql+pyodbc://user:pass@mydsn
+```
+
+#### Using additional options
+
+- Share CSV files from an Excel file:
+
+    ```bash
+    odm-share --outfmt=CSV schema.csv data.xlsx
+    ```
+
+- Create a sharable Excel file in the "~/files" directory, for the "OHRI" and
+  "TOH" organizations, applying the rules from schema.csv on the input from
+  data.xlsx:
+
+    ```bash
+    odm-share --orgs=OHRI,TOH --outdir=~/files schema.csv data.xlsx
+    ```
+
+### Debugging
+
+Debug mode provides information about what would happen when using a specific
+schema, without pulling the actual data to be shared. Debugging is enabled by
+passing the `--debug` flag, or simply `-d`.
+
+Here's an example using the sample files from [getting started](getting-started.qmd):
+
+```bash
+odm-share --debug schema.csv data.xlsx
+```
+```{python}
+#| echo: false
+share(SCHEMA, DATA, debug=True)
+```
+
+Here we can see the columns that would be selected, as well as the number of
+rows each rule would produce. Specifically, we can see that 4 rows would be
+selected by rule #1, but the filter in rule #2 reduces that number to 2, which
+is the final count as confirmed in the count for rule #3.
@@ -0,0 +1,18 @@
+import pandas as pd
+from tabulate import tabulate
+
+
+SCHEMA = 'assets/minimal/schema.csv'
+DATA = 'assets/minimal/measures.csv'
+
+
+def load_csv_md(path):
+    '''read csv file and convert it to markdown
+
+    :param path: path of the csv file to read
+    :return: the table text rendered by `tabulate`
+    '''
+    # keep_default_na=False: keep values like 'NA' as literal strings instead
+    # of letting pandas parse them as missing values
+    df = pd.read_csv(path, keep_default_na=False)
+    # use the csv column names as headers, and leave out the row index
+    md = tabulate(df, headers=df.columns.to_list(), showindex=False)
+    return md
+
+
+def print_file(path):
+    '''print the full text content of the file at `path` to STDOUT'''
+    with open(path, 'r') as f:
+        print(f.read())
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+ruleID,table,mode,key,operator,value,notes`