diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 9c7738f..f40ebd1 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -26,7 +26,7 @@ jobs: - name: Install application run: | python -m pip install --upgrade pip setuptools wheel - pip install -e . + pip install -e .[sqla] - name: Lint with flake8 and black run: | pip install -r requirements/lint.txt @@ -35,7 +35,7 @@ - name: Test with pytest run: | pip install -r requirements/test.txt - pytest + python -m pytest - name: Build package run: | pip install -r requirements/build.txt diff --git a/.gitignore b/.gitignore index 5ce893e..c0501db 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,4 @@ dmypy.json # Project specific configurable_http_proxy/version.txt +*sqlite* diff --git a/README.md b/README.md index 78101dd..aa02444 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,10 @@ The following items are supported: - Customizable storage backends - PID file writing - Logging +- Configurable storage backend + +The following options are additional (not available in Node.js CHP currently): +- Ready-to-use DBMS storage backend The following options are not supported (yet): @@ -35,3 +39,29 @@ The following options are not supported (yet): - Change Origin: `--change-origin` - Rewrites in Location header: `--protocol-rewrite` and `--auto-rewrite` - Metrics server: `--metrics-port` and `--metrics-ip` + +## Database-backed storage backend + +Using a SQL DBMS instead of the default in-memory store enables CHP to be replicated +in a high-availability scenario. + +To use a SQL DBMS as the storage backend: + +1. Install DBMS support + +```bash +$ pip install configurable-http-proxy[sqla] +``` + +2. Set the `CHP_DATABASE_URL` environment variable to any database URL supported by SQLAlchemy. The default is `sqlite:///chp.sqlite`.
+ +```bash +$ export CHP_DATABASE_URL="sqlite:///chp.sqlite" +$ configurable-http-proxy --storage-backend configurable_http_proxy.dbstore.DatabaseStore +``` + +3. Optionally you may set the table name by setting the CHP_DATABASE_TABLE. The default is 'chp_routes' + +```bash +$ export CHP_DATABASE_TABLE="chp_routes" +``` diff --git a/configurable_http_proxy/dbstore.py b/configurable_http_proxy/dbstore.py new file mode 100644 index 0000000..2511c8c --- /dev/null +++ b/configurable_http_proxy/dbstore.py @@ -0,0 +1,217 @@ +import json +import logging +import os +from datetime import datetime + +from dataset import connect + +from configurable_http_proxy.store import BaseStore + +log = logging.getLogger(__name__) + + +class DatabaseStore(BaseStore): + """A DBMS storage backend for configurable-http-proxy + + This enables chp to run multiple times and serve routes from a central + DBMS. It uses SQLAlchemy as the database backend. + + Usage: + Set the CHP_DATABASE_URL env var to any db URL supported by SQLAlchemy. + The default is "sqlite:///chp.sqlite". + + $ export CHP_DATABASE_URL="sqlite:///chp.sqlite" + $ configurable-http-proxy --storage-backend configurable_http_proxy.dbstore.DatabaseStore + + Optionally you may set the table name by setting the CHP_DATABASE_TABLE. 
+ The default is 'chp_routes' + + $ export CHP_DATABASE_TABLE="chp_routes" + + See Also: + * Valid URLs https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls + """ + + default_db_url = "sqlite:///chp.sqlite" + default_db_table = "chp_routes" + + def __init__(self): + super().__init__() + # NOTE: Also support getting these settings from CLI args + db_url = os.environ.get("CHP_DATABASE_URL", self.default_db_url) + db_table = os.environ.get("CHP_DATABASE_TABLE", self.default_db_table) + self.routes: TableTrie = TableTrie(db_url, table=db_table) + log.info(f"Using database {db_url}") + for route, data in self.get_all().items(): + log.info(f'Restoring {route} => {data.get("target", "")}') + + def clean(self): + # remove all information stored so far + self.routes.clean() + + def get_target(self, path: str): + # return the data for the most specific matching route + return self.routes.get(self.clean_path(path), trie=True) + + def get_all(self): + # return all routes as route => data + return self.routes.all() + + def add(self, path: str, data): + # add a new route /path, storing data + if self.get(path): + self.update(path, data) + else: + self.routes.add(path, data) + + def update(self, path: str, data): + # update an existing route + self.routes.update(self.clean_path(path), data) + + def remove(self, path: str): + # remove an existing route + path = self.clean_path(path) + route = self.routes.get(path) + if route: + self.routes.remove(path) + return route + + def get(self, path): + # return the data for the exact match + return self.routes.get(self.clean_path(path)) + + +class TableTrie: + """A URLtrie-like backed by a database + + This stores URL-path => data mappings. On retrieving, it will try + to retrieve all subpaths up to the default path. 
+ + Usage: + + # create mapping + routes = TableTrie('sqlite:///:memory:') + routes.add('/', {'some': 'default'}) + routes.add('/foo/bar', {'some': 'value'}) + + # query a mapping that exists + routes.get('/foo/bar/baz') + => { + 'prefix': '/foo/bar', + 'some': 'value' + } + + # query a mapping that does not exist + routes.get('/fox/bax') + => { + 'prefix': '/', + 'some': 'default' + } + + How values are stored: + + Routes are stored in the given table (defaults to 'chp_routes'). + The table has the following columns: + + id: integer (primary key) + key: varchar(128, unique) + data: varchar + + The data is the serialized JSON equivalent of the dictionary stored + by TableTrie.add() or .update(). The rationale for storing a serialized + version of the dict instead of using the sqlalchemy JSON support directly + is to improve compatibility across db dialects. + + DB backend: + + The backend is any database supported by SQLAlchemy. To simplify + implementation this uses the dataset library, which provides a very + straight-forward way of working with tables created from Python dicts. 
+ """ + + def __init__(self, url, table=None): + table = table or "chp_routes" + self.db = connect(url) + self.table = self.db[table] + self.table.create_column("path", self.db.types.string(length=128), unique=True) + + def get(self, path, trie=False): + # return the data store for path + # -- if trie is False (default), will return data for the exact path + # -- if trie is True, will return the data and the matching prefix + try_routes = self._split_routes(path) if trie else [path] + for path in try_routes: + doc = self.table.find_one(path=path, order_by="id") + if doc: + if not trie: + data = self._from_json(doc["data"]) + else: + data = doc + data["data"] = self._from_json(doc["data"]) + data["prefix"] = path + break + else: + data = None + return attrdict(data) if data else None + + def add(self, path, data): + # add the data for the given exact path + self.table.insert({"path": path, "data": self._to_json(data)}) + + def update(self, path, data): + # update the data for the given exact path + doc = self.table.find_one(path=path, order_by="id") + doc["data"] = self._from_json(doc["data"]) + doc["data"].update(data) + doc["data"] = self._to_json(doc["data"]) + self.table.update(doc, "id") + + def remove(self, path): + # remove all matching routes for the given path, except default route + for subpath in self._split_routes(path): + if subpath == "/" and path != "/": + continue + self.table.delete(path=subpath) + + def all(self): + # return all data for all paths + return {item["path"]: self._from_json(item["data"]) for item in self.table.find(order_by="id")} + + def _to_json(self, data): + # simple converter for serializable data + for k, v in dict(data).items(): + if isinstance(v, datetime): + data[k] = f"_dt_:{v.isoformat()}" + elif isinstance(v, dict): + data[k] = self._to_json(v) + return json.dumps(data) + + def _from_json(self, data): + # simple converter from serialized data + data = json.loads(data) if isinstance(data, (str, bytes)) else data + for k, v 
in dict(data).items(): + if isinstance(v, str) and v.startswith("_dt_:"): + data[k] = datetime.fromisoformat(v.split(":", 1)[-1]) + elif isinstance(v, dict): + data[k] = self._from_json(v) + return data + + def _split_routes(self, path): + # generator for reverse tree of routes + # e.g. /path/to/document + # => yields /path/to/document, /path/to, /path, / + levels = path.split("/") + for i, e in enumerate(levels): + yield "/".join(levels[: len(levels) - i + 1]) + # always yield top level route + yield "/" + + def clean(self): + self.table.delete() + + +class attrdict(dict): + # enable .attribute for dicts + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__dict__ = self diff --git a/configurable_http_proxy_test/test_api.py b/configurable_http_proxy_test/test_api.py index 26246bd..d685f10 100644 --- a/configurable_http_proxy_test/test_api.py +++ b/configurable_http_proxy_test/test_api.py @@ -1,5 +1,6 @@ import datetime import json +import os from tornado.testing import AsyncHTTPTestCase @@ -7,11 +8,11 @@ from configurable_http_proxy_test.testutil import pytest_regex -class TestAPI(AsyncHTTPTestCase): - def get_app(self): - self.proxy = PythonProxy({"auth_token": "secret"}) - self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"}) - return self.proxy.api_app +class APITestsMixin: + """ + Test cases for TestAPI + This allows to reuse test cases for MemoryStore and DatabaseStore backends + """ def fetch(self, path, raise_error=True, with_auth=True, **kwargs): headers = kwargs.pop("headers", {}) @@ -144,3 +145,22 @@ def test_get_routes_with_inactive_since(self): resp = self.fetch(f"/api/routes?inactiveSince={hour_from_now.isoformat()}") reply = json.loads(resp.body) assert set(reply.keys()) == {"/", "/today", "/yesterday"} + + +class TestAPIWithMemoryStore(APITestsMixin, AsyncHTTPTestCase): + def get_app(self): + self.proxy = PythonProxy({"auth_token": "secret"}) + self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"}) + 
return self.proxy.api_app + + +class TestAPIWithDatabaseStore(APITestsMixin, AsyncHTTPTestCase): + def get_app(self): + os.environ["CHP_DATABASE_URL"] = "sqlite:///chp_test.sqlite" + self.proxy = PythonProxy( + {"auth_token": "secret", "storage_backend": "configurable_http_proxy.dbstore.DatabaseStore"} + ) + self.proxy._routes.clean() + assert self.proxy._routes.get_all() == {} + self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"}) + return self.proxy.api_app diff --git a/configurable_http_proxy_test/test_store.py b/configurable_http_proxy_test/test_store.py index faf6151..65d00f7 100644 --- a/configurable_http_proxy_test/test_store.py +++ b/configurable_http_proxy_test/test_store.py @@ -1,9 +1,14 @@ +import os + +from configurable_http_proxy.dbstore import DatabaseStore from configurable_http_proxy.store import MemoryStore -class TestMemoryStore: - def setup_method(self, method): - self.subject = MemoryStore() +class StoreTestsMixin: + """ + Test cases for the storage + This allows to reuse tests for MemoryStore and DatabaseStore + """ def test_get(self): self.subject.add("/myRoute", {"test": "value"}) @@ -73,3 +78,15 @@ def test_has_route(self): def test_has_route_path_not_found(self): route = self.subject.get("/wut") assert route is None + + +class TestMemoryStore(StoreTestsMixin): + def setup_method(self, method): + self.subject = MemoryStore() + + +class TestDataBaseStore(StoreTestsMixin): + def setup_method(self, method): + os.environ["CHP_DATABASE_URL"] = "sqlite:///chp_test.sqlite" + self.subject = DatabaseStore() + self.subject.clean() diff --git a/setup.py b/setup.py index 1096427..926e74a 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,9 @@ }, setup_requires=["setuptools_scm"], install_requires=open(os.path.join(BASE_PATH, "requirements", "base.txt")).readlines(), + extras_require={ + "sqla": ["dataset"], + }, python_requires=">=3.6", include_package_data=True, zip_safe=False,