Add SQLAlchemy storage backend

AbdealiLoKo · AbdealiLoKo · commit 343acace2255 · 2022-11-10T19:43:08.000+05:30
- add dbstore.DatabaseStore and unittests
- update API unittests for use in database tests
- update README
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Install application
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        pip install -e .
+        pip install -e .[sqla]
     - name: Lint with flake8 and black
       run: |
         pip install -r requirements/lint.txt
diff --git a/.gitignore b/.gitignore
@@ -134,3 +134,4 @@ dmypy.json
 
 # Project specific
 configurable_http_proxy/version.txt
+*sqlite*
diff --git a/README.md b/README.md
@@ -27,6 +27,7 @@ The following items are supported:
 - Customizable storage backends
 - PID file writing
 - Logging
+- Database-backed storage backend (any SQL DBMS supported by SQLAlchemy)
 
 The following options are not supported (yet):
 
@@ -35,3 +36,36 @@ The following options are not supported (yet):
 - Change Origin: `--change-origin`
 - Rewrites in Location header: `--protocol-rewrite` and `--auto-rewrite`
 - Metrics server: `--metrics-port` and `--metrics-ip`
+
+
+## Database-backed storage backend
+
+Using a SQL DBMS instead of the default in-memory store enables chp to be replicated
+in a High Availability scenario.
+
+To use a SQL DBMS as the storage backend:
+
+1. Install DBMS support
+
+        $ pip install configurable-http-proxy[sqla]
+
+2. Set the CHP_DATABASE_URL env var to any db URL supported by SQLAlchemy.
+   The default is "sqlite:///chp.sqlite".
+
+        $ export CHP_DATABASE_URL="sqlite:///chp.sqlite"
+        $ configurable-http-proxy --storage-backend configurable_http_proxy.dbstore.DatabaseStore
+
+3. Optionally, set the table name via the CHP_DATABASE_TABLE env var.
+   The default is 'chp_routes'.
+
+        $ export CHP_DATABASE_TABLE="chp_routes"
+
+
+**Note:**
+
+While the Node.js configurable-http-proxy supports custom storage backends, it does
+not currently support using a SQL DBMS. This is a unique feature of the Python
+version.
+
+
+
diff --git a/configurable_http_proxy/dbstore.py b/configurable_http_proxy/dbstore.py
@@ -0,0 +1,216 @@
+import json
+import logging
+import os
+from datetime import datetime
+
+from dataset import connect
+
+from configurable_http_proxy.store import BaseStore
+
+log = logging.getLogger(__name__)
+
+
+class DatabaseStore(BaseStore):
+    """A DBMS storage backend for configurable-http-proxy
+
+    This enables chp to run multiple times and serve routes from a central
+    DBMS. Routes are persisted through a TableTrie, which talks to the
+    database via the dataset library (SQLAlchemy underneath).
+
+    Usage:
+        Set the CHP_DATABASE_URL env var to any db URL supported by SQLAlchemy.
+        The default is "sqlite:///chp.sqlite".
+
+        $ export CHP_DATABASE_URL="sqlite:///chp.sqlite"
+        $ configurable-http-proxy --storage-backend configurable_http_proxy.dbstore.DatabaseStore
+
+        Optionally you may set the table name via the CHP_DATABASE_TABLE
+        env var. The default is 'chp_routes'.
+
+        $ export CHP_DATABASE_TABLE="chp_routes"
+
+    See Also:
+        * Valid URLs https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls
+    """
+
+    default_db_url = "sqlite:///chp.sqlite"
+    default_db_table = "chp_routes"
+
+    def __init__(self):
+        # read the connection settings from the environment, open the table
+        # and log every route that is already stored in it
+        super().__init__()
+        db_url = os.environ.get("CHP_DATABASE_URL", self.default_db_url)
+        db_table = os.environ.get("CHP_DATABASE_TABLE", self.default_db_table)
+        self.routes: TableTrie = TableTrie(db_url, table=db_table)
+        # the URL may embed credentials (user:password@host); keep them out of the log
+        log.info(f"Using database {self._redact_url(db_url)}")
+        for route, data in self.get_all().items():
+            log.info(f'Restoring {route} => {data.get("target", "<no target>")}')
+
+    @staticmethod
+    def _redact_url(url: str) -> str:
+        # mask the password part of scheme://user:password@host/... for logging;
+        # URLs without a user:password@ section are returned unchanged
+        import re
+
+        return re.sub(r"(://[^:/@]*:)[^@/]*@", r"\1***@", url)
+
+    def clean(self):
+        # remove all information stored so far
+        self.routes.clean()
+
+    def get_target(self, path: str):
+        # return the stored row for the most specific matching route;
+        # the match is looked up from the full path upwards to "/"
+        return self.routes.get(self.clean_path(path), trie=True)
+
+    def get_all(self):
+        # return all routes as route => data
+        return self.routes.all()
+
+    def add(self, path: str, data):
+        # add a new route /path, storing data; an existing route is updated instead
+        if self.get(path) is not None:
+            self.update(path, data)
+        else:
+            # store under the cleaned path: get/update/remove all look routes
+            # up by clean_path(path), so the raw path would never be found again
+            self.routes.add(self.clean_path(path), data)
+
+    def update(self, path: str, data):
+        # merge data into an existing route
+        self.routes.update(self.clean_path(path), data)
+
+    def remove(self, path: str):
+        # remove an existing route and return its data (None if it did not exist)
+        path = self.clean_path(path)
+        route = self.routes.get(path)
+        if route is not None:
+            self.routes.remove(path)
+        return route
+
+    def get(self, path):
+        # return the data for the exact match, None if there is no such route
+        return self.routes.get(self.clean_path(path))
+
+
+class TableTrie:
+    """A URLtrie-like backed by a database
+
+    This stores URL-path => data mappings. An exact lookup returns the data
+    stored for that path. A trie lookup tries the path itself, then each of
+    its parent paths, then the default path "/", and returns the first match.
+
+    Usage:
+
+        # create mapping
+        routes = TableTrie('sqlite:///:memory:')
+        routes.add('/', {'some': 'default'})
+        routes.add('/foo/bar', {'some': 'value'})
+
+        # exact lookup
+        routes.get('/foo/bar')
+        => {'some': 'value'}
+
+        # trie lookup matched by a parent path
+        routes.get('/foo/bar/baz', trie=True)
+        => {
+          'id': ...,
+          'path': '/foo/bar',
+          'prefix': '/foo/bar',
+          'data': {'some': 'value'}
+        }
+
+        # trie lookup that only matches the default route
+        routes.get('/fox/bax', trie=True)
+        => {
+          'id': ...,
+          'path': '/',
+          'prefix': '/',
+          'data': {'some': 'default'}
+        }
+
+    How values are stored:
+
+        Routes are stored in the given table (defaults to 'chp_routes').
+        The table has the following columns:
+
+            id: integer (primary key)
+            path: varchar(128), unique
+            data: string (serialized JSON)
+
+        The data is the serialized JSON equivalent of the dictionary stored
+        by TableTrie.add() or .update(). The rationale for storing a serialized
+        version of the dict instead of using the sqlalchemy JSON support directly
+        is to improve compatibility across db dialects. datetime values are
+        stored as strings of the form "_dt_:<isoformat>" and restored on read.
+
+    DB backend:
+
+        The backend is any database supported by SQLAlchemy. To simplify
+        implementation this uses the dataset library, which provides a very
+        straight-forward way of working with tables created from Python dicts.
+    """
+
+    def __init__(self, url, table=None):
+        # connect to the database at url and make sure the table has a
+        # unique "path" column
+        table = table or "chp_routes"
+        self.db = connect(url)
+        self.table = self.db[table]
+        self.table.create_column("path", self.db.types.string(length=128), unique=True)
+
+    def get(self, path, trie=False):
+        # return the data stored for path, or None if nothing matches
+        # -- if trie is False (default), returns the data dict of the exact path
+        # -- if trie is True, returns the first matching row (path, then parents,
+        #    then "/") with its decoded "data" and the matching "prefix"
+        candidates = self._split_routes(path) if trie else [path]
+        for candidate in candidates:
+            doc = self.table.find_one(path=candidate, order_by="id")
+            if not doc:
+                continue
+            payload = self._from_json(doc["data"])
+            if not trie:
+                # an existing route with empty data is still a match
+                return attrdict(payload)
+            doc["data"] = payload
+            doc["prefix"] = candidate
+            return attrdict(doc)
+        return None
+
+    def add(self, path, data):
+        # insert a new row holding the serialized data for the given exact path
+        self.table.insert({"path": path, "data": self._to_json(data)})
+
+    def update(self, path, data):
+        # merge data into the dict stored for the given exact path
+        # raises KeyError if no row exists for path
+        doc = self.table.find_one(path=path, order_by="id")
+        if not doc:
+            raise KeyError(f"no route stored for path {path!r}")
+        merged = self._from_json(doc["data"])
+        merged.update(data)
+        doc["data"] = self._to_json(merged)
+        self.table.update(doc, "id")
+
+    def remove(self, path):
+        # remove all matching routes for the given path, except default route
+        # NOTE(review): this also deletes the rows of every parent path
+        # (removing /a/b deletes /a as well) -- confirm that this is intended
+        for subpath in self._split_routes(path):
+            if subpath == "/" and path != "/":
+                continue
+            self.table.delete(path=subpath)
+
+    def all(self):
+        # return the decoded data of all rows as path => data, ordered by id
+        return {item["path"]: self._from_json(item["data"]) for item in self.table.find(order_by="id")}
+
+    def _to_json(self, data):
+        # serialize data to a JSON string; the caller's dict is left untouched
+        return json.dumps(self._encode(data))
+
+    def _encode(self, data):
+        # build a JSON-ready copy of data: datetime values become
+        # "_dt_:<isoformat>" strings, nested dicts are encoded recursively
+        # (they stay dicts, so they are restored as dicts on read)
+        encoded = {}
+        for k, v in dict(data).items():
+            if isinstance(v, datetime):
+                v = f"_dt_:{v.isoformat()}"
+            elif isinstance(v, dict):
+                v = self._encode(v)
+            encoded[k] = v
+        return encoded
+
+    def _from_json(self, data):
+        # inverse of _to_json: accepts a JSON string/bytes or an already
+        # parsed dict and turns "_dt_:" strings back into datetime objects
+        data = json.loads(data) if isinstance(data, (str, bytes)) else data
+        for k, v in dict(data).items():
+            if isinstance(v, str) and v.startswith("_dt_:"):
+                data[k] = datetime.fromisoformat(v.split(":", 1)[-1])
+            elif isinstance(v, dict):
+                data[k] = self._from_json(v)
+        return data
+
+    def _split_routes(self, path):
+        # generator for reverse tree of routes, each route yielded once
+        # e.g. /path/to/document
+        # => yields /path/to/document, /path/to, /path, /
+        levels = path.split("/")
+        route = None
+        # stop at two levels: for a path with a leading slash that is "/<first>"
+        for end in range(max(len(levels), 2), 1, -1):
+            route = "/".join(levels[:end])
+            yield route
+        # always yield top level route, unless it was just yielded
+        if route != "/":
+            yield "/"
+
+    def clean(self):
+        # delete every row of the table
+        self.table.delete()
+
+
+class attrdict(dict):
+    """A dict whose items can also be read and written as attributes."""
+
+    def __init__(self, *args, **kwargs):
+        dict.__init__(self, *args, **kwargs)
+        # use the mapping itself as the instance namespace, so d.key and
+        # d["key"] refer to the same entry
+        self.__dict__ = self
diff --git a/configurable_http_proxy_test/test_api.py b/configurable_http_proxy_test/test_api.py
@@ -1,17 +1,18 @@
 import datetime
 import json
+import os
 
 from tornado.testing import AsyncHTTPTestCase
 
 from configurable_http_proxy.configproxy import PythonProxy
 from configurable_http_proxy_test.testutil import pytest_regex
 
 
-class TestAPI(AsyncHTTPTestCase):
-    def get_app(self):
-        self.proxy = PythonProxy({"auth_token": "secret"})
-        self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"})
-        return self.proxy.api_app
+class APITestsMixin:
+    """
+    Test cases for TestAPI
+    This allows to reuse test cases for MemoryStore and DatabaseStore backends
+    """
 
     def fetch(self, path, raise_error=True, with_auth=True, **kwargs):
         headers = kwargs.pop("headers", {})
@@ -144,3 +145,22 @@ def test_get_routes_with_inactive_since(self):
         resp = self.fetch(f"/api/routes?inactiveSince={hour_from_now.isoformat()}")
         reply = json.loads(resp.body)
         assert set(reply.keys()) == {"/", "/today", "/yesterday"}
+
+
+class TestAPI_MemoryStore(APITestsMixin, AsyncHTTPTestCase):
+    # Runs the shared API test cases against a PythonProxy that is created
+    # without a "storage_backend" option (presumably the in-memory store,
+    # going by the class name).
+    def get_app(self):
+        # build the proxy, seed the default route and expose its API app
+        self.proxy = PythonProxy({"auth_token": "secret"})
+        self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"})
+        return self.proxy.api_app
+
+
+class TestAPI_DatabaseStore(APITestsMixin, AsyncHTTPTestCase):
+    # Runs the shared API test cases against a PythonProxy configured with
+    # the DatabaseStore storage backend.
+    def get_app(self):
+        # NOTE(review): the env var is set process-wide and never restored,
+        # so it stays visible to whatever runs after this test
+        os.environ["CHP_DATABASE_URL"] = "sqlite:///chp_test.sqlite"
+        self.proxy = PythonProxy(
+            {"auth_token": "secret", "storage_backend": "configurable_http_proxy.dbstore.DatabaseStore"}
+        )
+        # the sqlite file is reused between runs: start from an empty table
+        self.proxy._routes.clean()
+        assert self.proxy._routes.get_all() == {}
+        self.proxy.add_route("/", {"target": "http://127.0.0.1:54321"})
+        return self.proxy.api_app
diff --git a/configurable_http_proxy_test/test_store.py b/configurable_http_proxy_test/test_store.py
@@ -1,10 +1,12 @@
-from configurable_http_proxy.store import MemoryStore
+import os
 
+from configurable_http_proxy.dbstore import DatabaseStore
+from configurable_http_proxy.store import MemoryStore
 
-class TestMemoryStore:
-    def setup_method(self, method):
-        self.subject = MemoryStore()
 
+class StoreTestMixin:
+    # test cases for the storage
+    # -- this allows to reuse tests for MemoryStore and DatabaseStore
     def test_get(self):
         self.subject.add("/myRoute", {"test": "value"})
         route = self.subject.get("/myRoute")
@@ -73,3 +75,15 @@ def test_has_route(self):
     def test_has_route_path_not_found(self):
         route = self.subject.get("/wut")
         assert route is None
+
+
+class TestMemoryStore(StoreTestMixin):
+    # Runs the shared store test cases against MemoryStore.
+    def setup_method(self, method):
+        # a fresh store for every test
+        self.subject = MemoryStore()
+
+
+class TestDataBaseStore(StoreTestMixin):
+    # Runs the shared store test cases against DatabaseStore.
+    def setup_method(self, method):
+        # NOTE(review): the env var is set process-wide and never restored
+        os.environ["CHP_DATABASE_URL"] = "sqlite:///chp_test.sqlite"
+        self.subject = DatabaseStore()
+        # the sqlite file is reused between tests: drop all stored routes
+        self.subject.clean()
diff --git a/setup.py b/setup.py
@@ -19,6 +19,9 @@
         },
         setup_requires=["setuptools_scm"],
         install_requires=open(os.path.join(BASE_PATH, "requirements", "base.txt")).readlines(),
+        extras_require={
+            "sqla": ["dataset"],
+        },
         python_requires=">=3.6",
         include_package_data=True,
         zip_safe=False,

Original file line number	Diff line number	Diff line change
`@@ -134,3 +134,4 @@ dmypy.json`
`134`	`134`
`135`	`135`	`# Project specific`
`136`	`136`	`configurable_http_proxy/version.txt`
	`137`	`+*sqlite*`