|
3 | 3 | (:require [clojure.string :as str]
|
4 | 4 | [clojure.tools.logging :refer [info warn]]
|
5 | 5 | [next.jdbc :as j]
|
| 6 | + [dom-top.core :as dt] |
6 | 7 | [next.jdbc.connection :as connection]))
|
7 | 8 |
|
8 |
| -(def max-timeout "Longest timeout, in ms" 30000) |
(def max-timeout
  "Upper bound on any timeout, expressed in milliseconds."
  300000)
9 | 10 |
|
10 | 11 | (defn conn-spec
|
11 | 12 | "JDBC connection spec for a node."
|
|
41 | 42 | [conn id old new]
|
42 | 43 | (first (vals (first (j/execute! conn ["SELECT _CAS(?, ?, ?, 'JEPSEN')"
|
43 | 44 | id old new])))))
|
| 45 | + |
(defmacro with-error-handling
  "Common error handling for errors, including txn aborts.

  Evaluates body inside with-txn-aborts and returns its result. The following
  failures are turned into a completed copy of op instead of propagating:

    BatchUpdateException whose message matches \"Query timed out\"
      -> :type :info, :error :query-timed-out
    SQLNonTransientConnectionException matching \"Connection timed out\"
      -> :type :info, :error :conn-timed-out
    ExceptionInfo whose :rollback cause equals \"Connection is closed\"
      -> :type :info, :error :conn-closed-rollback-failed
    ExceptionInfo whose :rollback cause equals
    \"createStatement() is called on closed connection\"
      -> :type :fail, :error :conn-closed-rollback-failed

  Anything else is rethrown unchanged; unrecognised ExceptionInfos are logged
  first."
  [op & body]
  `(try
     (with-txn-aborts ~op ~@body)

     (catch java.sql.BatchUpdateException e#
       ; str: getMessage may be nil, and re-find on nil would throw a
       ; NullPointerException that hides the original exception.
       (condp re-find (str (.getMessage e#))
         #"Query timed out" (assoc ~op :type :info, :error :query-timed-out)
         (throw e#)))

     (catch java.sql.SQLNonTransientConnectionException e#
       (condp re-find (str (.getMessage e#))
         #"Connection timed out" (assoc ~op :type :info, :error :conn-timed-out)
         (throw e#)))

     (catch clojure.lang.ExceptionInfo e#
       (let [data#     (ex-data e#)
             rollback# (:rollback data#)
             ; Not every ExceptionInfo carries a :rollback entry; guard so
             ; that such exceptions are rethrown as-is rather than replaced
             ; by a NullPointerException.
             ; NOTE(review): .cause is kept from the original code; confirm
             ; that it yields the message string being compared against.
             cause#    (when rollback# (.cause rollback#))]
         (cond (= "Connection is closed" cause#)
               (assoc ~op :type :info, :error :conn-closed-rollback-failed)

               (= "createStatement() is called on closed connection" cause#)
               (assoc ~op :type :fail, :error :conn-closed-rollback-failed)

               :else (do (info e# :caught (pr-str data#))
                         (info :caught-rollback rollback#)
                         (info :caught-cause cause#)
                         (throw e#)))))))
| 74 | + |
(defmacro with-txn-aborts
  "Runs body through capture-txn-abort. When the transaction was aborted,
  yields op marked as :type :fail with :error :conflict; otherwise yields
  whatever body returned."
  [op & body]
  `(let [outcome# (capture-txn-abort ~@body)]
     (if-not (= ::abort outcome#)
       outcome#
       (assoc ~op :type :fail, :error :conflict))))
| 82 | + |
(defmacro with-conn-failure-retry
  "DBMS tends to be flaky for a few seconds after starting up, which can wind
  up breaking our setup code. This macro adds a little bit of backoff and retry
  for those conditions.

  conn must be a symbol bound to a connection; body is evaluated with that
  symbol rebound to the current connection. On a retryable failure we sleep
  for a random interval below two seconds, obtain a fresh connection through
  reopen!, and evaluate body again, for at most 32 retries. Once the retries
  are used up the last exception is rethrown. A SQLException whose message
  matches none of the known transient patterns is logged and rethrown
  immediately."
  [conn & body]
  (assert (symbol? conn))
  (let [tries    (gensym 'tries) ; try count
        e        (gensym 'e)     ; errors
        conn-sym (gensym 'conn)  ; local conn reference
        ; Shared retry form spliced into every retryable catch clause below.
        retry `(do (when (zero? ~tries)
                     (info "Out of retries!")
                     (throw ~e))
                   (info "Connection failure; retrying...")
                   (Thread/sleep (rand-int 2000))
                   (~'retry (reopen! ~conn-sym) (dec ~tries)))]
    `(dt/with-retry [~conn-sym ~conn
                     ~tries    32]
       (let [~conn ~conn-sym] ; Rebind the conn symbol to our current connection
         ~@body)
       (catch org.tarantool.CommunicationException ~e ~retry)
       (catch java.sql.BatchUpdateException ~e ~retry)
       (catch java.sql.SQLTimeoutException ~e ~retry)
       (catch java.sql.SQLNonTransientConnectionException ~e ~retry)
       (catch java.sql.SQLException ~e
         ; str: getMessage may be nil, and re-find on nil would throw a
         ; NullPointerException instead of reaching the log-and-rethrow
         ; default below.
         (condp re-find (str (.getMessage ~e))
           #"Resolve lock timeout"           ~retry ; high contention
           #"Information schema is changed"  ~retry ; ???
           #"called on closed connection"    ~retry ; definitely didn't happen
           #"Region is unavailable"          ~retry ; okay fine
           (do (info "with-conn-failure-retry isn't sure how to handle SQLException with message" (pr-str (class (.getMessage ~e))) (pr-str (.getMessage ~e)))
               (throw ~e)))))))
| 114 | + |
(defn reopen!
  "Returns a new connection opened from the ::node and ::test values stored on
  the given connection. The given connection itself is not closed here."
  [conn]
  ; Closing the old connection is skipped: it is not known how to close a
  ; connection in next.jdbc.
  ;(close! conn)
  (let [node     (::node conn)
        test-map (::test conn)]
    (open node test-map)))
| 121 | + |
(defmacro capture-txn-abort
  "Converts aborted transactions to an ::abort keyword.

  Evaluates body and returns its value. Returns ::abort instead when body
  throws:
    - a SQLTransactionRollbackException or BatchUpdateException whose message
      equals rollback-msg, or
    - any other SQLException whose message matches one of the known
      retry/try-again patterns.
  Every other exception is rethrown unchanged."
  [& body]
  `(try ~@body
        (catch java.sql.SQLTransactionRollbackException e#
          (if (= (.getMessage e#) rollback-msg)
            ::abort
            (throw e#)))
        (catch java.sql.BatchUpdateException e#
          (if (= (.getMessage e#) rollback-msg)
            ::abort
            (throw e#)))
        (catch java.sql.SQLException e#
          ; str: getMessage may be nil, and re-find on nil would throw a
          ; NullPointerException that hides the original exception.
          (condp re-find (str (.getMessage e#))
            #"can not retry select for update statement" ::abort
            #"\[try again later\]" ::abort
            (throw e#)))))
| 139 | + |
(def rollback-msg
  "Message text shared by several driver exception classes to signal that a
  transaction was rolled back."
  (str "Deadlock found when trying to get lock; "
       "try restarting transaction"))
0 commit comments