|
| 1 | +defmodule Mix.Tasks.Profile.Tprof do |
| 2 | + use Mix.Task |
| 3 | + |
| 4 | + @shortdoc "Profiles the given file or expression with tprof" |
| 5 | + |
| 6 | + @moduledoc """ |
| 7 | + Profiles the given file or expression using Erlang's `tprof` tool. |
| 8 | +
|
| 9 | + [`:tprof`](`:tprof`) provides time information of each function call and can be useful |
| 10 | + when you want to discover the bottlenecks related to this. |
| 11 | +
|
| 12 | + Before running the code, it invokes the `app.start` task which compiles |
| 13 | + and loads your project. After that, the target expression is profiled together |
| 14 | + with all matching function calls using the Erlang trace BIFs. The tracing of |
| 15 | + the function calls for that is enabled when the profiling is begun, and |
| 16 | + disabled when profiling is stopped. |
| 17 | +
|
| 18 | + To profile the code, you can use syntax similar to the `mix run` task: |
| 19 | +
|
| 20 | + $ mix profile.tprof -e Hello.world |
| 21 | + $ mix profile.tprof -e "[1, 2, 3] |> Enum.reverse |> Enum.map(&Integer.to_string/1)" |
| 22 | + $ mix profile.tprof my_script.exs arg1 arg2 arg3 |
| 23 | +
|
| 24 | + This task is automatically re-enabled, so you can profile multiple times |
| 25 | + in the same Mix invocation. |
| 26 | +
|
| 27 | + ## Command line options |
| 28 | +
|
| 29 | + * `--matching` - only profile calls matching the given `Module.function/arity` pattern |
| 30 | + * `--calls` - filters out any results with a call count lower than this |
| 31 | + * `--time` - filters out any results that took lower than specified (in µs) |
| 32 | + * `--sort` - sorts the results by `time` or `calls` (default: `time`) |
| 33 | + * `--eval`, `-e` - evaluates the given code |
| 34 | + * `--require`, `-r` - requires pattern before running the command |
| 35 | + * `--parallel`, `-p` - makes all requires parallel |
| 36 | + * `--no-warmup` - skips the warmup step before profiling |
| 37 | + * `--no-compile` - does not compile even if files require compilation |
| 38 | + * `--no-deps-check` - does not check dependencies |
| 39 | + * `--no-archives-check` - does not check archives |
| 40 | + * `--no-halt` - does not halt the system after running the command |
| 41 | + * `--no-start` - does not start applications after compilation |
| 42 | + * `--no-elixir-version-check` - does not check the Elixir version from mix.exs |
| 43 | +
|
| 44 | + ## Profile output |
| 45 | +
|
| 46 | + Example output: |
| 47 | +
|
| 48 | + # CALLS % TIME µS/CALL |
| 49 | + Total 24 100.0 26 1.08 |
| 50 | + Enum.reduce_range_inc/4 5 3.85 1 0.20 |
| 51 | + :erlang.make_fun/3 1 7.69 2 2.00 |
| 52 | + Enum.each/2 1 7.69 2 2.00 |
| 53 | + anonymous fn/0 in :elixir_compiler_0.__FILE__/1 1 7.69 2 2.00 |
| 54 | + :erlang.integer_to_binary/1 5 15.39 4 0.80 |
| 55 | + :erlang.apply/2 1 15.39 4 4.00 |
| 56 | + anonymous fn/3 in Enum.each/2 5 19.23 5 1.00 |
| 57 | + String.Chars.Integer.to_string/1 5 23.08 6 1.20 |
| 58 | +
|
| 59 | + Profile done over 8 matching functions |
| 60 | +
|
| 61 | + The default output contains data gathered from all matching functions. The first |
| 62 | + row after the header contains the sums of the partial results and the average time |
| 63 | + for all the function calls listed. The following rows contain the function call, |
| 64 | + followed by the number of times that the function was called, then by the percentage |
| 65 | + of time that the call uses, then the total time for that function in microseconds, |
| 66 | + and, finally, the average time per call in microseconds. |
| 67 | +
|
| 68 | + When `--matching` option is specified, call count tracing will be started only for |
| 69 | + the functions matching the given pattern: |
| 70 | +
|
| 71 | + # CALLS % TIME µS/CALL |
| 72 | + Total 5 100.0 6 1.20 |
| 73 | + String.Chars.Integer.to_string/1 5 100.0 6 1.20 |
| 74 | +
|
| 75 | + Profile done over 1 matching functions |
| 76 | +
|
| 77 | + The pattern can be a module name, such as `String` to count all calls to that module, |
| 78 | + a call without arity, such as `String.split`, to count all calls to that function |
| 79 | + regardless of arity, or a call with arity, such as `String.split/3`, to count all |
| 80 | + calls to that exact module, function and arity. |
| 81 | +
|
| 82 | + ## Caveats |
| 83 | +
|
| 84 | + You should be aware that the code being profiled is running in an anonymous |
| 85 | + function which is invoked by [`:tprof` module](https://www.erlang.org/doc/man/tprof.html). |
| 86 | + Thus, you'll see some additional entries in your profile output. It is also |
| 87 | + important to note that the profiler is stopped as soon as the code has finished running, |
| 88 | + and this may need special attention, when: running asynchronous code as function calls which were |
| 89 | + called before the profiler stopped will not be counted; running synchronous code as long |
| 90 | + running computations and a profiler without a proper MFA trace pattern or filter may |
| 91 | + lead to a result set which is difficult to comprehend. |
| 92 | +
|
| 93 | + You should expect a slowdown in your code execution using this tool since `:tprof` has |
| 94 | + some performance impact on the execution, but the impact is considerably lower than |
| 95 | + `Mix.Tasks.Profile.Fprof`. If you have a large system try to profile a limited |
| 96 | + scenario or focus on the main modules or processes. Another alternative is to use |
| 97 | + `Mix.Tasks.Profile.Cprof` that uses [`:cprof`](`:cprof`) and has a low performance degradation effect. |
| 98 | + """ |
| 99 | + |
| 100 | + @switches [ |
| 101 | + parallel: :boolean, |
| 102 | + require: :keep, |
| 103 | + eval: :keep, |
| 104 | + config: :keep, |
| 105 | + matching: :string, |
| 106 | + halt: :boolean, |
| 107 | + compile: :boolean, |
| 108 | + deps_check: :boolean, |
| 109 | + calls: :integer, |
| 110 | + time: :integer, |
| 111 | + sort: :string, |
| 112 | + start: :boolean, |
| 113 | + archives_check: :boolean, |
| 114 | + warmup: :boolean, |
| 115 | + elixir_version_check: :boolean, |
| 116 | + parallel_require: :keep |
| 117 | + ] |
| 118 | + |
| 119 | + @aliases [ |
| 120 | + r: :require, |
| 121 | + p: :parallel, |
| 122 | + e: :eval, |
| 123 | + c: :config |
| 124 | + ] |
| 125 | + |
| 126 | + @impl true |
| 127 | + def run(args) do |
| 128 | + {opts, head} = OptionParser.parse_head!(args, aliases: @aliases, strict: @switches) |
| 129 | + Mix.Task.reenable("profile.tprof") |
| 130 | + |
| 131 | + Mix.Tasks.Run.run( |
| 132 | + ["--no-mix-exs" | args], |
| 133 | + opts, |
| 134 | + head, |
| 135 | + &profile_code(&1, opts), |
| 136 | + &profile_code(File.read!(&1), opts) |
| 137 | + ) |
| 138 | + end |
| 139 | + |
| 140 | + defp profile_code(code_string, opts) do |
| 141 | + opts = Enum.map(opts, &parse_opt/1) |
| 142 | + |
| 143 | + content = |
| 144 | + quote do |
| 145 | + unquote(__MODULE__).profile( |
| 146 | + fn -> |
| 147 | + unquote(Code.string_to_quoted!(code_string)) |
| 148 | + end, |
| 149 | + unquote(Macro.escape(opts)) |
| 150 | + ) |
| 151 | + end |
| 152 | + |
| 153 | + # Use compile_quoted since it leaves less noise than eval_quoted |
| 154 | + Code.compile_quoted(content) |
| 155 | + end |
| 156 | + |
| 157 | + defp parse_opt({:matching, matching}) do |
| 158 | + case Mix.Utils.parse_mfa(matching) do |
| 159 | + {:ok, [m, f, a]} -> {:matching, {m, f, a}} |
| 160 | + {:ok, [m, f]} -> {:matching, {m, f, :_}} |
| 161 | + {:ok, [m]} -> {:matching, {m, :_, :_}} |
| 162 | + :error -> Mix.raise("Invalid matching pattern: #{matching}") |
| 163 | + end |
| 164 | + end |
| 165 | + |
| 166 | + defp parse_opt({:sort, "time"}), do: {:sort, :time} |
| 167 | + defp parse_opt({:sort, "calls"}), do: {:sort, :calls} |
| 168 | + defp parse_opt({:sort, other}), do: Mix.raise("Invalid sort option: #{other}") |
| 169 | + defp parse_opt(other), do: other |
| 170 | + |
| 171 | + @doc """ |
| 172 | + Allows to programmatically run the `tprof` profiler on expression in `fun`. |
| 173 | +
|
| 174 | + Returns the return value of `fun`. |
| 175 | +
|
| 176 | + ## Options |
| 177 | +
|
| 178 | + * `:matching` - only profile calls matching the given pattern in form of |
| 179 | + `{module, function, arity}`, where each element may be replaced by `:_` |
| 180 | + to allow any value |
| 181 | + * `:calls` - filters out any results with a call count lower than this |
| 182 | + * `:time` - filters out any results that took lower than specified (in µs) |
| 183 | + * `:sort` - sort the results by `:time` or `:calls` (default: `:time`) |
| 184 | + * `:warmup` - if the code should be warmed up before profiling (default: `true`) |
| 185 | + * `:set_on_spawn` - if newly spawned processes should be measured (default: `true`) |
| 186 | +
|
| 187 | + """ |
| 188 | + @spec profile((-> result), keyword()) :: result when result: any() |
| 189 | + def profile(fun, opts \\ []) when is_function(fun, 0) do |
| 190 | + Mix.ensure_application!(:tools) |
| 191 | + {return_value, results} = profile_and_analyse(fun, opts) |
| 192 | + print_output(results) |
| 193 | + return_value |
| 194 | + end |
| 195 | + |
| 196 | + defp profile_and_analyse(fun, opts) do |
| 197 | + if Keyword.get(opts, :warmup, true) do |
| 198 | + IO.puts("Warmup...\n") |
| 199 | + fun.() |
| 200 | + end |
| 201 | + |
| 202 | + :tprof.start() |
| 203 | + matching = Keyword.get(opts, :matching, {:_, :_, :_}) |
| 204 | + set_on_spawn = Keyword.get(opts, :set_on_spawn, true) |
| 205 | + {:ok, return_value} = :tprof.profile([], fun, matching, set_on_spawn: set_on_spawn) |
| 206 | + |
| 207 | + results = |
| 208 | + Enum.map(:tprof.dump(), fn {pid, call_results} -> |
| 209 | + parsed_calls = |
| 210 | + call_results |
| 211 | + |> filter_results(opts) |
| 212 | + |> sort_results(opts) |
| 213 | + |> add_totals() |
| 214 | + |
| 215 | + {pid, parsed_calls} |
| 216 | + end) |
| 217 | + |
| 218 | + :tprof.stop() |
| 219 | + |
| 220 | + {return_value, results} |
| 221 | + end |
| 222 | + |
| 223 | + defp filter_results(call_results, opts) do |
| 224 | + calls_opt = Keyword.get(opts, :calls, 0) |
| 225 | + time_opt = Keyword.get(opts, :time, 0) |
| 226 | + |
| 227 | + call_results |
| 228 | + |> Stream.filter(fn {_mfa, {count, _time}} -> count >= calls_opt end) |
| 229 | + |> Stream.filter(fn {_mfa, {_count, time}} -> time >= time_opt end) |
| 230 | + end |
| 231 | + |
| 232 | + defp sort_results(call_results, opts) do |
| 233 | + Enum.sort_by(call_results, sort_function(Keyword.get(opts, :sort, :time))) |
| 234 | + end |
| 235 | + |
| 236 | + defp sort_function(:time), do: fn {_mfa, {_count, time}} -> time end |
| 237 | + defp sort_function(:calls), do: fn {_mfa, {count, _time}} -> count end |
| 238 | + |
| 239 | + defp add_totals(call_results) do |
| 240 | + {function_count, call_count, total_time} = |
| 241 | + Enum.reduce(call_results, {0, 0, 0}, fn {_, {count, time}}, acc -> |
| 242 | + {function_count, call_count, total_time} = acc |
| 243 | + {function_count + 1, call_count + count, total_time + time} |
| 244 | + end) |
| 245 | + |
| 246 | + {function_count, call_results, call_count, total_time} |
| 247 | + end |
| 248 | + |
| 249 | + @header ["#", "CALLS", "%", "TIME", "µS/CALL"] |
| 250 | + |
| 251 | + defp print_output([]) do |
| 252 | + print_function_count(0) |
| 253 | + end |
| 254 | + |
| 255 | + defp print_output(results) do |
| 256 | + Enum.each(results, &print_result/1) |
| 257 | + end |
| 258 | + |
| 259 | + defp print_result({pid, {function_count, call_results, call_count, total_time}}) do |
| 260 | + formatted_rows = Enum.map(call_results, &format_row(&1, total_time)) |
| 261 | + formatted_total = format_total(total_time, call_count) |
| 262 | + |
| 263 | + column_lengths = column_lengths(@header, formatted_rows) |
| 264 | + |
| 265 | + IO.puts("") |
| 266 | + |
| 267 | + print_pid_row(pid) |
| 268 | + print_row(@header, column_lengths) |
| 269 | + print_row(formatted_total, column_lengths) |
| 270 | + Enum.each(formatted_rows, &print_row(&1, column_lengths)) |
| 271 | + |
| 272 | + IO.puts("") |
| 273 | + |
| 274 | + print_function_count(function_count) |
| 275 | + end |
| 276 | + |
| 277 | + defp print_pid_row(pid) do |
| 278 | + IO.puts("Profile results of #{inspect(pid)}") |
| 279 | + end |
| 280 | + |
| 281 | + defp format_row({{module, function, arity}, {count, time}}, total_time) do |
| 282 | + mfa = Exception.format_mfa(module, function, arity) |
| 283 | + time_percentage = :erlang.float_to_binary(100 * divide(time, total_time), [{:decimals, 2}]) |
| 284 | + time_per_call = :erlang.float_to_binary(divide(time, count), [{:decimals, 2}]) |
| 285 | + count = Integer.to_string(count) |
| 286 | + time = Integer.to_string(time) |
| 287 | + |
| 288 | + [mfa, count, time_percentage, time, time_per_call] |
| 289 | + end |
| 290 | + |
| 291 | + defp format_total(total_time, total_count) do |
| 292 | + time_per_call = :erlang.float_to_binary(divide(total_time, total_count), [{:decimals, 2}]) |
| 293 | + |
| 294 | + [ |
| 295 | + "Total", |
| 296 | + Integer.to_string(total_count), |
| 297 | + "100.00", |
| 298 | + Integer.to_string(total_time), |
| 299 | + time_per_call |
| 300 | + ] |
| 301 | + end |
| 302 | + |
| 303 | + defp divide(_, 0), do: 0.0 |
| 304 | + defp divide(t, n), do: t / n |
| 305 | + |
| 306 | + defp column_lengths(header, rows) do |
| 307 | + max_lengths = Enum.map(header, &String.length/1) |
| 308 | + |
| 309 | + Enum.reduce(rows, max_lengths, fn row, max_lengths -> |
| 310 | + Stream.map(row, &String.length/1) |
| 311 | + |> Stream.zip(max_lengths) |
| 312 | + |> Enum.map(&max/1) |
| 313 | + end) |
| 314 | + end |
| 315 | + |
| 316 | + defp max({a, b}) when a >= b, do: a |
| 317 | + defp max({_, b}), do: b |
| 318 | + |
| 319 | + @format "~-*s ~*s ~*s ~*s ~*s~n" |
| 320 | + |
| 321 | + defp print_row(row, column_lengths) do |
| 322 | + to_print = |
| 323 | + column_lengths |
| 324 | + |> Stream.zip(Stream.map(row, &String.to_charlist/1)) |
| 325 | + |> Enum.flat_map(&Tuple.to_list/1) |
| 326 | + |
| 327 | + :io.format(@format, to_print) |
| 328 | + end |
| 329 | + |
| 330 | + defp print_function_count(count) do |
| 331 | + IO.puts("Profile done over #{count} matching functions") |
| 332 | + end |
| 333 | +end |
0 commit comments