Skip to content

Commit 4e74433

Browse files
authored
Introduce the tracer pattern, and new patterns for configuration. (#48)
1 parent 32cf698 commit 4e74433

28 files changed

+863
-735
lines changed

.credo.exs

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
#
2121
# You can give explicit globs or simply directories.
2222
# In the latter case `**/*.{ex,exs}` will be used.
23-
#
24-
included: ["lib/", "src/", "web/", "apps/"],
23+
#
24+
included: ["lib/", "src/", "test/", "web/", "apps/"],
2525
excluded: [~r"/_build/", ~r"/deps/"]
2626
},
2727
#
@@ -30,11 +30,6 @@
3030
#
3131
requires: [],
3232
#
33-
# Credo automatically checks for updates, like e.g. Hex does.
34-
# You can disable this behaviour below:
35-
#
36-
check_for_updates: true,
37-
#
3833
# If you want to enforce a style guide and need a more traditional linting
3934
# experience, you can change `strict` to `true` below:
4035
#
@@ -53,40 +48,46 @@
5348
# {Credo.Check.Design.DuplicatedCode, false}
5449
#
5550
checks: [
51+
#
52+
## Consistency Checks
53+
#
5654
{Credo.Check.Consistency.ExceptionNames},
5755
{Credo.Check.Consistency.LineEndings},
5856
{Credo.Check.Consistency.ParameterPatternMatching},
5957
{Credo.Check.Consistency.SpaceAroundOperators},
6058
{Credo.Check.Consistency.SpaceInParentheses},
6159
{Credo.Check.Consistency.TabsOrSpaces},
6260

63-
# For some checks, like AliasUsage, you can only customize the priority
61+
#
62+
## Design Checks
63+
#
64+
# You can customize the priority of any check
6465
# Priority values are: `low, normal, high, higher`
6566
#
66-
{Credo.Check.Design.AliasUsage, false},
67-
68-
# For others you can set parameters
69-
67+
# For some checks, you can also set other parameters
68+
#
7069
# If you don't want the `setup` and `test` macro calls in ExUnit tests
7170
# or the `schema` macro in Ecto schemas to trigger DuplicatedCode, just
7271
# set the `excluded_macros` parameter to `[:schema, :setup, :test]`.
7372
#
7473
{Credo.Check.Design.DuplicatedCode, excluded_macros: []},
75-
7674
# You can also customize the exit_status of each check.
7775
# If you don't want TODO comments to cause `mix credo` to fail, just
7876
# set this value to 0 (zero).
7977
#
80-
{Credo.Check.Design.TagTODO, exit_status: 2},
78+
{Credo.Check.Design.TagTODO, exit_status: 0},
8179
{Credo.Check.Design.TagFIXME},
8280

81+
#
82+
## Readability Checks
83+
#
8384
{Credo.Check.Readability.FunctionNames},
8485
{Credo.Check.Readability.LargeNumbers},
85-
{Credo.Check.Readability.MaxLineLength, false},
86+
{Credo.Check.Readability.MaxLineLength, priority: :low, max_length: 100},
8687
{Credo.Check.Readability.ModuleAttributeNames},
8788
{Credo.Check.Readability.ModuleDoc},
8889
{Credo.Check.Readability.ModuleNames},
89-
{Credo.Check.Readability.ParenthesesOnZeroArityDefs, false},
90+
{Credo.Check.Readability.ParenthesesOnZeroArityDefs},
9091
{Credo.Check.Readability.ParenthesesInCondition},
9192
{Credo.Check.Readability.PredicateFunctionNames},
9293
{Credo.Check.Readability.PreferImplicitTry},
@@ -98,6 +99,9 @@
9899
{Credo.Check.Readability.Semicolons},
99100
{Credo.Check.Readability.SpaceAfterCommas},
100101

102+
#
103+
## Refactoring Opportunities
104+
#
101105
{Credo.Check.Refactor.DoubleBooleanNegation},
102106
{Credo.Check.Refactor.CondStatements},
103107
{Credo.Check.Refactor.CyclomaticComplexity},
@@ -107,10 +111,15 @@
107111
{Credo.Check.Refactor.NegatedConditionsInUnless},
108112
{Credo.Check.Refactor.NegatedConditionsWithElse},
109113
{Credo.Check.Refactor.Nesting},
110-
{Credo.Check.Refactor.PipeChainStart},
114+
{Credo.Check.Refactor.PipeChainStart,
115+
excluded_argument_types: [:atom, :binary, :fn, :keyword], excluded_functions: []},
111116
{Credo.Check.Refactor.UnlessWithElse},
112117

118+
#
119+
## Warnings
120+
#
113121
{Credo.Check.Warning.BoolOperationOnSameValues},
122+
{Credo.Check.Warning.ExpensiveEmptyEnumCheck},
114123
{Credo.Check.Warning.IExPry},
115124
{Credo.Check.Warning.IoInspect},
116125
{Credo.Check.Warning.LazyLogging},
@@ -126,6 +135,7 @@
126135
{Credo.Check.Warning.UnusedTupleOperation},
127136
{Credo.Check.Warning.RaiseInsideRescue},
128137

138+
#
129139
# Controversial and experimental checks (opt-in, just remove `, false`)
130140
#
131141
{Credo.Check.Refactor.ABCSize, false},
@@ -134,14 +144,12 @@
134144
{Credo.Check.Warning.MapGetUnsafePass, false},
135145
{Credo.Check.Consistency.MultiAliasImportRequireUse, false},
136146

147+
#
137148
# Deprecated checks (these will be deleted after a grace period)
138149
#
139-
{Credo.Check.Readability.Specs, false},
140-
{Credo.Check.Warning.NameRedeclarationByAssignment, false},
141-
{Credo.Check.Warning.NameRedeclarationByCase, false},
142-
{Credo.Check.Warning.NameRedeclarationByDef, false},
143-
{Credo.Check.Warning.NameRedeclarationByFn, false},
150+
{Credo.Check.Readability.Specs, false}
144151

152+
#
145153
# Custom checks can be created using `mix credo.gen.check`.
146154
#
147155
]

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
language: elixir
22
elixir:
3-
- 1.4.0
3+
- 1.5.0
44
otp_release:
55
- 19.0
66
script:
77
- mix compile --warnings-as-errors
8-
- mix credo --strict
8+
- mix credo
99
- mix coveralls.travis
1010
after_script:
1111
- mix deps.get --only docs

README.md

Lines changed: 63 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,85 +10,84 @@ View the [documentation](https://hexdocs.pm/spandex)
1010

1111
Spandex is a platform-agnostic tracing library. Currently there is only a Datadog APM adapter, but it's designed so that more adapters can be written for it.
1212

13+
This library is undergoing some structural changes for future versions. This documentation will be kept up to date, but if there are any inconsistencies, don't hesitate to make an issue.
14+
1315
## Installation
16+
1417
```elixir
1518
def deps do
16-
[{:spandex, "~> 1.3.4"}]
19+
[{:spandex, "~> 1.4.0"}]
1720
end
1821
```
19-
## Warning
20-
21-
Don't use the endpoint/channel configuration in your production environment. We saw a significant increase in scheduler/cpu load during high traffic times due to this feature. It was intended to provide a way to write custom visualizations by subscribing to a channel. We haven't removed it yet, but we probably will soon.
2222

23-
## Performance
23+
## Setup and Configuration
2424

25-
Originally, the library had an api server and spans were sent via `GenServer.cast`, but we've seen the need to introduce backpressure, and limit the overall amount of requests made. As such, there are two new configuration options (also shown in the configuration section below)
25+
Define your tracer:
2626

2727
```elixir
28-
config :spandex, :datadog,
29-
batch_size: 10,
30-
sync_threshold: 20
28+
defmodule MyApp.Tracer do
29+
use Spandex.Tracer, otp_app: :my_app
30+
end
3131
```
3232

33-
Batch size refers to *traces* not spans, so if you send a large amount of spans per trace, then you probably want to keep that number low. If you send only a few spans, then you could set it significantly higher.
34-
35-
Sync threshold refers to the *number of processes concurrently sending spans*. *NOT* the number of traces queued up waiting to be sent. It is used to apply backpressure while still taking advantage of parallelism. Ideally, the sync threshold would be set to a point that you wouldn't reasonably reach often, but that is low enough to not cause systemic performance issues if you don't apply backpressure. A simple way to think about it is that if you are seeing 1000 request per second, and your batch size is 10, then you'll be making 100 requests per second to datadog(probably a bad config). But if your sync_threshold is set to 10, you'll almost certainly exceed that because 100 requests in 1 second will likely overlap in that way. So when that is exceeded, the work is done synchronously, (not waiting for the asynchronous ones to complete even). This concept of backpressure is very important, and strategies for switching to synchronous operation are often surprisingly far more performant than purely asynchronous strategies (and much more predictable).
33+
Configure it:
3634

37-
## Configuration
35+
```elixir
36+
config :my_app, MyApp.Tracer,
37+
service: :my_api,
38+
adapter: Spandex.Adapters.Datadog,
39+
disabled?: false,
40+
env: "PROD"
41+
```
3842

39-
Spandex uses `Confex` under the hood. See the formats usable for declaring values at their [documentation](https://github.com/Nebo15/confex)
43+
Or configure it at runtime by calling `configure/1` (usually during your application's startup):
4044

4145
```elixir
42-
config :spandex,
43-
service: :my_api, # required, default service name
44-
adapter: Spandex.Adapters.Datadog, # required
45-
disabled?: {:system, :boolean, "DISABLE_SPANDEX", false},
46-
env: {:system, "APM_ENVIRONMENT", "unknown"},
47-
application: :my_app,
48-
ignored_methods: ["OPTIONS"],
49-
# ignored routes accepts regexes, and strings. If it is a string it must match exactly.
50-
ignored_routes: [~r/health_check/, "/status"],
51-
# do not set the following configurations unless you are sure.
52-
log_traces?: false # You probably don't want this to be on, *especially* if you have high load. For debugging.
46+
MyApp.Tracer.configure(disabled?: Mix.env == :test)
5347
```
5448

55-
Even though datadog is the only adapter currently, configurations are still namespaced by the adapter to allow adding more in the future.
49+
For more information on tracer configuration, view the docs for `Spandex.Tracer`. There you will find the documentation for the opts schema. The entire configuration can also be passed into each function in your tracer to be overridden if desired. For example:
50+
51+
`MyApp.Tracer.start_span("span_name", service: :some_special_service)`
52+
53+
The options you pass in and the configuration in your config files are merged together, so you do not need to specify the full configuration on every call.
54+
55+
To bypass the tracer pattern entirely, you can call directly into the functions in `Spandex`, like `Spandex.start_span("span_name", [adapter: Foo, service: :bar])`
56+
57+
### Adapter specific configuration
58+
59+
To start the datadog adapter, add a worker to your application's supervisor
5660

5761
```elixir
58-
config :spandex, :datadog,
59-
host: {:system, "DATADOG_HOST", "localhost"},
60-
port: {:system, "DATADOG_PORT", 8126},
61-
batch_size: 10,
62-
sync_threshold: 20,
63-
services: [ # for defaults mapping in spans service => type
64-
ecto: :db,
65-
my_api: :web,
66-
my_cache: :cache,
67-
],
68-
# Do not set the following configurations unless you are sure.
69-
api_adapter: Spandex.Datadog.ApiServer, # Traces will get sent in background
70-
asynchronous_send?: true, # Defaults to `true`. no reason to change it except perhaps for testing purposes. If changed, expect performance impacts.
71-
endpoint: MyApp.Endpoint, # See notice about potential performance impacts from publishing traces to channels.
72-
channel: "spandex_traces", # If endpoint and channel are set, all traces will be broadcast across that channel
62+
# Example configuration
63+
opts =
64+
[
65+
host: System.get_env("DATADOG_HOST") || "localhost",
66+
port: System.get_env("DATADOG_PORT") || 8126,
67+
batch_size: System.get_env("SPANDEX_BATCH_SIZE") || 10,
68+
sync_threshold: System.get_env("SPANDEX_SYNC_THRESHOLD") || 100,
69+
http: HTTPoison
70+
]
71+
72+
# in your supervision tree
73+
74+
worker(Spandex.Datadog.ApiServer, [opts])
7375
```
7476

7577
## Phoenix Plugs
7678

7779
There are 3 plugs provided for usage w/ Phoenix:
7880

79-
* `Spandex.Plug.StartTrace`
80-
* `Spandex.Plug.AddContext`
81-
* `Spandex.Plug.EndTrace`
82-
83-
`Spandex.Plug.AddContext` can be modified to include options for `:allowed_route_replacements` and `:disallowed_route_replacements`, so a route of `:base_route/:id/:relationship` would only have `:base_route` and `:relationship` swapped to their param values if included in `:allowed_route_replacements` and not included in `:disallowed_route_replacements`.
84-
85-
Ensure that `Spandex.Plug.EndTrace` goes *after* your router. This is important because we want rendering the response to be included in the tracing/timing. Put `Spandex.Plug.StartTrace` as early as is reasonable in your pipeline. Put `Spandex.Plug.AddContext` either after router or inside a pipeline in router.
81+
* `Spandex.Plug.StartTrace` - See moduledocs for options. Goes as early in your pipeline as possible.
82+
* `Spandex.Plug.AddContext` - See moduledocs for options. Either after the router, or inside a pipeline in the router.
83+
* `Spandex.Plug.EndTrace` - Must go *after* your router.
8684

8785
## Distributed Tracing
8886

8987
Distributed tracing is supported via headers `x-datadog-trace-id` and `x-datadog-parent-id`. If they are set, the `StartTrace` plug will act accordingly, continuing that trace and span instead of starting a new one. *Both* must be set for distributed tracing to work.
9088

9189
## Logger metadata
90+
9291
In general, you'll probably want the current span_id and trace_id in your logs, so that you can find them in your tracing service. Make sure to add `span_id` and `trace_id` to logger_metadata
9392

9493
```elixir
@@ -106,29 +105,29 @@ defmodule ManuallyTraced do
106105

107106
# Does not handle exceptions for you.
108107
def trace_me() do
109-
_ = Spandex.start_trace("my_trace") #also opens a span
110-
_ = Spandex.update_span(%{service: :my_app, type: :db})
108+
_ = Tracer.start_trace("my_trace") #also opens a span
109+
_ = Tracer.update_span(%{service: :my_app, type: :db})
111110

112111
result = span_me()
113112

114-
_ = Spandex.finish_trace()
113+
_ = Tracer.finish_trace()
115114

116115
result
117116
end
118117

119118
# Does not handle exceptions for you.
120119
def span_me() do
121-
_ = Spandex.start_span("this_span")
122-
_ = Spandex.update_span(%{service: :my_app, type: :web})
120+
_ = Tracer.start_span("this_span")
121+
_ = Tracer.update_span(%{service: :my_app, type: :web})
123122

124123
result = span_me_also()
125124

126-
_ = Spandex.finish_span()
125+
_ = Tracer.finish_span()
127126
end
128127

129128
# Handles exception at the span level. Trace still must be reported.
130129
def span_me_also() do
131-
Spandex.span("span_me_also) do
130+
Tracer.span("span_me_also") do
132131
...
133132
end
134133
end
@@ -139,4 +138,12 @@ Spandex used to ship with function decorators, but those decorators had a habit
139138
140139
## Asynchronous Processes
141140
142-
The current trace_id and span_id can be retrieved with `Spandex.current_trace_id()` and `Spandex.current_span_id()`. This can then be used as `Spandex.continue_trace("new_trace", trace_id, span_id)`. New spans can then be logged from there and will be sent in a separate batch.
141+
The current trace_id and span_id can be retrieved with `Tracer.current_trace_id()` and `Tracer.current_span_id()`. This can then be used as `Tracer.continue_trace("new_trace", trace_id, span_id)`. New spans can then be logged from there and will be sent in a separate batch.
142+
143+
## Datadog Api Sender Performance
144+
145+
Originally, the library had an api server and spans were sent via `GenServer.cast`, but we've seen the need to introduce backpressure and limit the overall number of requests made. As such, the datadog api sender accepts `batch_size` and `sync_threshold` options.
146+
147+
Batch size refers to *traces*, not spans, so if you send a large number of spans per trace, then you probably want to keep that number low. If you send only a few spans, then you could set it significantly higher.
148+
149+
Sync threshold refers to the *number of processes concurrently sending spans*, *NOT* the number of traces queued up waiting to be sent. It is used to apply backpressure while still taking advantage of parallelism. Ideally, the sync threshold would be set to a point that you wouldn't reasonably reach often, but that is low enough to not cause systemic performance issues if you don't apply backpressure. A simple way to think about it is that if you are seeing 1000 requests per second, and your batch size is 10, then you'll be making 100 requests per second to datadog (probably a bad config). But if your sync_threshold is set to 10, you'll almost certainly exceed that because 100 requests in 1 second will likely overlap in that way. So when that is exceeded, the work is done synchronously (without even waiting for the asynchronous ones to complete). This concept of backpressure is very important, and strategies for switching to synchronous operation are often surprisingly far more performant than purely asynchronous strategies (and much more predictable).

config/test.exs

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,15 @@
11
use Mix.Config
22

3+
# TODO: Update this!
4+
35
config :logger, :console,
46
level: :debug,
57
colors: [enabled: false],
68
format: "$time $metadata[$level] $message\n",
79
metadata: [:trace_id, :span_id]
810

9-
config :spandex,
11+
config :spandex, Spandex.Test.Support.Tracer,
1012
service: :spandex_test,
1113
adapter: Spandex.Adapters.Datadog,
12-
disabled?: false,
1314
env: "test",
14-
application: :spandex,
15-
ignored_methods: ["OPTIONS"],
16-
ignored_routes: [~r/healthz/],
17-
log_traces?: false
18-
19-
config :spandex, :datadog,
20-
host: "datadog",
21-
port: 8126,
22-
services: [
23-
spandex_test: :job
24-
],
25-
api_adapter: Spandex.Datadog.TestApiServer
15+
sender: Spandex.Test.DatadogTestApiServer

lib/adapters/adapter.ex

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@ defmodule Spandex.Adapters.Adapter do
44
implementations of reporting/aggregating spans while still using the spandex
55
internal implementation.
66
"""
7-
@callback start_trace(String.t()) :: {:ok, term} | {:error, term}
8-
@callback start_span(String.t()) :: {:ok, term} | {:error, term}
9-
@callback update_span(map) :: :ok | {:error, term}
10-
@callback finish_span() :: :ok | {:error, term}
11-
@callback finish_trace() :: :ok | {:error, term}
12-
@callback span_error(Exception.t()) :: :ok | {:error, term}
13-
@callback current_trace_id() :: term | nil | {:error, term}
14-
@callback current_span_id() :: term | nil | {:error, term}
15-
@callback current_span() :: term | nil
16-
@callback continue_trace(String.t(), term, term) :: {:ok, term} | {:error, term}
17-
@callback continue_trace_from_span(String.t(), map) :: {:ok, term} | {:error, term}
18-
@callback update_top_span(map) :: :ok | {:error, term}
19-
@callback update_all_spans(map) :: :ok | {}
20-
@callback distributed_context(Plug.Conn.t()) :: {:ok, term} | {:error, term}
7+
@callback start_trace(String.t(), Keyword.t()) :: {:ok, term} | {:error, term}
8+
@callback start_span(String.t(), Keyword.t()) :: {:ok, term} | {:error, term}
9+
@callback update_span(map, Keyword.t()) :: :ok | {:error, term}
10+
@callback finish_span(Keyword.t()) :: :ok | {:error, term}
11+
@callback finish_trace(Keyword.t()) :: :ok | {:error, term}
12+
@callback span_error(Exception.t(), Keyword.t()) :: :ok | {:error, term}
13+
@callback current_trace_id(Keyword.t()) :: term | nil | {:error, term}
14+
@callback current_span_id(Keyword.t()) :: term | nil | {:error, term}
15+
@callback current_span(Keyword.t()) :: term | nil
16+
@callback continue_trace(String.t(), term, term, Keyword.t()) :: {:ok, term} | {:error, term}
17+
@callback continue_trace_from_span(String.t(), map, Keyword.t()) :: {:ok, term} | {:error, term}
18+
@callback update_top_span(map, Keyword.t()) :: :ok | {:error, term}
19+
@callback update_all_spans(map, Keyword.t()) :: :ok | {}
20+
@callback distributed_context(Plug.Conn.t(), Keyword.t()) :: {:ok, term} | {:error, term}
2121
end

0 commit comments

Comments
 (0)