opentelemetry_sdk/trace/sampler.rs
1use opentelemetry::{
2 trace::{
3 Link, SamplingDecision, SamplingResult, SpanKind, TraceContextExt, TraceId, TraceState,
4 },
5 Context, KeyValue,
6};
7
8#[cfg(feature = "jaeger_remote_sampler")]
9mod jaeger_remote;
10
11#[cfg(feature = "jaeger_remote_sampler")]
12pub use jaeger_remote::{JaegerRemoteSampler, JaegerRemoteSamplerBuilder};
13#[cfg(feature = "jaeger_remote_sampler")]
14use opentelemetry_http::HttpClient;
15
/// The [`ShouldSample`] interface allows implementations to provide samplers
/// which will return a sampling [`SamplingResult`] based on information that
/// is typically available just before the [`Span`] was created.
///
/// # Sampling
///
/// Sampling is a mechanism to control the noise and overhead introduced by
/// OpenTelemetry by reducing the number of samples of traces collected and
/// sent to the backend.
///
/// Sampling may be implemented on different stages of a trace collection.
/// [OpenTelemetry SDK] defines a [`ShouldSample`] interface that can be used at
/// instrumentation points by libraries to check the sampling [`SamplingDecision`]
/// early and optimize the amount of telemetry that needs to be collected.
///
/// All other sampling algorithms may be implemented on SDK layer in exporters,
/// or even out of process in Agent or Collector.
///
/// The OpenTelemetry API has two properties responsible for the data collection:
///
/// * [`Span::is_recording()`]. If `true` the current [`Span`] records
///   tracing events (attributes, events, status, etc.), otherwise all tracing
///   events are dropped. Users can use this property to determine if expensive
///   trace events can be avoided. [`SpanProcessor`]s will receive
///   all spans with this flag set. However, [`SpanExporter`]s will
///   not receive them unless the `Sampled` flag was set.
/// * `Sampled` flag in [`SpanContext::trace_flags()`]. This flag is propagated
///   via the [`SpanContext`] to child Spans. For more details see the [W3C
///   specification](https://w3c.github.io/trace-context/). This flag indicates
///   that the [`Span`] has been `sampled` and will be exported. [`SpanProcessor`]s
///   and [`SpanExporter`]s will receive spans with the `Sampled` flag set for
///   processing.
///
/// The flag combination `Sampled == false` and `is_recording == true` means
/// that the current `Span` does record information, but most likely the child
/// `Span` will not.
///
/// The flag combination `Sampled == true` and `is_recording == false` could
/// cause gaps in the distributed trace, and because of this OpenTelemetry API
/// MUST NOT allow this combination.
///
/// Implementors must also be `Send + Sync + Debug`, and must provide
/// [`CloneShouldSample`] so that a boxed sampler can be cloned.
///
/// [OpenTelemetry SDK]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#sampling
/// [`SpanContext`]: opentelemetry::trace::SpanContext
/// [`SpanContext::trace_flags()`]: opentelemetry::trace::SpanContext#method.trace_flags
/// [`SpanExporter`]: crate::export::trace::SpanExporter
/// [`SpanProcessor`]: crate::trace::SpanProcessor
/// [`Span`]: opentelemetry::trace::Span
/// [`Span::is_recording()`]: opentelemetry::trace::Span#tymethod.is_recording
pub trait ShouldSample: CloneShouldSample + Send + Sync + std::fmt::Debug {
    /// Returns the [`SamplingDecision`] for a [`Span`] to be created.
    ///
    /// The [`should_sample`] function can use any of the information provided to it in order to
    /// make a decision about whether or not a [`Span`] should or should not be sampled. However,
    /// there are performance implications on the creation of a span.
    ///
    /// ### Arguments
    /// * `parent_context` - The context that may carry the parent span, or `None` when no
    ///   context is available.
    /// * `trace_id` - The trace id passed in for the span to be created.
    /// * `name` - The name passed in for the span to be created.
    /// * `span_kind` - The kind passed in for the span to be created.
    /// * `attributes` - The attributes passed in for the span to be created.
    /// * `links` - The links passed in for the span to be created.
    ///
    /// [`Span`]: opentelemetry::trace::Span
    /// [`should_sample`]: ShouldSample::should_sample
    // Each piece of span information is passed as its own argument, which exceeds
    // clippy's default argument-count limit.
    #[allow(clippy::too_many_arguments)]
    fn should_sample(
        &self,
        parent_context: Option<&Context>,
        trace_id: TraceId,
        name: &str,
        span_kind: &SpanKind,
        attributes: &[KeyValue],
        links: &[Link],
    ) -> SamplingResult;
}
84
/// Helper trait that produces a boxed copy of a sampler.
///
/// This trait should not be used directly; users should use [`ShouldSample`] instead.
pub trait CloneShouldSample {
    /// Returns a copy of this sampler as a boxed [`ShouldSample`] trait object.
    fn box_clone(&self) -> Box<dyn ShouldSample>;
}
89
90impl<T> CloneShouldSample for T
91where
92 T: ShouldSample + Clone + 'static,
93{
94 fn box_clone(&self) -> Box<dyn ShouldSample> {
95 Box::new(self.clone())
96 }
97}
98
99impl Clone for Box<dyn ShouldSample> {
100 fn clone(&self) -> Self {
101 self.box_clone()
102 }
103}
104
/// Default Sampling options
///
/// The [built-in samplers] allow for simple decisions. For more complex scenarios consider
/// implementing your own sampler using [`ShouldSample`] trait.
///
/// [built-in samplers]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#built-in-samplers
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum Sampler {
    /// Always sample the trace
    AlwaysOn,
    /// Never sample the trace
    AlwaysOff,
    /// Respects the parent span's sampling decision, or delegates to the wrapped sampler when
    /// the parent context is missing or carries no active span (i.e. for root spans).
    ParentBased(Box<dyn ShouldSample>),
    /// Sample a given fraction of traces. Fractions >= 1 will always sample. Fractions < 0 are
    /// treated as zero.
    ///
    /// The decision is derived from the `trace_id` only, not the `span_id`, so every span of a
    /// trace that is evaluated with the same fraction receives the same decision.
    ///
    /// *Note:* This variant does not consult the parent span's sampling decision on its own.
    /// Wrap it in [`Sampler::ParentBased`] if child spans should follow their parent.
    TraceIdRatioBased(f64),
    /// Jaeger remote sampler supports any remote service that implemented the jaeger remote sampler protocol.
    /// The proto definition can be found [here](https://github.com/jaegertracing/jaeger-idl/blob/main/proto/api_v2/sampling.proto)
    ///
    /// Jaeger remote sampler allows remotely controlling the sampling configuration for the SDKs.
    /// The sampling is typically configured at the collector and the SDKs actively poll for changes.
    /// The sampler uses TraceIdRatioBased or rate-limited sampler under the hood.
    /// These samplers can be configured per whole service (a.k.a default), or per span name in a
    /// given service (a.k.a per operation).
    #[cfg(feature = "jaeger_remote_sampler")]
    JaegerRemote(JaegerRemoteSampler),
}
137
impl Sampler {
    /// Create a jaeger remote sampler builder.
    ///
    /// The arguments are forwarded unchanged to [`JaegerRemoteSamplerBuilder::new`], and the
    /// resulting builder is returned.
    ///
    /// ### Arguments
    /// * `runtime` - A runtime to run the HTTP client.
    /// * `http_client` - An HTTP client to query the sampling endpoint.
    /// * `default_sampler` - A default sampler to make a sampling decision when the remote is unavailable or before the SDK receives the first response from remote.
    /// * `service_name` - The name of the service. This is a required parameter to query the sampling endpoint.
    ///
    /// See [here](https://github.com/open-telemetry/opentelemetry-rust/blob/main/examples/jaeger-remote-sampler/src/main.rs) for an example.
    // NOTE: the generic parameter `Sampler` shadows the `Sampler` enum inside this
    // function; here it names the caller-chosen type of `default_sampler`.
    #[cfg(feature = "jaeger_remote_sampler")]
    pub fn jaeger_remote<C, Sampler, R, Svc>(
        runtime: R,
        http_client: C,
        default_sampler: Sampler,
        service_name: Svc,
    ) -> JaegerRemoteSamplerBuilder<C, Sampler, R>
    where
        C: HttpClient + 'static,
        Sampler: ShouldSample,
        R: crate::runtime::RuntimeChannel,
        Svc: Into<String>,
    {
        JaegerRemoteSamplerBuilder::new(runtime, http_client, default_sampler, service_name)
    }
}
164
165impl ShouldSample for Sampler {
166 fn should_sample(
167 &self,
168 parent_context: Option<&Context>,
169 trace_id: TraceId,
170 name: &str,
171 span_kind: &SpanKind,
172 attributes: &[KeyValue],
173 links: &[Link],
174 ) -> SamplingResult {
175 let decision = match self {
176 // Always sample the trace
177 Sampler::AlwaysOn => SamplingDecision::RecordAndSample,
178 // Never sample the trace
179 Sampler::AlwaysOff => SamplingDecision::Drop,
180 // The parent decision if sampled; otherwise the decision of delegate_sampler
181 Sampler::ParentBased(delegate_sampler) => parent_context
182 .filter(|cx| cx.has_active_span())
183 .map_or_else(
184 || {
185 delegate_sampler
186 .should_sample(
187 parent_context,
188 trace_id,
189 name,
190 span_kind,
191 attributes,
192 links,
193 )
194 .decision
195 },
196 |ctx| {
197 let span = ctx.span();
198 let parent_span_context = span.span_context();
199 if parent_span_context.is_sampled() {
200 SamplingDecision::RecordAndSample
201 } else {
202 SamplingDecision::Drop
203 }
204 },
205 ),
206 // Probabilistically sample the trace.
207 Sampler::TraceIdRatioBased(prob) => sample_based_on_probability(prob, trace_id),
208 #[cfg(feature = "jaeger_remote_sampler")]
209 Sampler::JaegerRemote(remote_sampler) => {
210 remote_sampler
211 .should_sample(parent_context, trace_id, name, span_kind, attributes, links)
212 .decision
213 }
214 };
215 SamplingResult {
216 decision,
217 // No extra attributes ever set by the SDK samplers.
218 attributes: Vec::new(),
219 // all sampler in SDK will not modify trace state.
220 trace_state: match parent_context {
221 Some(ctx) => ctx.span().span_context().trace_state().clone(),
222 None => TraceState::default(),
223 },
224 }
225 }
226}
227
228pub(crate) fn sample_based_on_probability(prob: &f64, trace_id: TraceId) -> SamplingDecision {
229 if *prob >= 1.0 {
230 SamplingDecision::RecordAndSample
231 } else {
232 let prob_upper_bound = (prob.max(0.0) * (1u64 << 63) as f64) as u64;
233 // TODO: update behavior when the spec definition resolves
234 // https://github.com/open-telemetry/opentelemetry-specification/issues/1413
235 let bytes = trace_id.to_bytes();
236 let (_, low) = bytes.split_at(8);
237 let trace_id_low = u64::from_be_bytes(low.try_into().unwrap());
238 let rnd_from_trace_id = trace_id_low >> 1;
239
240 if rnd_from_trace_id < prob_upper_bound {
241 SamplingDecision::RecordAndSample
242 } else {
243 SamplingDecision::Drop
244 }
245 }
246}
247
#[cfg(all(test, feature = "testing", feature = "trace"))]
mod tests {
    use super::*;
    use crate::testing::trace::TestSpan;
    use opentelemetry::trace::{SpanContext, SpanId, TraceFlags};
    use rand::Rng;

    /// Cases for the statistical [`sampling`] test.
    ///
    /// Each tuple is `(case name, sampler, expected sampled fraction, has parent, parent sampled)`.
    #[rustfmt::skip]
    fn sampler_data() -> Vec<(&'static str, Sampler, f64, bool, bool)> {
        vec![
            // Span w/o a parent
            ("never_sample", Sampler::AlwaysOff, 0.0, false, false),
            ("always_sample", Sampler::AlwaysOn, 1.0, false, false),
            ("ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, false, false),
            ("ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, false, false),
            ("ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, false, false),
            ("ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, false, false),
            ("ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, false, false),

            // Spans w/o a parent delegate
            ("delegate_to_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, false, false),
            ("delegate_to_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, false, false),
            ("delegate_to_ratio_-1", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(-1.0))), 0.0, false, false),
            ("delegate_to_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.25, false, false),
            ("delegate_to_ratio_.50", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.50))), 0.50, false, false),
            ("delegate_to_ratio_.75", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.75))), 0.75, false, false),
            ("delegate_to_ratio_2.0", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(2.0))), 1.0, false, false),

            // With a parent that is *not* sampled, ratio samplers act like spans w/o a parent,
            // while parent-based samplers never sample, regardless of the delegate sampler
            ("unsampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, false),
            ("unsampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, false),
            ("unsampled_parent_with_ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, true, false),
            ("unsampled_parent_with_ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, true, false),
            ("unsampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, false),
            ("unsampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 0.0, true, false),
            ("unsampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, true, false),
            ("unsampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.0, true, false),

            // A ratio sampler with a parent that is sampled will ignore the parent
            ("sampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, true),
            ("sampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, true),
            ("sampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, true),

            // Spans with a parent that is sampled, will always sample, regardless of the delegate sampler
            ("sampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, true, true),
            ("sampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 1.0, true, true),
            ("sampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 1.0, true, true),

            // Spans with a sampled parent, but when using the NeverSample Sampler, aren't sampled
            ("sampled_parent_span_with_never_sample", Sampler::AlwaysOff, 0.0, true, true),
        ]
    }

    /// Runs every case of [`sampler_data`] against random trace ids and checks that the
    /// observed sampled fraction matches the expected one within a statistical tolerance.
    #[test]
    fn sampling() {
        let total = 10_000;
        let mut rng = rand::thread_rng();
        for (name, sampler, expectation, parent, sample_parent) in sampler_data() {
            // The parent context is the same for every iteration of a case, so it is
            // built once here instead of once per generated trace id.
            let parent_context = if parent {
                let trace_flags = if sample_parent {
                    TraceFlags::SAMPLED
                } else {
                    TraceFlags::default()
                };
                let span_context = SpanContext::new(
                    TraceId::from_u128(1),
                    SpanId::from_u64(1),
                    trace_flags,
                    false,
                    TraceState::default(),
                );

                Some(Context::current_with_span(TestSpan(span_context)))
            } else {
                None
            };

            let mut sampled = 0;
            for _ in 0..total {
                let trace_id = TraceId::from(rng.gen::<u128>());
                if sampler
                    .should_sample(
                        parent_context.as_ref(),
                        trace_id,
                        name,
                        &SpanKind::Internal,
                        &[],
                        &[],
                    )
                    .decision
                    == SamplingDecision::RecordAndSample
                {
                    sampled += 1;
                }
            }
            // Deterministic expectations (0.0 and 1.0) must be matched exactly.
            let mut tolerance = 0.0;
            let got = sampled as f64 / total as f64;

            if expectation > 0.0 && expectation < 1.0 {
                // See https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
                let z = 4.75342; // This should succeed 99.9999% of the time
                tolerance = z * (got * (1.0 - got) / total as f64).sqrt();
            }

            let diff = (got - expectation).abs();
            assert!(
                diff <= tolerance,
                "{} got {:?} (diff: {}), expected {} (w/tolerance: {})",
                name,
                got,
                diff,
                expectation,
                tolerance
            );
        }
    }

    /// A cloned parent-based sampler must produce the same result as the original.
    #[test]
    fn clone_a_parent_sampler() {
        let sampler = Sampler::ParentBased(Box::new(Sampler::AlwaysOn));
        // The clone is the behavior under test, so it is intentionally kept.
        #[allow(clippy::redundant_clone)]
        let cloned_sampler = sampler.clone();

        let cx = Context::current_with_value("some_value");

        let result = sampler.should_sample(
            Some(&cx),
            TraceId::from_u128(1),
            "should sample",
            &SpanKind::Internal,
            &[],
            &[],
        );

        let cloned_result = cloned_sampler.should_sample(
            Some(&cx),
            TraceId::from_u128(1),
            "should sample",
            &SpanKind::Internal,
            &[],
            &[],
        );

        assert_eq!(result, cloned_result);
    }

    /// A parent-based sampler follows the parent's sampled flag when a parent span exists
    /// and falls back to its delegate otherwise.
    #[test]
    fn parent_sampler() {
        // name, delegate, context(with or without parent), expected decision
        let test_cases = vec![
            (
                "should using delegate sampler",
                Sampler::AlwaysOn,
                Context::new(),
                SamplingDecision::RecordAndSample,
            ),
            (
                "should use parent result, always off",
                Sampler::AlwaysOn,
                Context::current_with_span(TestSpan(SpanContext::new(
                    TraceId::from_u128(1),
                    SpanId::from_u64(1),
                    TraceFlags::default(), // not sampling
                    false,
                    TraceState::default(),
                ))),
                SamplingDecision::Drop,
            ),
            (
                "should use parent result, always on",
                Sampler::AlwaysOff,
                Context::current_with_span(TestSpan(SpanContext::new(
                    TraceId::from_u128(1),
                    SpanId::from_u64(1),
                    TraceFlags::SAMPLED, // sampling
                    false,
                    TraceState::default(),
                ))),
                SamplingDecision::RecordAndSample,
            ),
        ];

        for (name, delegate, parent_cx, expected) in test_cases {
            let sampler = Sampler::ParentBased(Box::new(delegate));
            let result = sampler.should_sample(
                Some(&parent_cx),
                TraceId::from_u128(1),
                name,
                &SpanKind::Internal,
                &[],
                &[],
            );

            // Include the case name so a failure identifies the offending table entry.
            assert_eq!(result.decision, expected, "test case: {}", name);
        }
    }
}