opentelemetry_sdk/trace/
sampler.rs

1use opentelemetry::{
2    trace::{
3        Link, SamplingDecision, SamplingResult, SpanKind, TraceContextExt, TraceId, TraceState,
4    },
5    Context, KeyValue,
6};
7
8#[cfg(feature = "jaeger_remote_sampler")]
9mod jaeger_remote;
10
11#[cfg(feature = "jaeger_remote_sampler")]
12pub use jaeger_remote::{JaegerRemoteSampler, JaegerRemoteSamplerBuilder};
13#[cfg(feature = "jaeger_remote_sampler")]
14use opentelemetry_http::HttpClient;
15
/// The [`ShouldSample`] interface allows implementations to provide samplers
/// which will return a sampling [`SamplingResult`] based on information that
/// is typically available just before the [`Span`] is created.
///
/// # Sampling
///
/// Sampling is a mechanism to control the noise and overhead introduced by
/// OpenTelemetry by reducing the number of samples of traces collected and
/// sent to the backend.
///
/// Sampling may be implemented at different stages of a trace collection.
/// [OpenTelemetry SDK] defines a [`ShouldSample`] interface that can be used at
/// instrumentation points by libraries to check the sampling [`SamplingDecision`]
/// early and optimize the amount of telemetry that needs to be collected.
///
/// All other sampling algorithms may be implemented on the SDK layer in exporters,
/// or even out of process in an Agent or Collector.
///
/// The OpenTelemetry API has two properties responsible for the data collection:
///
/// * [`Span::is_recording()`]. If `true` the current [`Span`] records
///   tracing events (attributes, events, status, etc.), otherwise all tracing
///   events are dropped. Users can use this property to determine if expensive
///   trace events can be avoided. [`SpanProcessor`]s will receive
///   all spans with this flag set. However, [`SpanExporter`]s will
///   not receive them unless the `Sampled` flag was set.
/// * `Sampled` flag in [`SpanContext::trace_flags()`]. This flag is propagated
///   via the [`SpanContext`] to child Spans. For more details see the [W3C
///   specification](https://w3c.github.io/trace-context/). This flag indicates
///   that the [`Span`] has been `sampled` and will be exported. [`SpanProcessor`]s
///   and [`SpanExporter`]s will receive spans with the `Sampled` flag set for
///   processing.
///
/// The flag combination `Sampled == false` and `is_recording == true` means
/// that the current `Span` does record information, but most likely the child
/// `Span` will not.
///
/// The flag combination `Sampled == true` and `is_recording == false` could
/// cause gaps in the distributed trace, and because of this the OpenTelemetry API
/// MUST NOT allow this combination.
///
/// [OpenTelemetry SDK]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#sampling
/// [`SpanContext`]: opentelemetry::trace::SpanContext
/// [`SpanContext::trace_flags()`]: opentelemetry::trace::SpanContext#method.trace_flags
/// [`SpanExporter`]: crate::export::trace::SpanExporter
/// [`SpanProcessor`]: crate::trace::SpanProcessor
/// [`Span`]: opentelemetry::trace::Span
/// [`Span::is_recording()`]: opentelemetry::trace::Span#tymethod.is_recording
pub trait ShouldSample: CloneShouldSample + Send + Sync + std::fmt::Debug {
    /// Returns the [`SamplingResult`] for a [`Span`] to be created.
    ///
    /// The [`should_sample`] function can use any of the information provided to it in order to
    /// make a decision about whether or not a [`Span`] should be sampled. However,
    /// there are performance implications on the creation of a span.
    ///
    /// # Arguments
    ///
    /// * `parent_context` - The parent [`Context`], or `None` when there is no parent.
    /// * `trace_id` - The [`TraceId`] associated with the span to be created.
    /// * `name` - The name of the span to be created.
    /// * `span_kind` - The [`SpanKind`] of the span to be created.
    /// * `attributes` - The attributes supplied for the span to be created.
    /// * `links` - The [`Link`]s supplied for the span to be created.
    ///
    /// [`Span`]: opentelemetry::trace::Span
    /// [`should_sample`]: ShouldSample::should_sample
    #[allow(clippy::too_many_arguments)]
    fn should_sample(
        &self,
        parent_context: Option<&Context>,
        trace_id: TraceId,
        name: &str,
        span_kind: &SpanKind,
        attributes: &[KeyValue],
        links: &[Link],
    ) -> SamplingResult;
}
84
/// This trait should not be used directly; instead users should use [`ShouldSample`].
///
/// It is a supertrait of [`ShouldSample`] and provides the hook that allows a
/// `Box<dyn ShouldSample>` to be cloned.
pub trait CloneShouldSample {
    /// Returns this sampler as a new boxed [`ShouldSample`] trait object.
    fn box_clone(&self) -> Box<dyn ShouldSample>;
}
89
90impl<T> CloneShouldSample for T
91where
92    T: ShouldSample + Clone + 'static,
93{
94    fn box_clone(&self) -> Box<dyn ShouldSample> {
95        Box::new(self.clone())
96    }
97}
98
99impl Clone for Box<dyn ShouldSample> {
100    fn clone(&self) -> Self {
101        self.box_clone()
102    }
103}
104
/// Default Sampling options
///
/// The [built-in samplers] allow for simple decisions. For more complex scenarios consider
/// implementing your own sampler using the [`ShouldSample`] trait.
///
/// [built-in samplers]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#built-in-samplers
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum Sampler {
    /// Always sample the trace
    AlwaysOn,
    /// Never sample the trace
    AlwaysOff,
    /// Follows the parent span's sampled flag when the parent context has an active span;
    /// otherwise (e.g. for root spans) delegates to the wrapped sampler.
    ParentBased(Box<dyn ShouldSample>),
    /// Sample a given fraction of traces. Fractions >= 1 will always sample. Fractions < 0 are
    /// treated as zero. The decision is derived from the `trace_id` alone: the parent span's
    /// sampling decision is not consulted by this variant. Wrap it in
    /// [`Sampler::ParentBased`] if child spans should follow their parent's decision.
    /// *Note:* Because the decision is based on the `trace_id` and not the `span_id`, all
    /// spans of a trace evaluated with the same fraction receive the same decision.
    TraceIdRatioBased(f64),
    /// Jaeger remote sampler supports any remote service that implements the jaeger remote sampler protocol.
    /// The proto definition can be found [here](https://github.com/jaegertracing/jaeger-idl/blob/main/proto/api_v2/sampling.proto)
    ///
    /// Jaeger remote sampler allows remotely controlling the sampling configuration for the SDKs.
    /// The sampling is typically configured at the collector and the SDKs actively poll for changes.
    /// The sampler uses TraceIdRatioBased or rate-limited sampler under the hood.
    /// These samplers can be configured per whole service (a.k.a default), or per span name in a
    /// given service (a.k.a per operation).
    #[cfg(feature = "jaeger_remote_sampler")]
    JaegerRemote(JaegerRemoteSampler),
}
137
138impl Sampler {
139    /// Create a jaeger remote sampler builder.
140    ///
141    /// ### Arguments
142    /// * `runtime` - A runtime to run the HTTP client.
143    /// * `http_client` - An HTTP client to query the sampling endpoint.
144    /// * `default_sampler` - A default sampler to make a sampling decision when the remote is unavailable or before the SDK receives the first response from remote.
145    /// * `service_name` - The name of the service. This is a required parameter to query the sampling endpoint.
146    ///
147    /// See [here](https://github.com/open-telemetry/opentelemetry-rust/blob/main/examples/jaeger-remote-sampler/src/main.rs) for an example.
148    #[cfg(feature = "jaeger_remote_sampler")]
149    pub fn jaeger_remote<C, Sampler, R, Svc>(
150        runtime: R,
151        http_client: C,
152        default_sampler: Sampler,
153        service_name: Svc,
154    ) -> JaegerRemoteSamplerBuilder<C, Sampler, R>
155    where
156        C: HttpClient + 'static,
157        Sampler: ShouldSample,
158        R: crate::runtime::RuntimeChannel,
159        Svc: Into<String>,
160    {
161        JaegerRemoteSamplerBuilder::new(runtime, http_client, default_sampler, service_name)
162    }
163}
164
165impl ShouldSample for Sampler {
166    fn should_sample(
167        &self,
168        parent_context: Option<&Context>,
169        trace_id: TraceId,
170        name: &str,
171        span_kind: &SpanKind,
172        attributes: &[KeyValue],
173        links: &[Link],
174    ) -> SamplingResult {
175        let decision = match self {
176            // Always sample the trace
177            Sampler::AlwaysOn => SamplingDecision::RecordAndSample,
178            // Never sample the trace
179            Sampler::AlwaysOff => SamplingDecision::Drop,
180            // The parent decision if sampled; otherwise the decision of delegate_sampler
181            Sampler::ParentBased(delegate_sampler) => parent_context
182                .filter(|cx| cx.has_active_span())
183                .map_or_else(
184                    || {
185                        delegate_sampler
186                            .should_sample(
187                                parent_context,
188                                trace_id,
189                                name,
190                                span_kind,
191                                attributes,
192                                links,
193                            )
194                            .decision
195                    },
196                    |ctx| {
197                        let span = ctx.span();
198                        let parent_span_context = span.span_context();
199                        if parent_span_context.is_sampled() {
200                            SamplingDecision::RecordAndSample
201                        } else {
202                            SamplingDecision::Drop
203                        }
204                    },
205                ),
206            // Probabilistically sample the trace.
207            Sampler::TraceIdRatioBased(prob) => sample_based_on_probability(prob, trace_id),
208            #[cfg(feature = "jaeger_remote_sampler")]
209            Sampler::JaegerRemote(remote_sampler) => {
210                remote_sampler
211                    .should_sample(parent_context, trace_id, name, span_kind, attributes, links)
212                    .decision
213            }
214        };
215        SamplingResult {
216            decision,
217            // No extra attributes ever set by the SDK samplers.
218            attributes: Vec::new(),
219            // all sampler in SDK will not modify trace state.
220            trace_state: match parent_context {
221                Some(ctx) => ctx.span().span_context().trace_state().clone(),
222                None => TraceState::default(),
223            },
224        }
225    }
226}
227
228pub(crate) fn sample_based_on_probability(prob: &f64, trace_id: TraceId) -> SamplingDecision {
229    if *prob >= 1.0 {
230        SamplingDecision::RecordAndSample
231    } else {
232        let prob_upper_bound = (prob.max(0.0) * (1u64 << 63) as f64) as u64;
233        // TODO: update behavior when the spec definition resolves
234        // https://github.com/open-telemetry/opentelemetry-specification/issues/1413
235        let bytes = trace_id.to_bytes();
236        let (_, low) = bytes.split_at(8);
237        let trace_id_low = u64::from_be_bytes(low.try_into().unwrap());
238        let rnd_from_trace_id = trace_id_low >> 1;
239
240        if rnd_from_trace_id < prob_upper_bound {
241            SamplingDecision::RecordAndSample
242        } else {
243            SamplingDecision::Drop
244        }
245    }
246}
247
248#[cfg(all(test, feature = "testing", feature = "trace"))]
249mod tests {
250    use super::*;
251    use crate::testing::trace::TestSpan;
252    use opentelemetry::trace::{SpanContext, SpanId, TraceFlags};
253    use rand::Rng;
254
    /// Table of cases driving the `sampling` test.
    ///
    /// Each tuple is `(case name, sampler under test, expected sampled fraction,
    /// whether a parent context is supplied, whether that parent is sampled)`.
    #[rustfmt::skip]
    fn sampler_data() -> Vec<(&'static str, Sampler, f64, bool, bool)> {
        vec![
            // Span w/o a parent
            ("never_sample", Sampler::AlwaysOff, 0.0, false, false),
            ("always_sample", Sampler::AlwaysOn, 1.0, false, false),
            ("ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, false, false),
            ("ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, false, false),
            ("ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, false, false),
            ("ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, false, false),
            ("ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, false, false),

            // Spans w/o a parent delegate
            ("delegate_to_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, false, false),
            ("delegate_to_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, false, false),
            ("delegate_to_ratio_-1", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(-1.0))), 0.0, false, false),
            ("delegate_to_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.25, false, false),
            ("delegate_to_ratio_.50", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.50))), 0.50, false, false),
            ("delegate_to_ratio_.75", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.75))), 0.75, false, false),
            ("delegate_to_ratio_2.0", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(2.0))), 1.0, false, false),

            // Ratio samplers with a parent that is *not* sampled act like spans w/o a parent
            ("unsampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, false),
            ("unsampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, false),
            ("unsampled_parent_with_ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, true, false),
            ("unsampled_parent_with_ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, true, false),
            ("unsampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, false),
            // ParentBased samplers with a parent that is *not* sampled never sample, regardless of the delegate sampler
            ("unsampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 0.0, true, false),
            ("unsampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, true, false),
            ("unsampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.0, true, false),

            // A ratio sampler with a parent that is sampled will ignore the parent
            ("sampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, true),
            ("sampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, true),
            ("sampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, true),

            // Spans with a parent that is sampled, will always sample, regardless of the delegate sampler
            ("sampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, true, true),
            ("sampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 1.0, true, true),
            ("sampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 1.0, true, true),

            // Spans with a sampled parent, but when using the NeverSample Sampler, aren't sampled
            ("sampled_parent_span_with_never_sample", Sampler::AlwaysOff, 0.0, true, true),
        ]
    }
300
301    #[test]
302    fn sampling() {
303        let total = 10_000;
304        let mut rng = rand::thread_rng();
305        for (name, sampler, expectation, parent, sample_parent) in sampler_data() {
306            let mut sampled = 0;
307            for _ in 0..total {
308                let parent_context = if parent {
309                    let trace_flags = if sample_parent {
310                        TraceFlags::SAMPLED
311                    } else {
312                        TraceFlags::default()
313                    };
314                    let span_context = SpanContext::new(
315                        TraceId::from_u128(1),
316                        SpanId::from_u64(1),
317                        trace_flags,
318                        false,
319                        TraceState::default(),
320                    );
321
322                    Some(Context::current_with_span(TestSpan(span_context)))
323                } else {
324                    None
325                };
326
327                let trace_id = TraceId::from(rng.gen::<u128>());
328                if sampler
329                    .should_sample(
330                        parent_context.as_ref(),
331                        trace_id,
332                        name,
333                        &SpanKind::Internal,
334                        &[],
335                        &[],
336                    )
337                    .decision
338                    == SamplingDecision::RecordAndSample
339                {
340                    sampled += 1;
341                }
342            }
343            let mut tolerance = 0.0;
344            let got = sampled as f64 / total as f64;
345
346            if expectation > 0.0 && expectation < 1.0 {
347                // See https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
348                let z = 4.75342; // This should succeed 99.9999% of the time
349                tolerance = z * (got * (1.0 - got) / total as f64).sqrt();
350            }
351
352            let diff = (got - expectation).abs();
353            assert!(
354                diff <= tolerance,
355                "{} got {:?} (diff: {}), expected {} (w/tolerance: {})",
356                name,
357                got,
358                diff,
359                expectation,
360                tolerance
361            );
362        }
363    }
364
365    #[test]
366    fn clone_a_parent_sampler() {
367        let sampler = Sampler::ParentBased(Box::new(Sampler::AlwaysOn));
368        #[allow(clippy::redundant_clone)]
369        let cloned_sampler = sampler.clone();
370
371        let cx = Context::current_with_value("some_value");
372
373        let result = sampler.should_sample(
374            Some(&cx),
375            TraceId::from_u128(1),
376            "should sample",
377            &SpanKind::Internal,
378            &[],
379            &[],
380        );
381
382        let cloned_result = cloned_sampler.should_sample(
383            Some(&cx),
384            TraceId::from_u128(1),
385            "should sample",
386            &SpanKind::Internal,
387            &[],
388            &[],
389        );
390
391        assert_eq!(result, cloned_result);
392    }
393
394    #[test]
395    fn parent_sampler() {
396        // name, delegate, context(with or without parent), expected decision
397        let test_cases = vec![
398            (
399                "should using delegate sampler",
400                Sampler::AlwaysOn,
401                Context::new(),
402                SamplingDecision::RecordAndSample,
403            ),
404            (
405                "should use parent result, always off",
406                Sampler::AlwaysOn,
407                Context::current_with_span(TestSpan(SpanContext::new(
408                    TraceId::from_u128(1),
409                    SpanId::from_u64(1),
410                    TraceFlags::default(), // not sampling
411                    false,
412                    TraceState::default(),
413                ))),
414                SamplingDecision::Drop,
415            ),
416            (
417                "should use parent result, always on",
418                Sampler::AlwaysOff,
419                Context::current_with_span(TestSpan(SpanContext::new(
420                    TraceId::from_u128(1),
421                    SpanId::from_u64(1),
422                    TraceFlags::SAMPLED, // not sampling
423                    false,
424                    TraceState::default(),
425                ))),
426                SamplingDecision::RecordAndSample,
427            ),
428        ];
429
430        for (name, delegate, parent_cx, expected) in test_cases {
431            let sampler = Sampler::ParentBased(Box::new(delegate));
432            let result = sampler.should_sample(
433                Some(&parent_cx),
434                TraceId::from_u128(1),
435                name,
436                &SpanKind::Internal,
437                &[],
438                &[],
439            );
440
441            assert_eq!(result.decision, expected);
442        }
443    }
444}