falco_plugin/extract/
mod.rs

1//! # Field extraction plugin support
2//!
3//! Plugins with field extraction capability can extract information from events
4//! based on fields. For example, a field (e.g. `proc.name`) extracts a value (e.g. process name
5//! like `nginx`) from a syscall event. The plugin returns a set of supported fields, and there are
6//! functions to extract a value given an event and field. The plugin framework can then build
7//! filtering expressions (e.g. rule conditions) based on these fields combined with relational
8//! and/or logical operators.
9//!
10//! For example, given the expression `ct.name=root and ct.region=us-east-1`,
11//! the plugin framework handles parsing the expression, calling the plugin to extract values for
12//! fields `ct.name`/`ct.region` for a given event, and determining the result of the expression.
13//! In a Falco output string like `An EC2 Node was created (name=%ct.name region=%ct.region)`,
14//! the plugin framework handles parsing the output string, calling the plugin to extract values
15//! for fields, and building the resolved string, replacing the template field names
16//! (e.g. `%ct.region`) with values (e.g. `us-east-1`).
17//!
18//! Plugins with this capability only focus on field extraction from events generated by other
19//! plugins or by the core libraries. They do not provide an event source but can extract fields
20//! from other event sources. The supported field extraction can be generic or be tied to a specific
21//! event source. An example is JSON field extraction, where a plugin might be able to extract
22//! fields from generic JSON payloads.
23//!
24//! For your plugin to support field extraction, you will need to implement the [`ExtractPlugin`]
25//! trait and invoke the [`extract_plugin`](crate::extract_plugin) macro, for example:
26//!
27//! ```
28//! use std::ffi::{CStr, CString};
29//! use anyhow::Error;
30//! use falco_event::events::RawEvent;
31//! use falco_plugin::base::{Metric, Plugin};
32//! use falco_plugin::{extract_plugin, plugin};
33//! use falco_plugin::extract::{
34//!     EventInput,
35//!     ExtractFieldInfo,
36//!     ExtractPlugin,
37//!     ExtractRequest,
38//!     field};
39//! use falco_plugin::tables::TablesInput;
40//!
41//! struct MyExtractPlugin;
42//! impl Plugin for MyExtractPlugin {
43//!     // ...
44//! #    const NAME: &'static CStr = c"sample-plugin-rs";
45//! #    const PLUGIN_VERSION: &'static CStr = c"0.0.1";
46//! #    const DESCRIPTION: &'static CStr = c"A sample Falco plugin that does nothing";
47//! #    const CONTACT: &'static CStr = c"you@example.com";
48//! #    type ConfigType = ();
49//! #
50//! #    fn new(input: Option<&TablesInput>, config: Self::ConfigType)
51//! #        -> Result<Self, anyhow::Error> {
52//! #        Ok(MyExtractPlugin)
53//! #    }
54//! #
55//! #    fn set_config(&mut self, config: Self::ConfigType) -> Result<(), anyhow::Error> {
56//! #        Ok(())
57//! #    }
58//! #
59//! #    fn get_metrics(&mut self) -> impl IntoIterator<Item=Metric> {
60//! #        []
61//! #    }
62//! }
63//!
64//! impl MyExtractPlugin { // note this is not the trait implementation
65//!     fn extract_sample(
66//!         &mut self,
67//!         _req: ExtractRequest<Self>,
68//!     ) -> Result<CString, Error> {
69//!         Ok(c"hello".to_owned())
70//!     }
71//! }
72//!
73//! impl ExtractPlugin for MyExtractPlugin {
74//!     type Event<'a> = RawEvent<'a>;
75//!     type ExtractContext = ();
76//!
77//!     const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>] = &[
78//!         field("my_extract.sample", &Self::extract_sample),
79//!     ];
80//! }
81//!
82//! plugin!(MyExtractPlugin);
83//! extract_plugin!(MyExtractPlugin);
84//! ```
85//!
86//! See the [`ExtractPlugin`] trait documentation for details.
87
88use crate::base::Plugin;
89use crate::extract::wrappers::ExtractPluginExported;
90use crate::tables::LazyTableReader;
91use falco_event::events::{AnyEventPayload, RawEvent};
92use falco_plugin_api::{ss_plugin_extract_field, ss_plugin_extract_value_offsets};
93use std::any::TypeId;
94use std::collections::BTreeMap;
95use std::ffi::{CStr, CString};
96use std::ops::Range;
97use std::sync::Mutex;
98
99mod extractor_fn;
100mod fields;
101mod schema;
102#[doc(hidden)]
103pub mod wrappers;
104
105pub use crate::event::EventInput;
106pub use schema::{field, ExtractFieldInfo};
107
/// Placeholder range meaning "no range available for this field"
///
/// When range extraction is requested and only some of the extractors involved support it,
/// the remaining fields still need an entry: the Falco plugin API expects either a range for
/// every field or no ranges at all. This value is what gets filled in for those fields.
///
/// **Note**: plugins should not return this value themselves. An extractor whose result does
/// not map to any particular byte offset in the plugin payload should report
/// [`UNSPECIFIED_RANGE`] instead.
// The range is deliberately reversed (start > end) so it cannot be confused with a real one.
#[allow(clippy::reversed_empty_ranges)]
pub const INVALID_RANGE: Range<usize> = Range { start: 1, end: 0 };
120
/// Range for values that have no location in the event (computed data)
///
/// Report this range when the extracted value was derived from the event rather than copied
/// out of a specific span of bytes (for example, it was calculated from the event data).
pub const UNSPECIFIED_RANGE: Range<usize> = Range { start: 0, end: 0 };
126
/// The offset in the event where a plugin event payload starts
///
/// Since the event payload is at a fixed offset, you can add this value
/// to the start of an extracted field within the payload to get the offset
/// from the start of the event.
///
/// The value is re-exported from `falco_plugin_api` (converted to `usize` so it can be added
/// to range bounds directly). It accounts for 26 bytes for the event header, plus 2*4 bytes
/// for the parameter lengths, plus 4 bytes for the plugin ID, i.e. 38 bytes in total.
const PLUGIN_EVENT_PAYLOAD_OFFSET: usize = falco_plugin_api::PLUGIN_EVENT_PAYLOAD_OFFSET as usize;
136
/// State of byte range extraction for a single field
///
/// The same value serves as the request (handed to the extractor) and as the response
/// (read back once the extractor returns).
#[derive(Debug, PartialEq, Eq)]
pub enum ExtractByteRange {
    /// The caller did not ask for byte ranges
    NotRequested,

    /// The caller asked for a byte range, but none has been provided (yet)
    ///
    /// Extractor functions see this value on entry when ranges are wanted. An extractor that
    /// can locate its value in the event may overwrite it with [`ExtractByteRange::Found`];
    /// one that has no range support can simply leave it untouched.
    Requested,

    /// A byte range was determined
    ///
    /// For values located inside the data field of a plugin event, prefer building this
    /// variant through [`ExtractByteRange::in_plugin_data`].
    Found(Range<usize>),
}
156
157impl ExtractByteRange {
158    /// Create a range pointing into a plugin event data field
159    ///
160    /// This is a helper for the common case of returning offsets inside the data field
161    /// of a plugin event. It simply shifts the provided range by 38 bytes (26 header bytes,
162    /// 2*4 length bytes, 4 plugin id bytes) to make the resulting range relative to the full
163    /// event buffer.
164    pub fn in_plugin_data(range: Range<usize>) -> Self {
165        Self::Found(
166            PLUGIN_EVENT_PAYLOAD_OFFSET + range.start..PLUGIN_EVENT_PAYLOAD_OFFSET + range.end,
167        )
168    }
169}
170
/// An extraction request
///
/// One instance is built for every requested field and passed by value to the extractor
/// function registered for that field. All members are borrowed, so the request itself
/// is cheap to construct.
#[derive(Debug)]
pub struct ExtractRequest<'c, 'e, 'r, 't, P: ExtractPlugin> {
    /// A context instance, potentially shared between extractions
    ///
    /// See [`ExtractPlugin::ExtractContext`] for what it can be used for.
    pub context: &'c mut P::ExtractContext,

    /// The event being processed
    pub event: &'e EventInput<'r, P::Event<'r>>,

    /// An interface to access tables exposed from Falco core and other plugins
    ///
    /// See [`crate::tables`] for details.
    pub table_reader: &'t LazyTableReader<'t>,

    /// Offset of extracted data in event payload
    ///
    /// If set to [`ExtractByteRange::Requested`], and the plugin supports it, replace this
    /// with a [`ExtractByteRange::Found`] containing the byte range containing the extracted value,
    /// *within the whole event buffer*. In the typical case of a range inside the plugin event
    /// data, you can use the [`ExtractByteRange::in_plugin_data`] helper.
    ///
    /// If the data is computed (not directly coming from any byte range in the event), use
    /// [`UNSPECIFIED_RANGE`] as the range, i.e. store `ExtractByteRange::Found(UNSPECIFIED_RANGE)`.
    ///
    /// **Note**: range support is optional, and this field can be ignored.
    pub offset: &'c mut ExtractByteRange,
}
198
199/// Support for field extraction plugins
200pub trait ExtractPlugin: Plugin + ExtractPluginExported + Sized
201where
202    Self: 'static,
203{
204    /// # Event type to perform extractions on
205    ///
206    /// Events will be parsed into this type before being passed to the plugin, so you can
207    /// work directly on the deserialized form and don't need to worry about validating
208    /// the events.
209    ///
210    /// If an event fails this conversion, an error will be returned from [`EventInput::event`],
211    /// which you can propagate directly to the caller.
212    ///
213    /// If you don't want any specific validation/conversion to be performed, specify the type as
214    /// ```
215    /// type Event<'a> = falco_event::events::RawEvent<'a>;
216    /// ```
217    type Event<'a>: AnyEventPayload + TryFrom<&'a RawEvent<'a>>;
218
219    /// The extraction context
220    ///
221    /// It might be useful if your plugin supports multiple fields, and they all share some common
222    /// preprocessing steps. Instead of redoing the preprocessing for each field, intermediate
223    /// results can be stored in the context for subsequent extractions (from the same event).
224    ///
225    /// If you do not need a context to share between extracting fields of the same event, use `()`
226    /// as the type.
227    ///
228    /// Since the context is created using the [`Default`] trait, you may prefer to use an Option
229    /// wrapping the actual context type:
230    ///
231    /// ```ignore
232    /// impl ExtractPlugin for MyPlugin {
233    ///     type ExtractContext = Option<ActualContext>;
234    ///     // ...
235    /// }
236    ///
237    /// impl MyPlugin {
238    ///     fn make_context(&mut self, ...) -> ActualContext { /* ... */ }
239    ///
240    ///     fn extract_field_one(
241    ///         &mut self,
242    ///         req: ExtractContext<Self>) -> ... {
243    ///         let context = req.context.get_or_insert_with(|| self.make_context(...));
244    ///
245    ///         // use context
246    ///     }
247    /// }
248    /// ```
249    type ExtractContext: Default + 'static;
250
251    /// The actual list of extractable fields
252    ///
253    /// An extraction method is a method with the following signature:
254    /// ```ignore
255    /// use anyhow::Error;
256    /// use falco_plugin::extract::{EventInput, ExtractFieldRequestArg, ExtractRequest};
257    /// use falco_plugin::tables::TableReader;
258    ///
259    /// fn extract_sample(
260    ///     &mut self,
261    ///     req: ExtractRequest<Self>,
262    ///     arg: A, // optional
263    /// ) -> Result<R, Error>;
264    ///
265    /// ```
266    /// where `R` is one of the following types or a [`Vec`] of them:
267    /// - [`u64`]
268    /// - [`bool`]
269    /// - [`CString`]
270    /// - [`std::time::SystemTime`]
271    /// - [`std::time::Duration`]
272    /// - [`std::net::IpAddr`]
273    /// - [`falco_event::types::IpNet`]
274    ///
275    /// and `A` is the argument to the field extraction:
276    ///
277    /// | Argument declaration | `field` lookup | `field[5]` lookup | `field[foo]` lookup |
278    /// |----------------------|----------------|-------------------|---------------------|
279    /// | _missing_            | valid          | -                 | -                   |
280    /// | `arg: u64`           | -              | valid             | -                   |
281    /// | `arg: Option<u64>`   | valid          | valid             | -                   |
282    /// | `arg: &CStr`         | -              | -                 | valid               |
283    /// | `arg: Option<&CStr>` | valid          | -                 | valid               |
284    ///
285    /// `req` is the extraction request ([`ExtractRequest`]), containing the context in which
286    /// the plugin is doing the work.
287    ///
288    /// To register extracted fields, add them to the [`ExtractPlugin::EXTRACT_FIELDS`] array, wrapped via [`crate::extract::field`]:
289    /// ```
290    /// use std::ffi::CStr;
291    /// use falco_event::events::RawEvent;
292    /// use falco_plugin::anyhow::Error;
293    /// use falco_plugin::base::Plugin;
294    /// use falco_plugin::extract::{
295    ///     field,
296    ///     ExtractFieldInfo,
297    ///     ExtractPlugin,
298    ///     ExtractRequest};
299    ///# use falco_plugin::{extract_plugin, plugin};
300    /// use falco_plugin::tables::TablesInput;
301    ///
302    /// struct SampleExtractPlugin;
303    ///
304    /// impl Plugin for SampleExtractPlugin {
305    ///      const NAME: &'static CStr = c"dummy";
306    ///      const PLUGIN_VERSION: &'static CStr = c"0.0.0";
307    ///      const DESCRIPTION: &'static CStr = c"test plugin";
308    ///      const CONTACT: &'static CStr = c"rust@localdomain.pl";
309    ///      type ConfigType = ();
310    ///
311    ///      fn new(_input: Option<&TablesInput>, _config: Self::ConfigType) -> Result<Self, Error> {
312    ///          Ok(Self)
313    ///      }
314    /// }
315    ///
316    /// impl SampleExtractPlugin {
317    ///     fn extract_sample(
318    ///         &mut self,
319    ///         _req: ExtractRequest<Self>,
320    ///     ) -> Result<u64, Error> {
321    ///         Ok(10u64)
322    ///     }
323    ///
324    ///     fn extract_arg(
325    ///         &mut self,
326    ///         _req: ExtractRequest<Self>,
327    ///         arg: u64,
328    ///     ) -> Result<u64, Error> {
329    ///         Ok(arg)
330    ///     }
331    /// }
332    ///
333    /// impl ExtractPlugin for SampleExtractPlugin {
334    ///     type ExtractContext = ();
335    ///     type Event<'a> = RawEvent<'a>;
336    ///     const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>] = &[
337    ///         field("sample.always_10", &Self::extract_sample),
338    ///         field("sample.arg", &Self::extract_arg)
339    ///     ];
340    /// }
341    ///
342    ///# plugin!(SampleExtractPlugin);
343    ///# extract_plugin!(SampleExtractPlugin);
344    /// ```
345    const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>];
346
347    /// Generate the field schema for the Falco plugin framework
348    ///
349    /// The default implementation inspects all fields from [`Self::EXTRACT_FIELDS`] and generates
350    /// a JSON description in the format expected by the framework.
351    ///
352    /// You probably won't need to provide your own implementation.
353    fn get_fields() -> &'static CStr {
354        static FIELD_SCHEMA: Mutex<BTreeMap<TypeId, CString>> = Mutex::new(BTreeMap::new());
355
356        let ty = TypeId::of::<Self>();
357        let mut schema_map = FIELD_SCHEMA.lock().unwrap();
358        // Safety:
359        //
360        // we only generate the string once and never change or delete it
361        // so the pointer should remain valid for the static lifetime
362        // hence the dance of converting a reference to a raw pointer and back
363        // to erase the lifetime
364        unsafe {
365            CStr::from_ptr(
366                schema_map
367                    .entry(ty)
368                    .or_insert_with(|| {
369                        let schema = serde_json::to_string_pretty(&Self::EXTRACT_FIELDS)
370                            .expect("failed to serialize extraction schema");
371                        CString::new(schema.into_bytes())
372                            .expect("failed to add NUL to extraction schema")
373                    })
374                    .as_ptr(),
375            )
376        }
377    }
378
379    /// Perform the actual field extraction
380    ///
381    /// The default implementation creates an empty context and loops over all extraction
382    /// requests, invoking the relevant function to actually generate the field value.
383    ///
384    /// You probably won't need to provide your own implementation.
385    fn extract_fields<'a>(
386        &'a mut self,
387        event_input: &EventInput<'a, Self::Event<'a>>,
388        table_reader: &LazyTableReader,
389        fields: &mut [ss_plugin_extract_field],
390        offsets: Option<&mut ss_plugin_extract_value_offsets>,
391        storage: &'a bumpalo::Bump,
392    ) -> Result<(), anyhow::Error> {
393        let mut context = Self::ExtractContext::default();
394
395        let (mut offset_vec, mut length_vec) = if offsets.is_some() {
396            (
397                Some(bumpalo::collections::Vec::with_capacity_in(
398                    fields.len(),
399                    storage,
400                )),
401                Some(bumpalo::collections::Vec::with_capacity_in(
402                    fields.len(),
403                    storage,
404                )),
405            )
406        } else {
407            (None, None)
408        };
409
410        let mut any_offsets = false;
411
412        for req in fields {
413            let info = Self::EXTRACT_FIELDS
414                .get(req.field_id as usize)
415                .ok_or_else(|| anyhow::anyhow!("field index out of bounds"))?;
416
417            let mut offset = if offsets.is_some() {
418                ExtractByteRange::Requested
419            } else {
420                ExtractByteRange::NotRequested
421            };
422
423            let request = ExtractRequest::<Self> {
424                context: &mut context,
425                event: event_input,
426                table_reader,
427                offset: &mut offset,
428            };
429
430            info.func.call(self, req, request, storage)?;
431
432            if let (Some(offsets_vec), Some(lengths_vec)) =
433                (offset_vec.as_mut(), length_vec.as_mut())
434            {
435                let range = match offset {
436                    ExtractByteRange::Found(range) => {
437                        any_offsets = true;
438                        range
439                    }
440                    _ => INVALID_RANGE,
441                };
442                offsets_vec.push(range.start as u32);
443                lengths_vec.push(range.end.wrapping_sub(range.start) as u32);
444            }
445        }
446
447        fn pointer_to_vec<T>(v: &Option<bumpalo::collections::Vec<T>>) -> *mut T {
448            match v {
449                None => std::ptr::null_mut(),
450                Some(v) => v.as_ptr().cast_mut(),
451            }
452        }
453
454        if let Some(offsets) = offsets {
455            if any_offsets {
456                offsets.start = pointer_to_vec(&offset_vec);
457                offsets.length = pointer_to_vec(&length_vec);
458            }
459        }
460
461        Ok(())
462    }
463}