falco_plugin/plugin/extract/
mod.rs

1use crate::extract::EventInput;
2use crate::plugin::base::Plugin;
3use crate::plugin::extract::schema::ExtractFieldInfo;
4use crate::plugin::extract::wrappers::ExtractPluginExported;
5use crate::tables::LazyTableReader;
6use falco_event::events::types::EventType;
7use falco_plugin_api::{ss_plugin_extract_field, ss_plugin_extract_value_offsets};
8use std::any::TypeId;
9use std::collections::BTreeMap;
10use std::ffi::{CStr, CString};
11use std::ops::Range;
12use std::sync::Mutex;
13
14mod extractor_fn;
15pub mod fields;
16pub mod schema;
17#[doc(hidden)]
18pub mod wrappers;
19
/// The actual argument passed to the extractor function
///
/// It is validated based on the [`ExtractFieldInfo`] definition (use [`ExtractFieldInfo::with_arg`]
/// to specify the expected argument type).
///
/// The lifetime `'a` is the lifetime of the string key borrowed by the
/// [`ExtractFieldRequestArg::String`] variant; the other variants do not borrow anything.
///
/// **Note**: this type describes the actual argument in a particular invocation.
/// For describing the type of arguments the extractor accepts, please see [`ExtractArgType`]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum ExtractFieldRequestArg<'a> {
    /// no argument, the extractor was invoked as plain `field_name`
    None,
    /// an integer argument, the extractor was invoked as e.g. `field_name[1]`
    Int(u64),
    /// a string argument, the extractor was invoked as e.g. `field_name[foo]`
    String(&'a CStr),
}
36
37pub trait ExtractField {
38    unsafe fn key_unchecked(&self) -> ExtractFieldRequestArg;
39}
40
41impl ExtractField for ss_plugin_extract_field {
42    unsafe fn key_unchecked(&self) -> ExtractFieldRequestArg {
43        if self.arg_present == 0 {
44            return ExtractFieldRequestArg::None;
45        }
46
47        if self.arg_key.is_null() {
48            return ExtractFieldRequestArg::Int(self.arg_index);
49        }
50
51        unsafe { ExtractFieldRequestArg::String(CStr::from_ptr(self.arg_key)) }
52    }
53}
54
/// Marker for "no valid byte range" (range extraction not supported)
///
/// The Falco plugin API expects either a range for every requested field or no ranges
/// at all. When range extraction is requested and an extractor without range support runs
/// next to extractors that do support it, the slots of the former are filled with this value.
///
/// **Note**: plugins should not return this value themselves. An extractor whose result does
/// not correspond to any particular byte offset in the plugin payload should report
/// [`UNSPECIFIED_RANGE`] instead.
// The range is deliberately reversed (start > end), which is what clippy warns about.
#[allow(clippy::reversed_empty_ranges)]
pub const INVALID_RANGE: Range<usize> = Range { start: 1, end: 0 };
67
/// Range for computed data that has no location in the event
///
/// Report this range when the extracted value is not a copy of any specific byte range
/// of the event (for example, when it was calculated from the event data).
pub const UNSPECIFIED_RANGE: Range<usize> = Range { start: 0, end: 0 };
73
/// Byte offset, from the start of the event, at which a plugin event payload begins
///
/// The payload always starts at this fixed position, so adding this constant to an offset
/// measured inside the payload yields the offset measured from the start of the event.
///
/// The value is the sum of:
/// - 26 bytes of event header,
/// - 2 parameter lengths of 4 bytes each,
/// - 4 bytes of plugin ID.
const PLUGIN_EVENT_PAYLOAD_OFFSET: usize = 26 + 2 * 4 + 4;
83
/// Range extraction request/response
///
/// A value of this type is handed to each extractor through [`ExtractRequest::offset`].
/// It starts out as either [`ExtractByteRange::NotRequested`] or
/// [`ExtractByteRange::Requested`] and the extractor may overwrite it with
/// [`ExtractByteRange::Found`].
#[derive(Debug, Eq, PartialEq)]
pub enum ExtractByteRange {
    /// Range extraction was not requested
    NotRequested,

    /// Range extraction was requested but not performed
    ///
    /// This value is set upon entry to the extractor function. The function may replace the value
    /// with [`ExtractByteRange::Found`] if it supports finding byte ranges. If the extractor does
    /// not support byte ranges, it can ignore this value completely and leave it unchanged
    /// (the default [`ExtractPlugin::extract_fields`] then reports [`INVALID_RANGE`] for
    /// that field).
    Requested,

    /// Range extraction finished successfully
    ///
    /// The range is relative to the start of the whole event buffer.
    ///
    /// Note that for fields extracted from the plugin event data field, you will probably want
    /// to construct this value using [`ExtractByteRange::in_plugin_data`]. For computed values
    /// that do not come from any byte range in the event, use `Found(UNSPECIFIED_RANGE)`
    /// (see [`UNSPECIFIED_RANGE`]).
    Found(Range<usize>),
}
103
104impl ExtractByteRange {
105    /// Create a range pointing into a plugin event data field
106    ///
107    /// This is a helper for the common case of returning offsets inside the data field
108    /// of a plugin event. It simply shifts the provided range by 38 bytes (26 header bytes,
109    /// 2*4 length bytes, 4 plugin id bytes) to make the resulting range relative to the full
110    /// event buffer.
111    pub fn in_plugin_data(range: Range<usize>) -> Self {
112        Self::Found(
113            PLUGIN_EVENT_PAYLOAD_OFFSET + range.start..PLUGIN_EVENT_PAYLOAD_OFFSET + range.end,
114        )
115    }
116}
117
/// An extraction request
///
/// One instance is built for every requested field and passed by value to the
/// extraction function of that field.
#[derive(Debug)]
pub struct ExtractRequest<'c, 'e, 't, P: ExtractPlugin> {
    /// A context instance, potentially shared between extractions
    ///
    /// See [`ExtractPlugin::ExtractContext`] for details.
    pub context: &'c mut P::ExtractContext,

    /// The event being processed
    pub event: &'e EventInput,

    /// An interface to access tables exposed from Falco core and other plugins
    ///
    /// See [`crate::tables`] for details.
    pub table_reader: &'t LazyTableReader<'t>,

    /// Offset of extracted data in event payload
    ///
    /// If set to [`ExtractByteRange::Requested`], and the plugin supports it, replace this
    /// with a [`ExtractByteRange::Found`] containing the byte range containing the extracted value,
    /// *within the whole event buffer*. In the typical case of a range inside the plugin event
    /// data, you can use the [`ExtractByteRange::in_plugin_data`] helper.
    ///
    /// If the data is computed (not directly coming from any byte range in the event), store
    /// `ExtractByteRange::Found(UNSPECIFIED_RANGE)` instead (see [`UNSPECIFIED_RANGE`]).
    ///
    /// **Note**: range support is optional, and this field can be ignored.
    pub offset: &'c mut ExtractByteRange,
}
145
146/// Support for field extraction plugins
147pub trait ExtractPlugin: Plugin + ExtractPluginExported + Sized
148where
149    Self: 'static,
150{
    /// The set of event types supported by this plugin
    ///
    /// If empty, the plugin will get invoked for all event types, otherwise it will only
    /// get invoked for event types from this list.
    ///
    /// **Note**: some notable event types are:
    /// - [`EventType::ASYNCEVENT_E`], generated from async plugins
    /// - [`EventType::PLUGINEVENT_E`], generated from source plugins
    ///
    /// See also [`ExtractPlugin::EVENT_SOURCES`] for filtering by event source.
    const EVENT_TYPES: &'static [EventType];
    /// The set of event sources supported by this plugin
    ///
    /// If empty, the plugin will get invoked for events coming from all sources, otherwise it will
    /// only get invoked for events from sources named in this list.
    ///
    /// **Note**: one notable event source is called `syscall`
    ///
    /// See also [`ExtractPlugin::EVENT_TYPES`] for filtering by event type.
    const EVENT_SOURCES: &'static [&'static str];
167
    /// The extraction context
    ///
    /// It might be useful if your plugin supports multiple fields, and they all share some common
    /// preprocessing steps. Instead of redoing the preprocessing for each field, intermediate
    /// results can be stored in the context for subsequent extractions (from the same event).
    ///
    /// If you do not need a context to share between extracting fields of the same event, use `()`
    /// as the type.
    ///
    /// Since the context is created using the [`Default`] trait, you may prefer to use an Option
    /// wrapping the actual context type:
    ///
    /// ```ignore
    /// impl ExtractPlugin for MyPlugin {
    ///     type ExtractContext = Option<ActualContext>;
    ///     // ...
    /// }
    ///
    /// impl MyPlugin {
    ///     fn make_context(&mut self, ...) -> ActualContext { /* ... */ }
    ///
    ///     fn extract_field_one(
    ///         &mut self,
    ///         req: ExtractRequest<Self>) -> ... {
    ///         let context = req.context.get_or_insert_with(|| self.make_context(...));
    ///
    ///         // use context
    ///     }
    /// }
    /// ```
    type ExtractContext: Default + 'static;
199
    /// The actual list of extractable fields
    ///
    /// The position of an entry in this array is its field id: the default
    /// [`ExtractPlugin::extract_fields`] implementation looks up the extractor for a request
    /// by using the request's `field_id` as an index into this array.
    ///
    /// An extraction method is a method with the following signature:
    /// ```ignore
    /// use anyhow::Error;
    /// use falco_plugin::extract::{EventInput, ExtractFieldRequestArg, ExtractRequest};
    /// use falco_plugin::tables::TableReader;
    ///
    /// fn extract_sample(
    ///     &mut self,
    ///     req: ExtractRequest<Self>,
    ///     arg: A, // optional
    /// ) -> Result<R, Error>;
    ///
    /// ```
    /// where `R` is one of the following types or a [`Vec`] of them:
    /// - [`u64`]
    /// - [`bool`]
    /// - [`CString`]
    /// - [`std::time::SystemTime`]
    /// - [`std::time::Duration`]
    /// - [`std::net::IpAddr`]
    /// - [`falco_event::fields::types::PT_IPNET`]
    ///
    /// and `A` is the argument to the field extraction:
    ///
    /// | Argument declaration | `field` lookup | `field[5]` lookup | `field[foo]` lookup |
    /// |----------------------|----------------|-------------------|---------------------|
    /// | _missing_            | valid          | -                 | -                   |
    /// | `arg: u64`           | -              | valid             | -                   |
    /// | `arg: Option<u64>`   | valid          | valid             | -                   |
    /// | `arg: &CStr`         | -              | -                 | valid               |
    /// | `arg: Option<&CStr>` | valid          | -                 | valid               |
    ///
    /// `req` is the extraction request ([`ExtractRequest`]), containing the context in which
    /// the plugin is doing the work.
    ///
    /// To register extracted fields, add them to the [`ExtractPlugin::EXTRACT_FIELDS`] array, wrapped via [`crate::extract::field`]:
    /// ```
    /// use std::ffi::CStr;
    /// use falco_plugin::event::events::types::EventType;
    /// use falco_plugin::event::events::types::EventType::PLUGINEVENT_E;
    /// use falco_plugin::anyhow::Error;
    /// use falco_plugin::base::Plugin;
    /// use falco_plugin::extract::{
    ///     field,
    ///     ExtractFieldInfo,
    ///     ExtractPlugin,
    ///     ExtractRequest};
    ///# use falco_plugin::{extract_plugin, plugin};
    /// use falco_plugin::tables::TablesInput;
    ///
    /// struct SampleExtractPlugin;
    ///
    /// impl Plugin for SampleExtractPlugin {
    ///      const NAME: &'static CStr = c"dummy";
    ///      const PLUGIN_VERSION: &'static CStr = c"0.0.0";
    ///      const DESCRIPTION: &'static CStr = c"test plugin";
    ///      const CONTACT: &'static CStr = c"rust@localdomain.pl";
    ///      type ConfigType = ();
    ///
    ///      fn new(_input: Option<&TablesInput>, _config: Self::ConfigType) -> Result<Self, Error> {
    ///          Ok(Self)
    ///      }
    /// }
    ///
    /// impl SampleExtractPlugin {
    ///     fn extract_sample(
    ///         &mut self,
    ///         _req: ExtractRequest<Self>,
    ///     ) -> Result<u64, Error> {
    ///         Ok(10u64)
    ///     }
    ///
    ///     fn extract_arg(
    ///         &mut self,
    ///         _req: ExtractRequest<Self>,
    ///         arg: u64,
    ///     ) -> Result<u64, Error> {
    ///         Ok(arg)
    ///     }
    /// }
    ///
    /// impl ExtractPlugin for SampleExtractPlugin {
    ///     const EVENT_TYPES: &'static [EventType] = &[PLUGINEVENT_E];
    ///     const EVENT_SOURCES: &'static [&'static str] = &["dummy"];
    ///     type ExtractContext = ();
    ///     const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>] = &[
    ///         field("sample.always_10", &Self::extract_sample),
    ///         field("sample.arg", &Self::extract_arg)
    ///     ];
    /// }
    ///
    ///# plugin!(SampleExtractPlugin);
    ///# extract_plugin!(SampleExtractPlugin);
    /// ```
    const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>];
297
298    /// Generate the field schema for the Falco plugin framework
299    ///
300    /// The default implementation inspects all fields from [`Self::EXTRACT_FIELDS`] and generates
301    /// a JSON description in the format expected by the framework.
302    ///
303    /// You probably won't need to provide your own implementation.
304    fn get_fields() -> &'static CStr {
305        static FIELD_SCHEMA: Mutex<BTreeMap<TypeId, CString>> = Mutex::new(BTreeMap::new());
306
307        let ty = TypeId::of::<Self>();
308        let mut schema_map = FIELD_SCHEMA.lock().unwrap();
309        // Safety:
310        //
311        // we only generate the string once and never change or delete it
312        // so the pointer should remain valid for the static lifetime
313        // hence the dance of converting a reference to a raw pointer and back
314        // to erase the lifetime
315        unsafe {
316            CStr::from_ptr(
317                schema_map
318                    .entry(ty)
319                    .or_insert_with(|| {
320                        let schema = serde_json::to_string_pretty(&Self::EXTRACT_FIELDS)
321                            .expect("failed to serialize extraction schema");
322                        CString::new(schema.into_bytes())
323                            .expect("failed to add NUL to extraction schema")
324                    })
325                    .as_ptr(),
326            )
327        }
328    }
329
330    /// Perform the actual field extraction
331    ///
332    /// The default implementation creates an empty context and loops over all extraction
333    /// requests, invoking the relevant function to actually generate the field value.
334    ///
335    /// You probably won't need to provide your own implementation.
336    fn extract_fields<'a>(
337        &'a mut self,
338        event_input: &EventInput,
339        table_reader: &LazyTableReader,
340        fields: &mut [ss_plugin_extract_field],
341        offsets: Option<&mut ss_plugin_extract_value_offsets>,
342        storage: &'a bumpalo::Bump,
343    ) -> Result<(), anyhow::Error> {
344        let mut context = Self::ExtractContext::default();
345
346        let (mut offset_vec, mut length_vec) = if offsets.is_some() {
347            (
348                Some(bumpalo::collections::Vec::with_capacity_in(
349                    fields.len(),
350                    storage,
351                )),
352                Some(bumpalo::collections::Vec::with_capacity_in(
353                    fields.len(),
354                    storage,
355                )),
356            )
357        } else {
358            (None, None)
359        };
360
361        let mut any_offsets = false;
362
363        for req in fields {
364            let info = Self::EXTRACT_FIELDS
365                .get(req.field_id as usize)
366                .ok_or_else(|| anyhow::anyhow!("field index out of bounds"))?;
367
368            let mut offset = if offsets.is_some() {
369                ExtractByteRange::Requested
370            } else {
371                ExtractByteRange::NotRequested
372            };
373
374            let request = ExtractRequest::<Self> {
375                context: &mut context,
376                event: event_input,
377                table_reader,
378                offset: &mut offset,
379            };
380
381            info.func.call(self, req, request, storage)?;
382
383            if let (Some(offsets_vec), Some(lengths_vec)) =
384                (offset_vec.as_mut(), length_vec.as_mut())
385            {
386                let range = match offset {
387                    ExtractByteRange::Found(range) => {
388                        any_offsets = true;
389                        range
390                    }
391                    _ => INVALID_RANGE,
392                };
393                offsets_vec.push(range.start as u32);
394                lengths_vec.push(range.end.wrapping_sub(range.start) as u32);
395            }
396        }
397
398        fn pointer_to_vec<T>(v: &Option<bumpalo::collections::Vec<T>>) -> *mut T {
399            match v {
400                None => std::ptr::null_mut(),
401                Some(v) => v.as_ptr().cast_mut(),
402            }
403        }
404
405        if let Some(offsets) = offsets {
406            if any_offsets {
407                offsets.start = pointer_to_vec(&offset_vec);
408                offsets.length = pointer_to_vec(&length_vec);
409            }
410        }
411
412        Ok(())
413    }
414}