falco_plugin/plugin/extract/mod.rs
1use crate::extract::EventInput;
2use crate::plugin::base::Plugin;
3use crate::plugin::extract::schema::ExtractFieldInfo;
4use crate::plugin::extract::wrappers::ExtractPluginExported;
5use crate::tables::LazyTableReader;
6use falco_event::events::{AnyEventPayload, RawEvent};
7use falco_plugin_api::{ss_plugin_extract_field, ss_plugin_extract_value_offsets};
8use std::any::TypeId;
9use std::collections::BTreeMap;
10use std::ffi::{CStr, CString};
11use std::ops::Range;
12use std::sync::Mutex;
13
14mod extractor_fn;
15pub mod fields;
16pub mod schema;
17#[doc(hidden)]
18pub mod wrappers;
19
/// The argument supplied to one particular invocation of an extractor function
///
/// It is validated against the field's [`ExtractFieldInfo`] definition (use
/// [`ExtractFieldInfo::with_arg`] to declare which kind of argument a field expects).
///
/// **Note**: this enum holds the argument actually present in a specific request.
/// To describe which argument types an extractor accepts, see [`ExtractArgType`] instead.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ExtractFieldRequestArg<'a> {
    /// No argument: the field was requested as plain `field_name`
    None,
    /// An integer argument: the field was requested as e.g. `field_name[1]`
    Int(u64),
    /// A string argument: the field was requested as e.g. `field_name[foo]`
    String(&'a CStr),
}
36
37pub trait ExtractField {
38 unsafe fn key_unchecked(&self) -> ExtractFieldRequestArg<'_>;
39}
40
41impl ExtractField for ss_plugin_extract_field {
42 unsafe fn key_unchecked(&self) -> ExtractFieldRequestArg<'_> {
43 if self.arg_present == 0 {
44 return ExtractFieldRequestArg::None;
45 }
46
47 if self.arg_key.is_null() {
48 return ExtractFieldRequestArg::Int(self.arg_index);
49 }
50
51 unsafe { ExtractFieldRequestArg::String(CStr::from_ptr(self.arg_key)) }
52 }
53}
54
/// A placeholder for "no range available" (range extraction not supported)
///
/// The design of the Falco plugin API requires that either every requested field carries
/// a range or none of them does. When range extraction is requested and only some of the
/// extractors involved support it, the entries for the extractors that do not are filled in
/// with this value.
///
/// **Note**: plugins should not use this value themselves. An extractor returning data that
/// does not correspond to any particular byte offset in the plugin payload should set the range
/// to [`UNSPECIFIED_RANGE`] instead.
// The start is deliberately past the end, which is what marks the range as invalid.
#[allow(clippy::reversed_empty_ranges)]
pub const INVALID_RANGE: Range<usize> = Range { start: 1, end: 0 };
67
/// A range for computed data that has no location in the event
///
/// Report this range when the extracted value does not map to any specific byte range
/// in the event (for example, when it was calculated from the event data).
pub const UNSPECIFIED_RANGE: Range<usize> = Range { start: 0, end: 0 };
73
/// Where the payload of a plugin event begins, counted from the start of the event
///
/// The payload always sits at the same offset, so adding this constant to the start of
/// a field extracted from within the payload yields the field's offset from the start
/// of the event.
///
/// The value is the sum of the 26-byte event header, two 4-byte parameter lengths
/// and the 4-byte plugin ID.
const PLUGIN_EVENT_PAYLOAD_OFFSET: usize = 26 + 2 * 4 + 4;
83
/// The state of a byte range extraction: both the request and the response
#[derive(Debug, PartialEq, Eq)]
pub enum ExtractByteRange {
    /// The caller did not ask for a byte range
    NotRequested,

    /// The caller asked for a byte range, but none has been provided
    ///
    /// Extractor functions see this value on entry. An extractor that supports finding byte
    /// ranges may overwrite it with [`ExtractByteRange::Found`]; one that does not can ignore
    /// it completely and leave it unchanged.
    Requested,

    /// The byte range was determined successfully
    ///
    /// For fields extracted from the data field of a plugin event, you will probably want to
    /// build this value with [`ExtractByteRange::in_plugin_data`].
    Found(Range<usize>),
}
103
104impl ExtractByteRange {
105 /// Create a range pointing into a plugin event data field
106 ///
107 /// This is a helper for the common case of returning offsets inside the data field
108 /// of a plugin event. It simply shifts the provided range by 38 bytes (26 header bytes,
109 /// 2*4 length bytes, 4 plugin id bytes) to make the resulting range relative to the full
110 /// event buffer.
111 pub fn in_plugin_data(range: Range<usize>) -> Self {
112 Self::Found(
113 PLUGIN_EVENT_PAYLOAD_OFFSET + range.start..PLUGIN_EVENT_PAYLOAD_OFFSET + range.end,
114 )
115 }
116}
117
118/// An extraction request
119#[derive(Debug)]
120pub struct ExtractRequest<'c, 'e, 'r, 't, P: ExtractPlugin> {
121 /// A context instance, potentially shared between extractions
122 pub context: &'c mut P::ExtractContext,
123
124 /// The event being processed
125 pub event: &'e EventInput<'r, P::Event<'r>>,
126
127 /// An interface to access tables exposed from Falco core and other plugins
128 ///
129 /// See [`crate::tables`] for details.
130 pub table_reader: &'t LazyTableReader<'t>,
131
132 /// Offset of extracted data in event payload
133 ///
134 /// If set to [`ExtractByteRange::Requested`], and the plugin supports it, replace this
135 /// with a [`ExtractByteRange::Found`] containing the byte range containing the extracted value,
136 /// *within the whole event buffer*. In the typical case of a range inside the plugin event
137 /// data, you can use the [`ExtractByteRange::in_plugin_data`] helper.
138 ///
139 /// If the data is computed (not directly coming from any byte range in the event), use
140 /// [`UNSPECIFIED_RANGE`] instead.
141 ///
142 /// **Note**: range support is optional, and this field can be ignored.
143 pub offset: &'c mut ExtractByteRange,
144}
145
146/// Support for field extraction plugins
147pub trait ExtractPlugin: Plugin + ExtractPluginExported + Sized
148where
149 Self: 'static,
150{
151 /// # Event type to perform extractions on
152 ///
153 /// Events will be parsed into this type before being passed to the plugin, so you can
154 /// work directly on the deserialized form and don't need to worry about validating
155 /// the events.
156 ///
157 /// If an event fails this conversion, an error will be returned from [`EventInput::event`],
158 /// which you can propagate directly to the caller.
159 ///
160 /// If you don't want any specific validation/conversion to be performed, specify the type as
161 /// ```
162 /// type Event<'a> = falco_event::events::RawEvent<'a>;
163 /// ```
164 type Event<'a>: AnyEventPayload + TryFrom<&'a RawEvent<'a>>;
165
166 /// The extraction context
167 ///
168 /// It might be useful if your plugin supports multiple fields, and they all share some common
169 /// preprocessing steps. Instead of redoing the preprocessing for each field, intermediate
170 /// results can be stored in the context for subsequent extractions (from the same event).
171 ///
172 /// If you do not need a context to share between extracting fields of the same event, use `()`
173 /// as the type.
174 ///
175 /// Since the context is created using the [`Default`] trait, you may prefer to use an Option
176 /// wrapping the actual context type:
177 ///
178 /// ```ignore
179 /// impl ExtractPlugin for MyPlugin {
180 /// type ExtractContext = Option<ActualContext>;
181 /// // ...
182 /// }
183 ///
184 /// impl MyPlugin {
185 /// fn make_context(&mut self, ...) -> ActualContext { /* ... */ }
186 ///
187 /// fn extract_field_one(
188 /// &mut self,
189 /// req: ExtractContext<Self>) -> ... {
190 /// let context = req.context.get_or_insert_with(|| self.make_context(...));
191 ///
192 /// // use context
193 /// }
194 /// }
195 /// ```
196 type ExtractContext: Default + 'static;
197
198 /// The actual list of extractable fields
199 ///
200 /// An extraction method is a method with the following signature:
201 /// ```ignore
202 /// use anyhow::Error;
203 /// use falco_plugin::extract::{EventInput, ExtractFieldRequestArg, ExtractRequest};
204 /// use falco_plugin::tables::TableReader;
205 ///
206 /// fn extract_sample(
207 /// &mut self,
208 /// req: ExtractRequest<Self>,
209 /// arg: A, // optional
210 /// ) -> Result<R, Error>;
211 ///
212 /// ```
213 /// where `R` is one of the following types or a [`Vec`] of them:
214 /// - [`u64`]
215 /// - [`bool`]
216 /// - [`CString`]
217 /// - [`std::time::SystemTime`]
218 /// - [`std::time::Duration`]
219 /// - [`std::net::IpAddr`]
220 /// - [`falco_event::types::IpNet`]
221 ///
222 /// and `A` is the argument to the field extraction:
223 ///
224 /// | Argument declaration | `field` lookup | `field[5]` lookup | `field[foo]` lookup |
225 /// |----------------------|----------------|-------------------|---------------------|
226 /// | _missing_ | valid | - | - |
227 /// | `arg: u64` | - | valid | - |
228 /// | `arg: Option<u64>` | valid | valid | - |
229 /// | `arg: &CStr` | - | - | valid |
230 /// | `arg: Option<&CStr>` | valid | - | valid |
231 ///
232 /// `req` is the extraction request ([`ExtractRequest`]), containing the context in which
233 /// the plugin is doing the work.
234 ///
235 /// To register extracted fields, add them to the [`ExtractPlugin::EXTRACT_FIELDS`] array, wrapped via [`crate::extract::field`]:
236 /// ```
237 /// use std::ffi::CStr;
238 /// use falco_event::events::RawEvent;
239 /// use falco_plugin::anyhow::Error;
240 /// use falco_plugin::base::Plugin;
241 /// use falco_plugin::extract::{
242 /// field,
243 /// ExtractFieldInfo,
244 /// ExtractPlugin,
245 /// ExtractRequest};
246 ///# use falco_plugin::{extract_plugin, plugin};
247 /// use falco_plugin::tables::TablesInput;
248 ///
249 /// struct SampleExtractPlugin;
250 ///
251 /// impl Plugin for SampleExtractPlugin {
252 /// const NAME: &'static CStr = c"dummy";
253 /// const PLUGIN_VERSION: &'static CStr = c"0.0.0";
254 /// const DESCRIPTION: &'static CStr = c"test plugin";
255 /// const CONTACT: &'static CStr = c"rust@localdomain.pl";
256 /// type ConfigType = ();
257 ///
258 /// fn new(_input: Option<&TablesInput>, _config: Self::ConfigType) -> Result<Self, Error> {
259 /// Ok(Self)
260 /// }
261 /// }
262 ///
263 /// impl SampleExtractPlugin {
264 /// fn extract_sample(
265 /// &mut self,
266 /// _req: ExtractRequest<Self>,
267 /// ) -> Result<u64, Error> {
268 /// Ok(10u64)
269 /// }
270 ///
271 /// fn extract_arg(
272 /// &mut self,
273 /// _req: ExtractRequest<Self>,
274 /// arg: u64,
275 /// ) -> Result<u64, Error> {
276 /// Ok(arg)
277 /// }
278 /// }
279 ///
280 /// impl ExtractPlugin for SampleExtractPlugin {
281 /// type ExtractContext = ();
282 /// type Event<'a> = RawEvent<'a>;
283 /// const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>] = &[
284 /// field("sample.always_10", &Self::extract_sample),
285 /// field("sample.arg", &Self::extract_arg)
286 /// ];
287 /// }
288 ///
289 ///# plugin!(SampleExtractPlugin);
290 ///# extract_plugin!(SampleExtractPlugin);
291 /// ```
292 const EXTRACT_FIELDS: &'static [ExtractFieldInfo<Self>];
293
294 /// Generate the field schema for the Falco plugin framework
295 ///
296 /// The default implementation inspects all fields from [`Self::EXTRACT_FIELDS`] and generates
297 /// a JSON description in the format expected by the framework.
298 ///
299 /// You probably won't need to provide your own implementation.
300 fn get_fields() -> &'static CStr {
301 static FIELD_SCHEMA: Mutex<BTreeMap<TypeId, CString>> = Mutex::new(BTreeMap::new());
302
303 let ty = TypeId::of::<Self>();
304 let mut schema_map = FIELD_SCHEMA.lock().unwrap();
305 // Safety:
306 //
307 // we only generate the string once and never change or delete it
308 // so the pointer should remain valid for the static lifetime
309 // hence the dance of converting a reference to a raw pointer and back
310 // to erase the lifetime
311 unsafe {
312 CStr::from_ptr(
313 schema_map
314 .entry(ty)
315 .or_insert_with(|| {
316 let schema = serde_json::to_string_pretty(&Self::EXTRACT_FIELDS)
317 .expect("failed to serialize extraction schema");
318 CString::new(schema.into_bytes())
319 .expect("failed to add NUL to extraction schema")
320 })
321 .as_ptr(),
322 )
323 }
324 }
325
326 /// Perform the actual field extraction
327 ///
328 /// The default implementation creates an empty context and loops over all extraction
329 /// requests, invoking the relevant function to actually generate the field value.
330 ///
331 /// You probably won't need to provide your own implementation.
332 fn extract_fields<'a>(
333 &'a mut self,
334 event_input: &EventInput<'a, Self::Event<'a>>,
335 table_reader: &LazyTableReader,
336 fields: &mut [ss_plugin_extract_field],
337 offsets: Option<&mut ss_plugin_extract_value_offsets>,
338 storage: &'a bumpalo::Bump,
339 ) -> Result<(), anyhow::Error> {
340 let mut context = Self::ExtractContext::default();
341
342 let (mut offset_vec, mut length_vec) = if offsets.is_some() {
343 (
344 Some(bumpalo::collections::Vec::with_capacity_in(
345 fields.len(),
346 storage,
347 )),
348 Some(bumpalo::collections::Vec::with_capacity_in(
349 fields.len(),
350 storage,
351 )),
352 )
353 } else {
354 (None, None)
355 };
356
357 let mut any_offsets = false;
358
359 for req in fields {
360 let info = Self::EXTRACT_FIELDS
361 .get(req.field_id as usize)
362 .ok_or_else(|| anyhow::anyhow!("field index out of bounds"))?;
363
364 let mut offset = if offsets.is_some() {
365 ExtractByteRange::Requested
366 } else {
367 ExtractByteRange::NotRequested
368 };
369
370 let request = ExtractRequest::<Self> {
371 context: &mut context,
372 event: event_input,
373 table_reader,
374 offset: &mut offset,
375 };
376
377 info.func.call(self, req, request, storage)?;
378
379 if let (Some(offsets_vec), Some(lengths_vec)) =
380 (offset_vec.as_mut(), length_vec.as_mut())
381 {
382 let range = match offset {
383 ExtractByteRange::Found(range) => {
384 any_offsets = true;
385 range
386 }
387 _ => INVALID_RANGE,
388 };
389 offsets_vec.push(range.start as u32);
390 lengths_vec.push(range.end.wrapping_sub(range.start) as u32);
391 }
392 }
393
394 fn pointer_to_vec<T>(v: &Option<bumpalo::collections::Vec<T>>) -> *mut T {
395 match v {
396 None => std::ptr::null_mut(),
397 Some(v) => v.as_ptr().cast_mut(),
398 }
399 }
400
401 if let Some(offsets) = offsets {
402 if any_offsets {
403 offsets.start = pointer_to_vec(&offset_vec);
404 offsets.length = pointer_to_vec(&length_vec);
405 }
406 }
407
408 Ok(())
409 }
410}