Skip to content

Commit

Permalink
Add 'python-parser/' from commit 'd124464b41d4ff0ce96884b3d617afa357d…
Browse files Browse the repository at this point in the history
…af837'

git-subtree-dir: python-parser
git-subtree-mainline: ce0b9ed
git-subtree-split: d124464
  • Loading branch information
Tibor Benke committed Apr 22, 2016
2 parents ce0b9ed + d124464 commit c7819ba
Show file tree
Hide file tree
Showing 12 changed files with 715 additions and 0 deletions.
3 changes: 3 additions & 0 deletions python-parser/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
target
Cargo.lock
*.pyc
17 changes: 17 additions & 0 deletions python-parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "python-parser"
version = "0.2.0"
authors = ["Tibor Benke <[email protected]>"]
build = "build.rs"

[lib]
crate-type = ["rlib", "dylib"]

[dependencies]
syslog-ng-common = "0.8"
cpython = { git = "https://github.com/ihrwein/rust-cpython.git" }
log = "0.3"
env_logger = "0.3"

[build-dependencies]
syslog-ng-build = "0.2"
95 changes: 95 additions & 0 deletions python-parser/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Python parser for syslog-ng

For a real world tested example, check the `_test_module/regex.py` file.

A Python parser is a Python class which implements two methods:
* `init(self, options)`: (optional) This method is called on the parser instance
right after it has been created. The `options` variable is a dictionary with key-value pairs.
The method must not return a value.
* `parse(self, logmsg, message)`: (mandatory) This method is called upon receiving
a new log message. The first parameter, `logmsg`, is a dictionary-like data structure
which contains the already parsed key-value pairs. You can fetch the existing ones
or insert new ones with the `__getitem__/__setitem__` methods. The second parameter,
`message`, is the text to be parsed. The method must return `True` or `False`.

Example:
```python
import re

class RegexParser:
    def init(self, options):
        pattern = options["regex"]
        self.regex = re.compile(pattern)

    def parse(self, logmsg, message):
        match = self.regex.match(message)
        if match is not None:
            for key, value in match.groupdict().items():
                logmsg[key] = value
            return True
        else:
            return False
```

If an exception is thrown during `init()`, it is considered an initialization error and syslog-ng won't be started.

## Configuration


```
@version: 3.8
block parser regex(
regex("")
)
{
regex-rs(
option("regex", `regex`)
);
};
source s_localhost {
network(
ip(
127.0.0.1
),
port(
1514
),
transport("tcp")
);
};
log {
source(
s_localhost
);
parser {
python-rs(
option("module", "_test_module.regex")
option("class", "RegexParser")
option("regex", "seq: (?P<seq>\\d+), thread: (?P<thread>\\d+), runid: (?P<runid>\\d+), stamp: (?P<stamp>[^ ]+) (?P<padding>.*$)")
);
};
destination {
file("/dev/stdout" template("runid=$runid\n"));
};
};
```

Make sure that you can import the `_test_module.regex` module
from a Python shell. If not, you can add its directory to
the `PYTHONPATH` environment variable:

```
PYTHONPATH=/home/tibi/workspace/python-parser sbin/syslog-ng -Fevd
```

## Compilation

You need a nightly Rust compiler.
Make sure that pkg-config is able to find syslog-ng and that `libsyslog-ng.so` is in your
library path.

```
cargo build --release
cp target/release/libpython_parser.so <syslog-ng install prefix>/lib/syslog-ng/
```
46 changes: 46 additions & 0 deletions python-parser/_test_module/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Keep this class commented out
# class NonExistingParser: pass

class ExistingParser:
    """Fixture: a class that defines no methods or attributes of its own."""

class CallableClass:
    """Fixture: an empty class; calling it yields a new instance."""

# Fixture: a module attribute that exists but cannot be called (a plain integer zero).
NotCallableObject = 0

class ClassWithInitMethod:
    """Fixture: a class whose ``init()`` accepts the options and does nothing."""

    def init(self, options):
        """Ignore ``options`` and return ``None``."""
        return None

class InitMethodReturnsNotNone:
    """Fixture: a class whose ``init()`` returns a value other than ``None``."""

    def init(self, options):
        """Ignore ``options`` and return ``True``."""
        result = True
        return result

class ParserWithoutInitMethod:
    """Fixture: a class that does not define ``init()``."""

class ParserClassWithGoodParseMethod:
    """Fixture: a class whose ``parse()`` takes the expected arguments and returns a bool."""

    def parse(self, logmsg, input):
        """Ignore both arguments and return ``True``."""
        outcome = True
        return outcome

class ParserWithoutParseMethod:
    """Fixture: a class that does not define ``parse()``."""

class ParseMethodReturnsNotBoolean:
    """Fixture: a class whose ``parse()`` returns ``None`` instead of a bool."""

    def parse(self, logmsg, input):
        """Ignore both arguments and return ``None``."""
        not_a_boolean = None
        return not_a_boolean

class ParseReturnsTrue:
    """Fixture: a class whose ``parse()`` always reports success."""

    def parse(self, logmsg, input):
        """Ignore both arguments and return ``True``."""
        succeeded = True
        return succeeded

class ParseReturnsFalse:
    """Fixture: a class whose ``parse()`` always reports failure."""

    def parse(self, logmsg, input):
        """Ignore both arguments and return ``False``."""
        succeeded = False
        return succeeded

class ExceptionIsRaisedInParseMethod:
    """Fixture: a class whose ``parse()`` always raises ``TypeError("text")``."""

    def parse(self, logmsg, input):
        """Ignore both arguments and raise ``TypeError``.

        Raises:
            TypeError: always, with the message ``"text"``.
        """
        # The former `return False` after this line could never execute and was removed.
        raise TypeError("text")

class ExceptionIsRaisedInInitMethod:
    """Fixture: a class whose ``init()`` always raises ``TypeError("text")``."""

    def init(self, options):
        """Ignore ``options`` and raise ``TypeError``.

        Raises:
            TypeError: always, with the message ``"text"``.
        """
        # The former `return True` after this line could never execute and was removed.
        raise TypeError("text")
15 changes: 15 additions & 0 deletions python-parser/_test_module/regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import re

class RegexParser:
    """Parser that matches a message against a regex and stores its named groups."""

    def init(self, options):
        """Compile the pattern found under the ``"regex"`` key of ``options``.

        Raises ``KeyError`` if the key is absent; an invalid pattern makes
        ``re.compile`` raise.
        """
        self.regex = re.compile(options["regex"])

    def parse(self, logmsg, message):
        """Match ``message`` from its start against the compiled pattern.

        On a match, every named group is written into ``logmsg`` (group name as
        key, captured text as value) and ``True`` is returned. Without a match
        ``logmsg`` is left untouched and ``False`` is returned.
        """
        found = self.regex.match(message)
        if found is None:
            return False
        for group_name, captured in found.groupdict().items():
            logmsg[group_name] = captured
        return True
26 changes: 26 additions & 0 deletions python-parser/benches/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#![feature(test)]
extern crate test;
extern crate python_parser;
extern crate syslog_ng_common;
extern crate env_logger;

use std::env;
use test::Bencher;
use syslog_ng_common::{LogMessage, Parser};
use python_parser::utils::build_parser_with_options;

use syslog_ng_common::sys::logmsg::log_msg_registry_init;

/// Benchmarks a single `parse()` call of the `_test_module.regex.RegexParser` Python class
/// on a fixed sample message.
#[bench]
fn bench_parse(b: &mut Bencher) {
    const PATTERN: &'static str = r#"seq: (?P<seq>\d+), thread: (?P<thread>\d+), runid: (?P<runid>\d+), stamp: (?P<stamp>[^ ]+) (?P<padding>.*$)"#;
    const SAMPLE: &'static str = "seq: 0000000000, thread: 0000, runid: 1456947132, stamp: 2016-03-02T20:32:12 PAD";

    // SAFETY: NOTE(review) — presumably this syslog-ng registry initializer has to run before
    // any `LogMessage` is created and takes no arguments to get wrong; confirm its contract.
    unsafe {
        log_msg_registry_init();
    }
    // The result of the logger initialization is deliberately ignored.
    let _ = env_logger::init();

    // Point PYTHONPATH at the current working directory so `_test_module` can be imported.
    let module_root = env::current_dir().unwrap();
    env::set_var("PYTHONPATH", module_root);

    let options = [("regex", PATTERN)];
    let mut parser = build_parser_with_options("_test_module.regex", "RegexParser", &options);
    let mut logmsg = LogMessage::new();
    b.iter(|| parser.parse(&mut logmsg, SAMPLE));
}
8 changes: 8 additions & 0 deletions python-parser/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
extern crate syslog_ng_build;

/// Build-script entry point: hands the module's identifying strings to
/// `syslog_ng_build::create_module`.
fn main() {
    syslog_ng_build::create_module(
        // canonical module name
        "python-parser",
        // module description
        "This is a Python parser written in Rust",
        // parser name
        Some("python-rs"),
    );
}
180 changes: 180 additions & 0 deletions python-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#[macro_use]
extern crate syslog_ng_common;
#[macro_use]
extern crate log;
#[macro_use]
extern crate cpython;

pub mod py_logmsg;
pub mod utils;

use std::borrow::Borrow;
use std::marker::PhantomData;

use syslog_ng_common::{LogMessage, Parser, ParserBuilder, OptionError, Pipe, GlobalConfig};
use cpython::{Python, PyDict, NoArgs, PyClone, PyObject, PyResult, PyModule, PyErr, PyString};
use cpython::ObjectProtocol; //for call method
use cpython::exc::TypeError;

pub use py_logmsg::PyLogMessage;

/// Names of the options that the parser builder consumes itself; every other option is
/// collected and passed on to the Python side.
pub mod options {
    /// Option that names the Python class to instantiate.
    pub const CLASS: &'static str = "class";
    /// Option that names the Python module to import.
    pub const MODULE: &'static str = "module";
}

/// A parser that forwards every parse request to a Python object.
pub struct PythonParser<P>
    where P: Pipe
{
    /// The Python parser instance whose `parse()` method gets called.
    parser: PyObject,
    /// Zero-sized marker that ties the otherwise unused pipe type `P` to the struct.
    _marker: PhantomData<P>,
}

impl<P: Pipe> Clone for PythonParser<P> {
    /// Returns a new `PythonParser` holding another reference to the wrapped Python object.
    ///
    /// `clone_ref()` needs a `Python` token, so the GIL is acquired for the duration of
    /// the call.
    fn clone(&self) -> Self {
        let gil_guard = Python::acquire_gil();
        let parser_ref = self.parser.clone_ref(gil_guard.python());
        PythonParser {
            parser: parser_ref,
            _marker: PhantomData,
        }
    }
}

/// Collects the configuration options from which a `PythonParser` is built.
pub struct PythonParserBuilder<P>
    where P: Pipe
{
    /// Value of the `module` option, if it has been set.
    module: Option<String>,
    /// Value of the `class` option, if it has been set.
    class: Option<String>,
    /// All other `(name, value)` options, in the order they were received.
    options: Vec<(String, String)>,
    /// Zero-sized marker that ties the otherwise unused pipe type `P` to the struct.
    _marker: PhantomData<P>,
}

/// The individual steps that turn a module/class name pair into an initialized Python
/// parser object. The steps are deliberately kept as tiny separate functions: that makes
/// each of them testable on its own.
impl<P: Pipe> PythonParserBuilder<P> {
    /// Imports the Python module called `module_name`.
    pub fn load_module<'p>(py: Python<'p>, module_name: &str) -> PyResult<PyModule> {
        debug!("Trying to load Python module, module='{}'", module_name);
        py.import(module_name)
    }

    /// Fetches the attribute called `class_name` from `module`.
    pub fn load_class<'p>(py: Python<'p>, module: &PyModule, class_name: &str) -> PyResult<PyObject> {
        debug!("Trying to load Python class, class='{}'", class_name);
        module.get(py, class_name)
    }

    /// Calls `class` without arguments and returns the object produced by the call.
    pub fn instantiate_class<'p>(py: Python<'p>, class: &PyObject) -> PyResult<PyObject> {
        debug!("Trying to instantiate Python parser");
        class.call(py, NoArgs, None)
    }

    /// Builds a Python dict that contains every `(key, value)` pair of `init_options`.
    pub fn create_options_dict<'p>(py: Python<'p>, init_options: &[(String, String)]) -> PyResult<PyDict> {
        debug!("Instantiating the options dict");
        let dict = PyDict::new(py);
        for &(ref key, ref value) in init_options.iter() {
            debug!("Adding values to the options dict, key='{}', value='{}'", key, value);
            try!(dict.set_item(py, key, value));
        }
        Ok(dict)
    }

    /// Calls `init(options)` on `instance`; any return value other than `None` is turned
    /// into a `TypeError`.
    fn call_init<'p>(py: Python<'p>, instance: &PyObject, options: PyDict) -> PyResult<()> {
        let returned = try!(instance.call_method(py, "init", (&options, ), None));
        if returned != Python::None(py) {
            let errmsg = PyString::new(py, "The init() method mustn't return any value");
            return Err(PyErr::new::<TypeError, PyString>(py, errmsg));
        }
        Ok(())
    }

    /// Calls `init(options)` on `instance` if it has an `init` attribute; otherwise does
    /// nothing and succeeds.
    pub fn initialize_instance<'p>(py: Python<'p>, instance: &PyObject, options: PyDict) -> PyResult<()> {
        debug!("Trying to call init() on the Python parser instance");
        let has_init = try!(instance.hasattr(py, "init"));
        if !has_init {
            return Ok(());
        }
        Self::call_init(py, instance, options)
    }

    /// Instantiates `class`, converts `options` into a dict and runs the optional `init()`
    /// step on the fresh instance, which is returned on success.
    pub fn initialize_class<'p>(py: Python<'p>, class: &PyObject, options: &[(String, String)]) -> PyResult<PyObject> {
        let parser_instance = try!(Self::instantiate_class(py, class));
        let options_dict = try!(Self::create_options_dict(py, options));
        try!(Self::initialize_instance(py, &parser_instance, options_dict));
        Ok(parser_instance)
    }

    /// Runs the whole pipeline: import the module, look up the class, then instantiate and
    /// initialize it. The first failing step's error is returned.
    pub fn load_and_init_class<'p>(py: Python<'p>, module_name: &str, class_name: &str, options: &[(String, String)]) -> PyResult<PyObject> {
        let module = try!(Self::load_module(py, module_name));
        Self::load_class(py, &module, class_name)
            .and_then(|class| Self::initialize_class(py, &class, options))
    }
}

impl<P: Pipe> ParserBuilder<P> for PythonParserBuilder<P> {
    type Parser = PythonParser<P>;

    /// Creates a builder with no module, no class and no extra options set.
    fn new(_: GlobalConfig) -> Self {
        PythonParserBuilder {
            module: None,
            class: None,
            options: Vec::new(),
            _marker: PhantomData
        }
    }

    /// Records one configuration option.
    ///
    /// `module` and `class` are stored in their dedicated fields (a later value replaces an
    /// earlier one); every other option is appended to the list that is later handed to the
    /// Python parser's `init()` method.
    fn option(&mut self, name: String, value: String) {
        match name.borrow() {
            options::MODULE => { self.module = Some(value); },
            options::CLASS => { self.class = Some(value); },
            _ => { self.options.push((name, value)); }
        }
    }

    /// Loads, instantiates and initializes the configured Python class.
    ///
    /// # Errors
    ///
    /// * A "missing required option" error naming the option that is actually absent:
    ///   `module` if the module is missing (also when both are missing), `class` otherwise.
    /// * A verbatim error carrying the debug representation of the Python error if
    ///   importing, instantiating or initializing the class fails.
    fn build(self) -> Result<Self::Parser, OptionError> {
        // Validate the required options first; there is no need to touch the interpreter
        // when the configuration is incomplete.
        let (module_name, class_name) = match (self.module, self.class) {
            (Some(module_name), Some(class_name)) => (module_name, class_name),
            (module, class) => {
                error!("Missing parameters in Python parser: module={:?}, class={:?}", module, class);
                // Report the option that is really missing instead of always blaming `module`.
                let missing = if module.is_none() { options::MODULE } else { options::CLASS };
                return Err(OptionError::missing_required_option(missing));
            }
        };

        let gil = Python::acquire_gil();
        let py = gil.python(); // obtain `Python` token

        match PythonParserBuilder::<P>::load_and_init_class(py, &module_name, &class_name, &self.options) {
            Ok(parser_instance) => {
                debug!("Python parser successfully initialized, class='{}'", &class_name);
                Ok(PythonParser {parser: parser_instance, _marker: PhantomData})
            },
            Err(error) => {
                error!("Failed to create Python parser, class='{}'", class_name);
                Err(OptionError::verbatim_error(format!("{:?}", error)))
            }
        }
    }
}

/// Helpers that perform the call into Python and interpret its result.
impl<P: Pipe> PythonParser<P> {
    /// Calls `parse(logmsg, message)` on the wrapped Python object and returns whatever
    /// object the call produced, or the Python error it raised.
    pub fn process_parsing<'p>(&mut self, py: Python<'p>, logmsg: PyLogMessage, message: &str) -> PyResult<PyObject> {
        debug!("Trying to call parse() method on Python parser");
        let call_args = (logmsg, message);
        self.parser.call_method(py, "parse", call_args, None)
    }

    /// Extracts a `bool` from the object returned by `parse()`; fails if the extraction
    /// is not possible.
    pub fn process_parse_result<'p>(py: Python<'p>, result: PyObject) -> PyResult<bool> {
        debug!("Trying to check the result of parse()");
        result.extract::<bool>(py)
    }

    /// Calls `parse()` and converts its return value into a `bool` in one step.
    pub fn call_parse<'p>(&mut self, py: Python<'p>, logmsg: PyLogMessage, input: &str) -> PyResult<bool> {
        self.process_parsing(py, logmsg, input)
            .and_then(|returned| Self::process_parse_result(py, returned))
    }
}

impl<P: Pipe> Parser<P> for PythonParser<P> {
    /// Hands `input` and a Python wrapper around a clone of `logmsg` to the Python
    /// parser's `parse()` method and returns its boolean verdict.
    ///
    /// Every failure is logged and reported as `false`: the wrapper cannot be created,
    /// `parse()` raises, or its return value cannot be extracted as a `bool`.
    fn parse(&mut self, _: &mut P, logmsg: &mut LogMessage, input: &str) -> bool {
        let gil = Python::acquire_gil();
        let py = gil.python();

        let pylogmsg = match PyLogMessage::new(py, logmsg.clone()) {
            Ok(wrapped) => wrapped,
            // I didn't find a way to test this case :-(
            Err(error) => {
                error!("Failed to create PyLogMessage: {:?}", error);
                return false;
            }
        };

        match self.call_parse(py, pylogmsg, input) {
            Ok(verdict) => verdict,
            Err(error) => {
                // This branch also covers exceptions raised inside parse() itself.
                error!("Failed to extract return value of parse() method: {:?}", error);
                false
            }
        }
    }
}

// Expands the `parser_plugin!` macro for `PythonParserBuilder<LogParser>`.
// NOTE(review): the macro and `LogParser` are presumably provided by `syslog_ng_common`
// (imported with `#[macro_use]`) and export this builder as the plugin's parser — confirm
// against that crate.
parser_plugin!(PythonParserBuilder<LogParser>);
Loading

0 comments on commit c7819ba

Please sign in to comment.