|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +from dateutil.parser import parse |
| 4 | + |
| 5 | +from intelmq.lib.bot import ParserBot |
| 6 | +from intelmq.lib.harmonization import DateTime |
| 7 | +from intelmq.lib.message import Message |
| 8 | +from intelmq.lib.utils import base64_decode |
| 9 | +from intelmq.lib.harmonization import DateTime |
| 10 | + |
| 11 | + |
| 12 | +class JSONCustomParserBot(ParserBot): |
| 13 | + |
| 14 | + def init(self): |
| 15 | + self.time_format = getattr(self.parameters, "time_format", None) |
| 16 | + if self.time_format and self.time_format.split('|')[0] not in DateTime.TIME_CONVERSIONS.keys(): |
| 17 | + raise InvalidArgument('time_format', got=self.time_format, |
| 18 | + expected=list(DateTime.TIME_CONVERSIONS.keys()), |
| 19 | + docs='https://intelmq.readthedocs.io/en/latest/user/Bots.html#json-custom-parser') |
| 20 | + |
| 21 | + self.json_data_format = getattr(self.parameters, 'json_data_format', False) |
| 22 | + self.json_data_key = getattr(self.parameters, 'json_data_key', '') |
| 23 | + self.multiple_msg_field = getattr(self.parameters, 'multiple_msg_field', None) |
| 24 | + self.translate_fields = getattr(self.parameters, 'translate_fields', {}) |
| 25 | + self.split_lines = getattr(self.parameters, 'splitlines', False) |
| 26 | + self.default_url_protocol = getattr(self.parameters, 'default_url_protocol', 'http://') |
| 27 | + self.classification_type = getattr(self.parameters, 'type') |
| 28 | + |
| 29 | + def flatten_json(self, json_object): |
| 30 | + out = {} |
| 31 | + |
| 32 | + def flatten(x, name='', separator='.'): |
| 33 | + if type(x) is dict: |
| 34 | + for a in x: |
| 35 | + flatten(x[a], name + a + separator) |
| 36 | + else: |
| 37 | + out[name[:-1]] = x |
| 38 | + |
| 39 | + flatten(json_object) |
| 40 | + return out |
| 41 | + |
| 42 | + def process(self): |
| 43 | + |
| 44 | + report = self.receive_message() |
| 45 | + raw_report = base64_decode(report["raw"]) |
| 46 | + |
| 47 | + if self.json_data_format: |
| 48 | + lines = Message.unserialize(raw_report)[self.json_data_key] |
| 49 | + elif self.split_lines: |
| 50 | + lines = raw_report.splitlines() |
| 51 | + else: |
| 52 | + lines = [raw_report] |
| 53 | + |
| 54 | + for line in lines: |
| 55 | + if not line: |
| 56 | + continue |
| 57 | + |
| 58 | + msg = Message.unserialize(line) if not self.json_data_format else line |
| 59 | + flatten_msg = self.flatten_json(msg) |
| 60 | + event_msg = {} |
| 61 | + |
| 62 | + for key in self.translate_fields: |
| 63 | + data = flatten_msg.get(self.translate_fields[key]) |
| 64 | + |
| 65 | + if key in ["time.source", "time.destination"]: |
| 66 | + try: |
| 67 | + data = int(data) |
| 68 | + except ValueError: |
| 69 | + pass |
| 70 | + data = DateTime.convert(data, format=self.time_format) |
| 71 | + |
| 72 | + elif key.endswith('.url'): |
| 73 | + if not data: |
| 74 | + continue |
| 75 | + if '://' not in data: |
| 76 | + data = self.default_url_protocol + data |
| 77 | + |
| 78 | + event_msg[key] = data |
| 79 | + |
| 80 | + multiple_msgs = [] |
| 81 | + if self.multiple_msg_field in event_msg and type(event_msg[self.multiple_msg_field]) is list: |
| 82 | + for value in event_msg[self.multiple_msg_field]: |
| 83 | + new_msg = event_msg.copy() |
| 84 | + new_msg[self.multiple_msg_field] = value |
| 85 | + multiple_msgs.append(new_msg) |
| 86 | + else: |
| 87 | + multiple_msgs = [event_msg] |
| 88 | + |
| 89 | + for event_msg in multiple_msgs: |
| 90 | + event = self.new_event(report) |
| 91 | + event.update(event_msg) |
| 92 | + |
| 93 | + if self.classification_type and "classification.type" not in event: |
| 94 | + event.add('classification.type', self.classification_type) |
| 95 | + event['raw'] = Message.serialize(line) if self.json_data_format else line |
| 96 | + |
| 97 | + self.send_message(event) |
| 98 | + |
| 99 | + self.acknowledge_message() |
| 100 | + |
| 101 | + |
| 102 | +BOT = JSONCustomParserBot |
0 commit comments