This repository was archived by the owner on Dec 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.php
251 lines (232 loc) · 11.9 KB
/
main.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
<?php
/**
* Fetches DMARC aggregate reports using IMAP and indexes each record using
* Elastic Search.
*
* @copyright Copyright 2018 Clay Freeman. All rights reserved.
* @license GNU Lesser General Public License v3 (LGPL-3.0).
*/
////////////////////////////////////////////////////////////////////////////
///////////////////// SECTION 0: Runtime Configuration /////////////////////
////////////////////////////////////////////////////////////////////////////
// Pattern used to recognise supported attachment file names. The named group
// "extension" captures the extension, optionally followed by a second
// compression suffix (for example "xml", "zip" or "xml.gz")
define('FILENAME_REGEX',
  '/^(.+?)\\.(?P<extension>(?:(?:xml|zip|tar|gz|bz2))(?:\\.(?:gz|bz2))?)$/i');
// Pull in the Composer autoloader located in "vendor" beside this script
require_once(__DIR__.DIRECTORY_SEPARATOR.'vendor'.DIRECTORY_SEPARATOR.
  'autoload.php');
// Pull in the configuration file ("config.php") located beside this script
require_once(__DIR__.DIRECTORY_SEPARATOR.'config.php');
////////////////////////////////////////////////////////////////////////////
//////////////////////// SECTION 1: IMAP Connection ////////////////////////
////////////////////////////////////////////////////////////////////////////
// Attempt to connect to the IMAP server using the configured details. The
// OP_HALFOPEN flag opens the connection without opening a mailbox
$imap = imap_open($GLOBALS['imap_server'], $GLOBALS['imap_username'],
  $GLOBALS['imap_password'], OP_HALFOPEN);
// Determine if the connection attempt was successful. imap_open() returns
// false on failure; on success it returns a resource before PHP 8.1 and an
// IMAP\Connection object from PHP 8.1 onwards, so an is_resource() check
// would wrongly reject every successful connection on newer versions
if ($imap === false) {
  throw new \Exception('Could not connect to IMAP server. Are the '.
    'configured credentials correct?');
}
// Determine whether we should list available mailboxes or open a mailbox
$mailbox = $GLOBALS['imap_mailbox'] ?? '';
if (is_string($mailbox) && strlen($mailbox) > 0) {
  // Attempt to re-open the IMAP connection to the specified mailbox
  if (!imap_reopen($imap, $GLOBALS['imap_server'].$mailbox)) {
    throw new \Exception('Could not switch mailbox. Does the configured '.
      'mailbox exist?');
  }
} else {
  // No mailbox is configured: fetch a list of mailboxes on the remote server
  $mailboxes = imap_list($imap, $GLOBALS['imap_server'], '*');
  if (!is_array($mailboxes)) {
    throw new \Exception('Could not fetch a list of mailboxes.');
  }
  // Print the list of mailboxes, removing the leading server specification
  // from each name so only the mailbox part is shown
  echo "Available mailboxes:\r\n";
  $prefix_length = strlen($GLOBALS['imap_server']);
  foreach ($mailboxes as $available) {
    echo ' - '.var_export(substr($available, $prefix_length), true)."\r\n";
  }
  // Nothing further can be done without a configured mailbox
  exit(0);
}
////////////////////////////////////////////////////////////////////////////
///////////////////////// SECTION 2: Message Query /////////////////////////
////////////////////////////////////////////////////////////////////////////
// Read the optional query settings; empty() treats a missing key the same as
// a falsy value so an absent setting simply disables the criterion
$unseen_only = !empty($GLOBALS['imap_flag_unseen_only']);
$recipient   = !empty($GLOBALS['imap_filter_recipient'])
  ? $GLOBALS['imap_filter_recipient'] : false;
// Build an IMAP search string based on the configuration; array_filter()
// drops the criteria that evaluated to false
$search = implode(' ', array_filter([ 'ALL',
  // Determine whether we should fetch all messages or only unseen
  ($unseen_only ? 'UNSEEN' : false),
  // Determine if we should limit messages by recipient. Double quotes are
  // removed from the value because it is wrapped in double quotes here. The
  // replacement must be an empty string: passing null to str_replace() is
  // deprecated as of PHP 8.1
  ($recipient !== false
    ? 'TO "'.str_replace('"', '', $recipient).'"' : false)
]));
// Perform the IMAP search so that the results can be processed. SE_UID makes
// the search return UIDs; a false result (no matches or an error) is
// normalised to an empty array
$messages = imap_search($imap, $search, SE_UID, 'UTF-8') ?: [];
////////////////////////////////////////////////////////////////////////////
///////////////////// SECTION 3: Attachment Extraction /////////////////////
////////////////////////////////////////////////////////////////////////////
// Create the MIME parser used to decode the raw source of each message
$parser = new \ZBateson\MailMimeParser\MailMimeParser();
// Gather one descriptor object per compatible attachment across all messages
$atts = [];
foreach ($messages as $id) {
  // Download the full raw message by UID and hand it to the MIME parser
  $msg = $parser->parse(imap_fetchbody($imap, $id, '', FT_UID));
  // First pass: build a descriptor for every attachment part of the message
  $described = [];
  foreach ($msg->getAllAttachmentParts() as $key => $part) {
    // Read the file name from the Content-Disposition header ('' is passed
    // as the fallback value)
    $name = $part->getHeaderParameter('Content-Disposition', 'filename', '');
    // Read the body of the attachment
    $content = $part->getContent();
    // Match the file name against the supported pattern
    $matches = null;
    @preg_match(FILENAME_REGEX, $name, $matches);
    $matches = $matches ?? [];
    // Keep only the named capture groups (string keys) as object properties
    $info = (object)array_filter($matches, 'is_string', ARRAY_FILTER_USE_KEY);
    // Store the descriptor fully describing the file
    $described[$key] = (object)[
      'content' => $content, 'info' => $info, 'name' => $name
    ];
  }
  // Second pass: keep the descriptors whose file name yielded an extension
  foreach ($described as $att) {
    if (property_exists($att->info, 'extension')) {
      $atts[] = $att;
      continue;
    }
    // Log the rejected attachment without dumping its content
    unset($att->content);
    trigger_error('Skipping attachment: '.var_export($att, true).' '.
      'Message ID: '.$id.' Reason: File extension not found.');
  }
}
////////////////////////////////////////////////////////////////////////////
///////////////////// SECTION 4: Attachment Processing /////////////////////
////////////////////////////////////////////////////////////////////////////
// Reduce the array of attachments to an array of records. Each attachment
// yields a list of flat record arrays (report-level fields merged with the
// fields of one <record> element) or false when it has to be skipped;
// array_filter() drops the skipped (and empty) results before merging
$records = array_merge([], ...array_filter(array_map(function($att) {
  // Force the file's extension to lowercase (for reliability)
  $att->info->extension = strtolower($att->info->extension);
  try {
    // Determine whether this attachment requires decompression
    if ($att->info->extension !== 'xml') {
      // Generate a random file name for temporary use; the original extension
      // is kept on the temporary file name
      $path = implode(DIRECTORY_SEPARATOR, [sys_get_temp_dir(),
        bin2hex(random_bytes(16)).'.'.$att->info->extension]);
      try {
        // Write the current attachment content to the temporary file
        if (file_put_contents($path, $att->content) === false) {
          throw new \RuntimeException('Could not write temporary file.');
        }
        // Attempt to open the temporary file with UnifiedArchive
        $archive = \wapmorgan\UnifiedArchive\UnifiedArchive::open($path);
        // Ensure that a valid instance of UnifiedArchive was returned; calling
        // a method on a failed result would abort the whole script
        if (!is_object($archive)) {
          throw new \RuntimeException('Could not open archive.');
        }
        // Find the path of the first XML member in this archive
        $members = array_values(array_filter($archive->getFileNames(),
          function($member) {
            return preg_match('/\\.xml$/i', $member) === 1;
          }));
        if (count($members) === 0) {
          throw new \RuntimeException('No XML file found in archive.');
        }
        // Assign the contents of the first XML file
        $att->content = $archive->getFileContent($members[0]);
      } finally {
        // Remove the temporary file from disk, even when extraction failed
        if (is_file($path)) {
          unlink($path);
        }
      }
    }
    // Augment the content by parsing it with SimpleXMLElement (throws an
    // exception when the content is not well-formed XML)
    $att->content = new \SimpleXMLElement($att->content);
    // Create some local shortcuts to long hierarchies; an empty stdClass
    // stands in for any section that is missing from the report
    $metadata = $att->content->report_metadata ?: new stdClass;
    $date     = $att->content->report_metadata->date_range ?: new stdClass;
    $policy   = $att->content->policy_published ?: new stdClass;
    // Assemble a base array from generic report information. Every value
    // falls back to null when it is empty (note that "?:" also maps "0" and
    // 0 to null). Timestamps are multiplied by 1000 (presumably seconds to
    // milliseconds for the "date" mapping; confirm against the index setup)
    $base = [
      'org_name'           => (string)$metadata->org_name           ?: null,
      'email'              => (string)$metadata->email              ?: null,
      'extra_contact_info' => (string)$metadata->extra_contact_info ?: null,
      'report_id'          => (string)$metadata->report_id          ?: null,
      'begin_timestamp'    => (int)   $date->begin * 1000           ?: null,
      'end_timestamp'      => (int)   $date->end   * 1000           ?: null,
      'domain'             => (string)$policy->domain               ?: null,
      'adkim'              => (string)$policy->adkim                ?: null,
      'aspf'               => (string)$policy->aspf                 ?: null,
      'p'                  => (string)$policy->p                    ?: null,
      'sp'                 => (string)$policy->sp                   ?: null,
      'pct'                => (string)$policy->pct                  ?: null,
    ];
    // Iterate over each <record> element in the report
    $results = [];
    foreach ($att->content->record as $item) {
      // Create some local shortcuts to long hierarchies
      $row              = $item->row;
      $policy_evaluated = $row ->policy_evaluated;
      $identifiers      = $item->identifiers;
      $auth_results     = $item->auth_results;
      // Merge the following record array with the base array
      $results[] = array_merge($base, [
        'source_ip'     => (string)$row->source_ip              ?: null,
        'count'         => (int)   $row->count                  ?: null,
        'disposition'   => (string)$policy_evaluated->disposition ?: null,
        'dkim'          => (string)$policy_evaluated->dkim      ?: null,
        'spf'           => (string)$policy_evaluated->spf       ?: null,
        'reason'        => (string)$policy_evaluated->reason    ?: null,
        'envelope_to'   => (string)$identifiers->envelope_to    ?: null,
        'envelope_from' => (string)$identifiers->envelope_from  ?: null,
        'header_from'   => (string)$identifiers->header_from    ?: null,
        // Kept as the parsed XML element (or an empty array when absent)
        'auth_results'  => $auth_results ?: []
      ]);
    }
    // Return the record result set for this attachment
    return $results;
  } catch (\Exception $e) {
    // Skip this attachment, logging it (without its content) together with
    // the reason so that failures can be diagnosed
    unset($att->content);
    trigger_error('Skipping attachment: '.json_encode($att).' '.
      'Reason: '.$e->getMessage());
    return false;
  }
}, $atts)));
////////////////////////////////////////////////////////////////////////////
///////////////////////// SECTION 5: Index Records /////////////////////////
////////////////////////////////////////////////////////////////////////////
// Build an instance used to interface with Elastic Search over cURL
$client = \Elasticsearch\ClientBuilder::create()->build();
// Compute the name of the daily index ("dmarc-YYYY.MM.DD") that a record
// belongs to, based on its end timestamp (stored multiplied by 1000)
$index_name = function(array $record) {
  return 'dmarc-'.date('Y.m.d', $record['end_timestamp'] / 1000);
};
// Create a list of indices based on the end timestamp of each record
$dates = array_unique(array_map($index_name, $records));
// Define a list of columns separated by column type for building a schema
$column_types = [
  'keyword' => ['adkim', 'aspf', 'p', 'sp', 'disposition', 'dkim', 'spf'],
  'long'    => ['pct', 'count'],
  'date'    => ['begin_timestamp', 'end_timestamp'],
  'ip'      => ['source_ip'],
  'object'  => ['auth_results']
];
// Build the final schema mapping each column name to its type definition.
// A plain nested loop is used so that no by-reference loop variable is left
// dangling after the loop
$schema = [];
foreach ($column_types as $type => $columns) {
  foreach ($columns as $column) {
    $schema[$column] = ['type' => $type];
  }
}
// Iterate over the list of indices that should exist before indexing records
foreach ($dates as $date) {
  try {
    // Ensure that the required index is not yet defined before defining it;
    // the lookup is expected to throw when the mapping does not exist
    $client->indices()->getMapping(['index' => $date, 'type' => 'doc']);
  } catch (\Exception $e) {
    // Create the required index using the pre-built schema
    $client->indices()->create([
      'index' => $date,
      'body'  => ['mappings' => ['doc' => ['properties' => $schema]]]
    ]);
  }
}
// Iterate over each record to be indexed in Elastic Search
foreach ($records as $record) {
  try {
    // Attempt to index the record into its daily index
    $client->index([
      'index' => $index_name($record),
      'type'  => 'doc',
      'body'  => $record
    ]);
  } catch (\Exception $e) {
    // Keep going with the remaining records, but report what failed and why
    trigger_error('Unable to index record: '.var_export($record, true).' '.
      'Reason: '.$e->getMessage());
  }
}