12
12
use HelgeSverre \Extractor \Text \Loaders \Web ;
13
13
use HelgeSverre \Extractor \Text \Loaders \Word ;
14
14
use Illuminate \Contracts \Container \Container ;
15
+ use Illuminate \Support \Str ;
15
16
use Illuminate \Support \Traits \Macroable ;
16
17
use InvalidArgumentException ;
17
18
@@ -38,6 +39,39 @@ public function create(string $type): TextLoader
38
39
};
39
40
}
40
41
42
/**
 * Pick a loader based on a MIME type and run it against the given content.
 *
 * Arms are evaluated top to bottom and the first match wins, so the order
 * below is significant.
 *
 * @param  string  $mime  MIME type describing $content.
 * @param  mixed  $content  Payload handed unchanged to the selected loader.
 * @return TextContent|null  Null when $content is blank, otherwise the
 *                           selected loader's result.
 */
public function fromMime(string $mime, mixed $content): ?TextContent
{
    return match (true) {
        blank($content) => null,

        // The fallback is wrapped in a closure so it only runs when the
        // primary loader throws. Passing the call result directly would
        // evaluate it eagerly, before rescue() is even entered.
        Str::contains($mime, 'image') => rescue(
            callback: fn () => $this->textract($content),
            rescue: fn () => $this->textractUsingS3Upload($content)
        ),
        Str::contains($mime, 'pdf') => rescue(
            callback: fn () => $this->pdf($content),
            rescue: fn () => $this->textractUsingS3Upload($content)
        ),

        // Must come before the xml/html arm: the OOXML types contain the
        // substring "xml" ("openxmlformats") and would otherwise be sent
        // to the HTML loader.
        // List taken from: https://stackoverflow.com/questions/4212861/what-is-a-correct-mime-type-for-docx-pptx-etc
        in_array($mime, [
            'application/msword',
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
            'application/vnd.ms-word.document.macroEnabled.12',
            'application/vnd.ms-word.template.macroEnabled.12',
        ], true) => $this->word($content),

        Str::contains($mime, ['xml', 'html']) => $this->html($content),
        Str::contains($mime, 'text/plain') => $this->text($content),
        Str::contains($mime, 'text/rtf') => $this->rtf($content),

        // Not commonly used, but supported anyway.
        Str::contains($mime, 'text/x-uri') => $this->web($content),

        // Anything unrecognised goes to the S3-backed Textract loader.
        default => $this->textractUsingS3Upload($content),
    };
}
74
+
41
75
// Convenience Methods
42
76
public function html (mixed $ data ): ?TextContent
43
77
{
0 commit comments