diff --git a/docs/_posts/aymanechilah/2023-01-10-general_model_table_detection_v2_en_3_2.md b/docs/_posts/aymanechilah/2023-01-10-general_model_table_detection_v2_en_3_2.md index e716e0f821..6a104475b2 100644 --- a/docs/_posts/aymanechilah/2023-01-10-general_model_table_detection_v2_en_3_2.md +++ b/docs/_posts/aymanechilah/2023-01-10-general_model_table_detection_v2_en_3_2.md @@ -25,7 +25,7 @@ Here it is used the CascadeTabNet general model for table detection inspired by ## Predicted Entities {:.btn-box} - +[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_TABLE_DETECTION_ONLY/){:.button.button-orange.button-orange-trans.co.button-icon} [Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-ocr-workshop/blob/master/jupyter/Cards/SparkOcrImageTableDetection.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} [Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/general_model_table_detection_v2_en_3.3.0_3.0_1623301511401.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden} [Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/ocr/general_model_table_detection_v2_en_3.3.0_3.0_1623301511401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} diff --git a/docs/_posts/aymanechilah/2023-01-10-ocr_small_handwritten_en_2_4.md b/docs/_posts/aymanechilah/2023-01-10-ocr_small_handwritten_en_2_4.md index c753452825..0d9a7f686c 100644 --- a/docs/_posts/aymanechilah/2023-01-10-ocr_small_handwritten_en_2_4.md +++ b/docs/_posts/aymanechilah/2023-01-10-ocr_small_handwritten_en_2_4.md @@ -47,8 +47,7 @@ text_detector = ImageTextDetectorV2 \ .setOutputCol("text_regions") \ .setWithRefiner(True) \ .setSizeThreshold(-1) \ - .setLinkThreshold(0.3) \ - .setWidth(500) + .setLinkThreshold(0.3).setWidth(500) # Try "ocr_base_handwritten" for better quality ocr = ImageToTextV2.pretrained("ocr_small_handwritten", "en", "clinical/ocr") \ diff --git a/docs/_posts/aymanechilah/2023-01-10-visualner_keyvalue_10kfilings_en_3_2.md b/docs/_posts/aymanechilah/2023-01-10-visualner_keyvalue_10kfilings_en_3_2.md index eaba18573c..9ae588a524 100644 --- a/docs/_posts/aymanechilah/2023-01-10-visualner_keyvalue_10kfilings_en_3_2.md +++ b/docs/_posts/aymanechilah/2023-01-10-visualner_keyvalue_10kfilings_en_3_2.md @@ -26,7 +26,8 @@ This is a Form Recognition / Key Value extraction model, trained on the summary `KEY`, `VALUE`, `HEADER` {:.btn-box} -[Live Demo](https://nlp.johnsnowlabs.com/demos){:.button.button-orange.button-orange-trans.co.button-icon} +[Live Demo](https://demo.johnsnowlabs.com/finance/VISUALNER_10KFILINGS/){:.button.button-orange.button-orange-trans.co.button-icon} + [Open in Colab](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/finance-nlp/90.2.Financial_Visual_NER.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} [Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/visualner_keyvalue_10kfilings_en_4.0.0_3.2_1663781115795.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden} [Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/ocr/visualner_keyvalue_10kfilings_en_4.0.0_3.2_1663781115795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} diff --git a/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_en_3_2.md b/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_en_3_2.md index 086ed8d4da..880145c2ac 100644 --- a/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_en_3_2.md +++ 
b/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_en_3_2.md @@ -49,7 +49,7 @@ text_detector = ImageTextDetectorV2 \ .setLinkThreshold(0.3) \ .setWidth(500) -ocr = ImageToTextV2Opt.pretrained("ocr_base_handwritten_v2", "en", "clinical/ocr") \ +ocr = ImageToTextV2.pretrained("ocr_base_handwritten_v2", "en", "clinical/ocr") \ .setInputCols(["image", "text_regions"]) \ .setGroupImages(True) \ .setOutputCol("text") \ diff --git a/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_opt_en_3_2.md b/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_opt_en_3_2.md index 06eb3798f7..63b96b89ba 100644 --- a/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_opt_en_3_2.md +++ b/docs/_posts/aymanechilah/2023-01-17-ocr_base_handwritten_v2_opt_en_3_2.md @@ -50,7 +50,7 @@ text_detector = ImageTextDetectorV2 \ .setLinkThreshold(0.3) \ .setWidth(500) -ocr = ImageToTextV2Opt.pretrained("ocr_base_handwritten_v2_opt", "en", "clinical/ocr") \ +ocr = ImageToTextV2.pretrained("ocr_base_handwritten_v2_opt", "en", "clinical/ocr") \ .setInputCols(["image", "text_regions"]) \ .setGroupImages(True) \ .setOutputCol("text") \ diff --git a/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_en_3_2.md b/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_en_3_2.md index a840962511..c4a48e27f9 100644 --- a/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_en_3_2.md +++ b/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_en_3_2.md @@ -50,7 +50,7 @@ text_detector = ImageTextDetectorV2 \ .setLinkThreshold(0.3) \ .setWidth(500) -ocr = ImageToTextV2Opt.pretrained("ocr_base_printed_v2", "en", "clinical/ocr") \ +ocr = ImageToTextV2.pretrained("ocr_base_printed_v2", "en", "clinical/ocr") \ .setInputCols(["image", "text_regions"]) \ .setGroupImages(True) \ .setOutputCol("text") \ diff --git a/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_opt_en_3_2.md b/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_opt_en_3_2.md index d0c17eeab8..756bf162e5 100644 --- a/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_opt_en_3_2.md +++ b/docs/_posts/aymanechilah/2023-01-17-ocr_base_printed_v2_opt_en_3_2.md @@ -49,7 +49,7 @@ text_detector = ImageTextDetectorV2 \ .setLinkThreshold(0.3) \ .setWidth(500) -ocr = ImageToTextV2Opt.pretrained("ocr_base_printed_v2_opt", "en", "clinical/ocr") \ +ocr = ImageToTextV2.pretrained("ocr_base_printed_v2_opt", "en", "clinical/ocr") \ .setInputCols(["image", "text_regions"]) \ .setGroupImages(True) \ .setOutputCol("text") \ diff --git a/docs/_posts/aymanechilah/2023-07-11-dit_base_finetuned_rvlcdip_en_3_2.md b/docs/_posts/aymanechilah/2023-07-11-dit_base_finetuned_rvlcdip_en_3_2.md new file mode 100644 index 0000000000..028d755a69 --- /dev/null +++ b/docs/_posts/aymanechilah/2023-07-11-dit_base_finetuned_rvlcdip_en_3_2.md @@ -0,0 +1,116 @@ +--- +layout: model +title: DiT model pretrained on IIT-CDIP and finetuned on RVL-CDIP for document classification +author: John Snow Labs +name: dit_base_finetuned_rvlcdip +date: 2023-07-11 +tags: [en, licensed] +task: OCR Document Classification +language: en +nav_key: models +edition: Visual NLP 4.0.0 +spark_version: 3.2.1 +supported: true +annotator: VisualDocumentClassifierv3 +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +DiT was proposed in DiT: Self-supervised Pre-training for Document Image Transformer by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei. 
DiT applies the self-supervised objective of BEiT (BERT pre-training of Image Transformers) to 42 million document images. This model was trained for document image classification on the RVL-CDIP dataset (a collection of 400,000 images, each belonging to one of 16 classes). + +The abstract from the paper is the following: Image Transformer has recently achieved significant progress for natural image understanding, either using supervised (ViT, DeiT, etc.) or self-supervised (BEiT, MAE, etc.) pre-training techniques. In this paper, we propose DiT, a self-supervised pre-trained Document Image Transformer model using large-scale unlabeled text images for Document AI tasks, which is essential since no supervised counterparts ever exist due to the lack of human labeled document images. We leverage DiT as the backbone network in a variety of vision-based Document AI tasks, including document image classification, document layout analysis, as well as table detection. Experiment results have illustrated that the self-supervised pre-trained DiT model achieves new state-of-the-art results on these downstream tasks, e.g. document image classification (91.11 → 92.69), document layout analysis (91.0 → 94.9) and table detection (94.23 → 96.55). + + +## Predicted Entities + + + +{:.btn-box} +[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_CLASSIFIER/){:.button.button-orange.button-orange-trans.co.button-icon} +[Open in Colab](https://github.com/JohnSnowLabs/spark-ocr-workshop/blob/master/tutorials/Certification_Trainings/5.2.Visual_Document_Classifier_v3.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/dit_base_finetuned_rvlcdip_en_3.3.0_3.0_1654798502586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} + +## How to use + +
+ {% include programmingLanguageSelectScalaPythonNLU.html %} + +```python +binary_to_image = BinaryToImage()\ + .setInputCol("content") \ + .setOutputCol("image") \ + .setImageType(ImageType.TYPE_3BYTE_BGR) + +doc_class = VisualDocumentClassifierV3() \ + .pretrained("dit_base_finetuned_rvlcdip", "en", "clinical/ocr") \ + .setInputCols(["image"]) \ + .setOutputCol("label") + +# OCR pipeline +pipeline = PipelineModel(stages=[ + binary_to_image, + doc_class +]) + +test_image_path = pkg_resources.resource_filename('sparkocr', 'resources/ocr/visualdoc/00556614_00556648.tif') +bin_df = spark.read.format("binaryFile").load(test_image_path).limit(50) + +results = pipeline.transform(bin_df).cache() +``` +```scala +val binary_to_image = new BinaryToImage() + .setInputCol("content") + .setOutputCol("image") + .setImageType(ImageType.TYPE_3BYTE_BGR) + +val doc_class = VisualDocumentClassifierV3() + .pretrained("dit_base_finetuned_rvlcdip", "en", "clinical/ocr") + .setInputCols(Array("image")) + .setOutputCol("label") + +// OCR pipeline +val pipeline = new PipelineModel().setStages(Array( + binary_to_image, + doc_class)) + +val test_image_path = pkg_resources.resource_filename("sparkocr", "resources/ocr/visualdoc/00556614_00556648.tif") +val bin_df = spark.read.format("binaryFile").load(test_image_path).limit(50) + +val results = pipeline.transform(bin_df).cache() +``` +
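After the transform, the predictions are held in an ordinary Spark DataFrame, so they can be inspected with standard DataFrame operations. A minimal sketch in Python, assuming the `path` column produced by Spark's `binaryFile` reader is still present alongside the classifier's `label` output column:

```python
# `results` is the DataFrame returned by pipeline.transform(bin_df) above.
# Show the predicted class next to the source file path.
results.select("path", "label").show(truncate=False)

# Count how many documents fall into each predicted class.
results.groupBy("label").count().show()
```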
+ +## Example + +### Input: +![Screenshot](/assets/images/examples_ocr/image1.png) + +### Output: +```bash ++-------+ +|label | ++-------+ +|invoice| ++-------+ +``` + + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dit_base_finetuned_rvlcdip| +|Type:|ocr| +|Compatibility:|Visual NLP 4.0.0+| +|License:|Licensed| +|Edition:|Official| +|Language:|en| +|Size:|319.6 MB| + +## References + +IIT-CDIP, RVL-CDIP \ No newline at end of file diff --git a/docs/_posts/aymanechilah/2023-07-11-tabform_v1_en_3_2.md b/docs/_posts/aymanechilah/2023-07-11-tabform_v1_en_3_2.md new file mode 100644 index 0000000000..e4f0c07f7c --- /dev/null +++ b/docs/_posts/aymanechilah/2023-07-11-tabform_v1_en_3_2.md @@ -0,0 +1,115 @@ +--- +layout: model +title: Table and Form Detection +author: John Snow Labs +name: tabform_v1 +date: 2023-07-11 +tags: [en, licensed] +task: Table and Form Detection +language: en +nav_key: models +edition: Visual NLP 4.3.0 +spark_version: 3.2.1 +supported: true +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model detects table and form regions in document images, returning a bounding box for each detected region. + +## Predicted Entities + +{:.btn-box} +[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_TABLE_FORM_DETECTION/){:.button.button-orange.button-orange-trans.co.button-icon} +[Open in Colab](https://github.com/JohnSnowLabs/spark-ocr-workshop/blob/master/jupyter/Cards/SparkOcrImageTableAndFormDetection.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/tabform_v1_en_4.2.5_3.2_1677478327651.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/ocr/tabform_v1_en_4.2.5_3.2_1677478327651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + +
+ {% include programmingLanguageSelectScalaPythonNLU.html %} + +```python +binary_to_image = BinaryToImage() \ + .setImageType(ImageType.TYPE_3BYTE_BGR) + +region_detector = ImageDocumentRegionDetector.pretrained("tabform_v1", "en", "clinical/ocr") \ + .setInputCol("image") \ + .setOutputCol("regions") \ + .setScoreThreshold(0.25) + +draw_regions = ImageDrawRegions() \ + .setInputCol("image") \ + .setInputRegionsCol("regions") \ + .setOutputCol("image_with_regions") \ + .setRectColor(Color.red) + +pipeline = PipelineModel(stages=[ + binary_to_image, + region_detector, + draw_regions +]) + +imagePath = "data/tabform_images/irs_sp_1.jpg" +image_df = spark.read.format("binaryFile").load(imagePath) + +result = pipeline.transform(image_df) +``` +```scala +val binary_to_image = new BinaryToImage() + .setImageType(ImageType.TYPE_3BYTE_BGR) + +val region_detector = ImageDocumentRegionDetector.pretrained("tabform_v1", "en", "clinical/ocr") + .setInputCol("image") + .setOutputCol("regions") + .setScoreThreshold(0.25) + +val draw_regions = new ImageDrawRegions() + .setInputCol("image") + .setInputRegionsCol("regions") + .setOutputCol("image_with_regions") + .setRectColor(Color.red) + +val pipeline = new PipelineModel().setStages(Array( + binary_to_image, + region_detector, + draw_regions)) + +val imagePath = "data/tabform_images/irs_sp_1.jpg" +val image_df = spark.read.format("binaryFile").load(imagePath) + +val result = pipeline.transform(image_df) +``` +
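The output of `pipeline.transform` is a plain Spark DataFrame, so the detected regions can be checked without extra tooling. A minimal sketch in Python, assuming the column names used in the pipeline above (`regions` from ImageDocumentRegionDetector, `image_with_regions` from ImageDrawRegions):

```python
# `result` is the DataFrame returned by pipeline.transform(image_df) above.
# Inspect the coordinates and scores of the detected table/form regions.
result.select("regions").show(truncate=False)

# The page with the drawn rectangles is kept in the "image_with_regions" column;
# the workshop notebooks typically render it with a helper such as
# display_images(result, "image_with_regions") (an assumption, not used here).
```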
+ +## Example + +{%- capture input_image -%} +![Screenshot](/assets/images/examples_ocr/tabform_input.png) +{%- endcapture -%} + +{%- capture output_image -%} +![Screenshot](/assets/images/examples_ocr/tabform_output.png) +{%- endcapture -%} + + +{% include templates/input_output_image.md +input_image=input_image +output_image=output_image +%} + +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tabform_v1| +|Type:|ocr| +|Compatibility:|Visual NLP 4.3.0+| +|License:|Licensed| +|Edition:|Official| +|Language:|en| + diff --git a/docs/assets/images/examples_ocr/tabform_input.png b/docs/assets/images/examples_ocr/tabform_input.png new file mode 100644 index 0000000000..bc86dc1186 Binary files /dev/null and b/docs/assets/images/examples_ocr/tabform_input.png differ diff --git a/docs/assets/images/examples_ocr/tabform_output.png b/docs/assets/images/examples_ocr/tabform_output.png new file mode 100644 index 0000000000..f515cf5aac Binary files /dev/null and b/docs/assets/images/examples_ocr/tabform_output.png differ