From bd3f2ee722d692d5e3338ed59502801fa3081ed1 Mon Sep 17 00:00:00 2001 From: Guoxia Wang Date: Fri, 10 Sep 2021 20:27:15 +0800 Subject: [PATCH] init plsc v2.2.0 (#88) * init plsc v2.2.0 --- .gitignore | 106 +- .pre-commit-config.yaml | 8 +- LICENSE | 201 --- README.md | 222 +++- codestyle/.gitignore | 1 + codestyle/clang_format.hook | 15 + {tools/codestyle => codestyle}/copyright.hook | 0 codestyle/cpplint_pre_commit.hook | 27 + codestyle/docstring_checker.py | 349 +++++ .../pylint_pre_commit.hook | 0 codestyle/test_docstring_checker.py | 232 ++++ {plsc/utils => configs}/__init__.py | 2 +- configs/argparser.py | 276 ++++ configs/config.py | 64 + configs/ms1mv3_r100.py | 54 + configs/ms1mv3_r50.py | 54 + datasets/__init__.py | 15 + datasets/common_dataset.py | 134 ++ datasets/kv_helper.py | 68 + demo/custom_reader.py | 55 - docs/Makefile | 19 - docs/markdown2rst.py | 720 ---------- docs/source/conf.py | 173 --- docs/source/index.rst | 39 - docs/source/md/advanced.md | 319 ----- docs/source/md/api_reference.md | 120 -- docs/source/md/quick_start.md | 167 --- docs/source/md/serving.md | 72 - {plsc => dynamic/backbones}/__init__.py | 8 +- dynamic/backbones/iresnet.py | 318 +++++ dynamic/backbones/mobilefacenet.py | 162 +++ dynamic/classifiers/__init__.py | 15 + dynamic/classifiers/lsc.py | 147 +++ dynamic/export.py | 56 + dynamic/train.py | 223 ++++ .../utils}/__init__.py | 4 +- dynamic/utils/amp.py | 93 ++ dynamic/utils/data_parallel.py | 49 + dynamic/utils/io.py | 224 ++++ dynamic/utils/verification.py | 133 ++ dynamic/validation.py | 40 + images/fc_computing.gif | Bin 417 -> 0 bytes images/fc_computing_block.gif | Bin 986 -> 0 bytes images/fc_computing_block_global.gif | Bin 1162 -> 0 bytes images/plsc_overview.png | Bin 142261 -> 0 bytes images/softmax_computing.gif | Bin 505 -> 0 bytes install.md | 109 ++ plsc/config.py | 46 - plsc/entry.py | 1172 ----------------- plsc/models/base_model.py | 152 --- plsc/models/dist_algo.py | 587 --------- plsc/models/resnet.py | 199 --- plsc/utils/base64_reader.py | 298 ----- plsc/utils/fp16_lists.py | 138 -- plsc/utils/fp16_utils.py | 450 ------- plsc/utils/input_field.py | 85 -- plsc/utils/jpeg_reader.py | 373 ------ plsc/utils/verification.py | 175 --- requirement.txt | 17 + requirements.txt | 11 - scripts/export_dynamic.sh | 21 + .../__init__.py => scripts/export_static.sh | 20 +- scripts/inference.sh | 24 + train.sh => scripts/kill_train_process.sh | 2 +- scripts/perf_dynamic.sh | 40 + scripts/perf_runner.sh | 60 + scripts/perf_static.sh | 40 + scripts/train_dynamic.sh | 41 + scripts/train_static.sh | 41 + scripts/validation_dynamic.sh | 22 + scripts/validation_static.sh | 22 + serving/client/face_service/data/00000000.jpg | Bin 3738 -> 0 bytes serving/client/face_service/face_service.py | 92 -- serving/server/plsc_serving/run/__init__.py | 195 --- .../server/plsc_serving/server/conf/cube.conf | 15 - .../plsc_serving/server/conf/gflags.conf | 2 - .../server/conf/model_toolkit.prototxt | 11 - .../server/conf/model_toolkit.prototxt.0 | 11 - .../server/conf/model_toolkit.prototxt.1 | 11 - .../server/conf/model_toolkit.prototxt.10 | 11 - .../server/conf/model_toolkit.prototxt.11 | 11 - .../server/conf/model_toolkit.prototxt.12 | 11 - .../server/conf/model_toolkit.prototxt.13 | 11 - .../server/conf/model_toolkit.prototxt.14 | 11 - .../server/conf/model_toolkit.prototxt.15 | 11 - .../server/conf/model_toolkit.prototxt.2 | 11 - .../server/conf/model_toolkit.prototxt.3 | 11 - .../server/conf/model_toolkit.prototxt.4 | 11 - 
.../server/conf/model_toolkit.prototxt.5 | 11 - .../server/conf/model_toolkit.prototxt.6 | 11 - .../server/conf/model_toolkit.prototxt.7 | 11 - .../server/conf/model_toolkit.prototxt.8 | 11 - .../server/conf/model_toolkit.prototxt.9 | 11 - .../server/conf/resource.prototxt | 3 - .../server/conf/resource.prototxt.0 | 3 - .../server/conf/resource.prototxt.1 | 3 - .../server/conf/resource.prototxt.10 | 3 - .../server/conf/resource.prototxt.11 | 3 - .../server/conf/resource.prototxt.12 | 3 - .../server/conf/resource.prototxt.13 | 3 - .../server/conf/resource.prototxt.14 | 3 - .../server/conf/resource.prototxt.15 | 3 - .../server/conf/resource.prototxt.2 | 3 - .../server/conf/resource.prototxt.3 | 3 - .../server/conf/resource.prototxt.4 | 3 - .../server/conf/resource.prototxt.5 | 3 - .../server/conf/resource.prototxt.6 | 3 - .../server/conf/resource.prototxt.7 | 3 - .../server/conf/resource.prototxt.8 | 3 - .../server/conf/resource.prototxt.9 | 3 - .../plsc_serving/server/conf/service.prototxt | 4 - .../server/conf/workflow.prototxt | 8 - .../data/model/paddle/fluid_reload_flag | 2 - .../server/data/model/paddle/fluid_time_file | 2 - serving/server/setup.py | 45 - setup.py | 71 - .../backbones/__init__.py | 6 +- static/backbones/iresnet.py | 249 ++++ static/classifiers/__init__.py | 15 + static/classifiers/lsc.py | 128 ++ static/export.py | 94 ++ static/static_model.py | 157 +++ static/train.py | 217 +++ static/utils/__init__.py | 13 + static/utils/io.py | 195 +++ static/utils/optimization_pass.py | 119 ++ static/utils/verification.py | 130 ++ static/validation.py | 58 + tools/__init__.py | 13 + tools/benchmark_speed.py | 115 ++ tools/convert_image_bin.py | 92 ++ tools/export.py | 72 + tools/extract_perf_logs.py | 153 +++ tools/inference.py | 107 ++ tools/mx_recordio_2_images.py | 82 ++ tools/process_base64_files.py | 198 --- tools/test_recognition.py | 723 ++++++++++ train.py => tools/train.py | 41 +- tools/validation.py | 84 ++ utils/__init__.py | 13 + utils/logging.py | 101 ++ utils/losses.py | 40 + .../rearrange_weight.py | 80 +- utils/verification.py | 182 +++ 144 files changed, 6672 insertions(+), 6608 deletions(-) delete mode 100644 LICENSE create mode 100644 codestyle/.gitignore create mode 100755 codestyle/clang_format.hook rename {tools/codestyle => codestyle}/copyright.hook (100%) create mode 100755 codestyle/cpplint_pre_commit.hook create mode 100644 codestyle/docstring_checker.py rename {tools/codestyle => codestyle}/pylint_pre_commit.hook (100%) create mode 100644 codestyle/test_docstring_checker.py rename {plsc/utils => configs}/__init__.py (89%) create mode 100644 configs/argparser.py create mode 100644 configs/config.py create mode 100644 configs/ms1mv3_r100.py create mode 100644 configs/ms1mv3_r50.py create mode 100644 datasets/__init__.py create mode 100644 datasets/common_dataset.py create mode 100644 datasets/kv_helper.py delete mode 100644 demo/custom_reader.py delete mode 100644 docs/Makefile delete mode 100644 docs/markdown2rst.py delete mode 100644 docs/source/conf.py delete mode 100644 docs/source/index.rst delete mode 100644 docs/source/md/advanced.md delete mode 100644 docs/source/md/api_reference.md delete mode 100644 docs/source/md/quick_start.md delete mode 100644 docs/source/md/serving.md rename {plsc => dynamic/backbones}/__init__.py (77%) create mode 100644 dynamic/backbones/iresnet.py create mode 100644 dynamic/backbones/mobilefacenet.py create mode 100644 dynamic/classifiers/__init__.py create mode 100644 dynamic/classifiers/lsc.py create mode 100644 
dynamic/export.py create mode 100644 dynamic/train.py rename {serving/server/plsc_serving => dynamic/utils}/__init__.py (86%) create mode 100644 dynamic/utils/amp.py create mode 100644 dynamic/utils/data_parallel.py create mode 100644 dynamic/utils/io.py create mode 100644 dynamic/utils/verification.py create mode 100644 dynamic/validation.py delete mode 100644 images/fc_computing.gif delete mode 100644 images/fc_computing_block.gif delete mode 100644 images/fc_computing_block_global.gif delete mode 100644 images/plsc_overview.png delete mode 100644 images/softmax_computing.gif create mode 100644 install.md delete mode 100644 plsc/config.py delete mode 100644 plsc/entry.py delete mode 100644 plsc/models/base_model.py delete mode 100644 plsc/models/dist_algo.py delete mode 100644 plsc/models/resnet.py delete mode 100644 plsc/utils/base64_reader.py delete mode 100644 plsc/utils/fp16_lists.py delete mode 100644 plsc/utils/fp16_utils.py delete mode 100644 plsc/utils/input_field.py delete mode 100644 plsc/utils/jpeg_reader.py delete mode 100644 plsc/utils/verification.py create mode 100644 requirement.txt delete mode 100644 requirements.txt create mode 100644 scripts/export_dynamic.sh rename plsc/models/__init__.py => scripts/export_static.sh (60%) create mode 100644 scripts/inference.sh rename train.sh => scripts/kill_train_process.sh (89%) mode change 100755 => 100644 create mode 100644 scripts/perf_dynamic.sh create mode 100644 scripts/perf_runner.sh create mode 100644 scripts/perf_static.sh create mode 100644 scripts/train_dynamic.sh create mode 100644 scripts/train_static.sh create mode 100644 scripts/validation_dynamic.sh create mode 100644 scripts/validation_static.sh delete mode 100644 serving/client/face_service/data/00000000.jpg delete mode 100644 serving/client/face_service/face_service.py delete mode 100644 serving/server/plsc_serving/run/__init__.py delete mode 100644 serving/server/plsc_serving/server/conf/cube.conf delete mode 100644 serving/server/plsc_serving/server/conf/gflags.conf delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.0 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.1 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.10 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.11 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.12 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.13 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.14 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.15 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.2 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.3 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.4 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.5 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.6 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.7 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.8 delete mode 100644 serving/server/plsc_serving/server/conf/model_toolkit.prototxt.9 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt delete mode 
100644 serving/server/plsc_serving/server/conf/resource.prototxt.0 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.1 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.10 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.11 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.12 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.13 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.14 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.15 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.2 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.3 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.4 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.5 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.6 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.7 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.8 delete mode 100644 serving/server/plsc_serving/server/conf/resource.prototxt.9 delete mode 100644 serving/server/plsc_serving/server/conf/service.prototxt delete mode 100644 serving/server/plsc_serving/server/conf/workflow.prototxt delete mode 100644 serving/server/plsc_serving/server/data/model/paddle/fluid_reload_flag delete mode 100644 serving/server/plsc_serving/server/data/model/paddle/fluid_time_file delete mode 100644 serving/server/setup.py delete mode 100644 setup.py rename plsc/version.py => static/backbones/__init__.py (82%) create mode 100644 static/backbones/iresnet.py create mode 100644 static/classifiers/__init__.py create mode 100644 static/classifiers/lsc.py create mode 100644 static/export.py create mode 100644 static/static_model.py create mode 100644 static/train.py create mode 100644 static/utils/__init__.py create mode 100644 static/utils/io.py create mode 100644 static/utils/optimization_pass.py create mode 100644 static/utils/verification.py create mode 100644 static/validation.py create mode 100644 tools/__init__.py create mode 100644 tools/benchmark_speed.py create mode 100644 tools/convert_image_bin.py create mode 100644 tools/export.py create mode 100644 tools/extract_perf_logs.py create mode 100644 tools/inference.py create mode 100644 tools/mx_recordio_2_images.py delete mode 100644 tools/process_base64_files.py create mode 100644 tools/test_recognition.py rename train.py => tools/train.py (52%) create mode 100644 tools/validation.py create mode 100644 utils/__init__.py create mode 100644 utils/logging.py create mode 100644 utils/losses.py rename plsc/utils/parameter_converter.py => utils/rearrange_weight.py (70%) create mode 100644 utils/verification.py diff --git a/.gitignore b/.gitignore index 206b857..4f61b8c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,103 @@ -*.pyc -.idea -*.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fc7e706..d9fb2b9 100755 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: clang-format-with-version-check name: clang-format description: Format files with ClangFormat. - entry: bash ./tools/codestyle/clang_format.hook -i + entry: bash ./codestyle/clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ - repo: local @@ -31,7 +31,7 @@ repos: - id: cpplint-cpp-source name: cpplint description: Check C++ code style using cpplint.py. - entry: bash ./tools/codestyle/cpplint_pre_commit.hook + entry: bash ./codestyle/cpplint_pre_commit.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ - repo: local @@ -39,14 +39,14 @@ repos: - id: pylint-doc-string name: pylint description: Check python docstring style using docstring_checker. - entry: bash ./tools/codestyle/pylint_pre_commit.hook + entry: bash ./codestyle/pylint_pre_commit.hook language: system files: \.(py)$ - repo: local hooks: - id: copyright_checker name: copyright_checker - entry: python ./tools/codestyle/copyright.hook + entry: python ./codestyle/copyright.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$ exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$ diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 261eeb9..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/README.md b/README.md index af4ca51..778e1e8 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,203 @@ -# PLSC: PaddlePaddle Large Scale Classification Library +# Arcface-Paddle -## Introduction -In deep learning, the last layer of a deep neural network for multi-class classification is usually a combination of a fully connected layer and Softmax, with cross-entropy used to compute the network's loss. Since the number of parameters of the fully connected layer grows linearly with the number of classes, training faces two main challenges when the number of classes is very large: +## 1. Introduction -1. The parameters are too large to fit in the memory of a single GPU card: suppose the output dimension of the last hidden layer of the classification network is 512; with one million classes, the parameters of the last fully connected layer take about 2GB (assuming 32-bit floating-point parameters). With one hundred million classes (for example, classifying all living species in nature), the parameters of the last fully connected layer approach 200GB, far beyond the memory capacity of current GPUs. +`Arcface-Paddle` is an open source deep face detection and recognition toolkit, powered by PaddlePaddle. `Arcface-Paddle` now provides three related pretrained models, including `BlazeFace` for face detection, and `ArcFace` and `MobileFace` for face recognition. -2. The large parameters cause high communication overhead in synchronous training: under data-parallel training, the parameter gradients must be synchronized across all GPU cards to update the parameter values consistently. When the parameters are large, so is the gradient data, which makes gradient communication expensive and slows down training. +- This tutorial is mainly about face recognition.
+- For face detection task, please refer to: [Face detection tutorial](../../detection/blazeface_paddle/README_en.md). -Since the fully connected layer is linearly separable, its parameters can be sharded across multiple GPU cards, reducing the parameter storage on each card. +## 2. Environment preparation -Take the figure below as an example: the fully connected layer parameters are split by rows onto different GPU cards. In each training iteration, every GPU card computes the hidden-layer output features from its own training data and obtains the gathered features through the AllGather collective operation. Each card then computes partial logits from the gathered features and its own shard of the fully connected layer parameters, and the network loss is computed on that basis. +Please refer to [Installation](./install_en.md) to set up the environment first. -![overview](./images/plsc_overview.png) -PLSC (**P**addlePaddle **L**arge **S**cale **C**lassification) is a super-large-scale classification library built on the [PaddlePaddle platform](https://github.com/PaddlePaddle/Paddle), providing users with a full-pipeline solution for large-scale classification problems, from training to deployment. +## 3. Data preparation -## PLSC Features -* Super-large-scale classification: with 8 V100 GPUs on a single node, the maximum supported number of classes is 2.52x larger, and it grows further with the number of GPU cards; +### 3.1 Enter recognition dir. -* Fast training: 2,122.56 images/s for million-class classification based on ResNet50 with 8 V100 GPUs on a single node, with support for multi-node distributed training and mixed-precision training; -* Adjustable number of training cards: warm-start training that loads model parameters can use a different number of GPU cards from pretraining, with automatic parameter conversion; -* Preprocessing of base64-format image data, including global shuffling and automatic data splitting; -* Custom models: PLSC has built-in ResNet50, ResNet101 and ResNet152 models and supports user-defined models; -* Automatic upload and download of model parameters on the HDFS file system; -* A full-pipeline solution for large-scale classification problems, from training to deployment. +``` +cd /path/to/arcface_paddle/ +``` -## Quick Start -Please refer to [Quick Start](docs/source/md/quick_start.md) for the installation guide and quick-start examples. +### 3.2 Download -## Inference Deployment -Please refer to the [Serving Guide](docs/source/md/serving.md) for inference deployment usage. +Download the dataset from [https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_). -## Advanced Features -Please refer to the [Advanced Guide](docs/source/md/advanced.md) for more advanced features, such as automatic upload and download on HDFS. +### 3.3 Extract MXNet Dataset to images -## API Reference -Please refer to the [API Reference](docs/source/md/api_reference.md) for API usage. +```shell +python tools/mx_recordio_2_images.py --root_dir ms1m-retinaface-t1/ --output_dir MS1M_v3/ +``` -## Pretrained Models and Performance -### Pretrained Models +After the extraction finishes, the folder structure is as follows. -We provide the following pretrained models to help users fine-tune on downstream tasks. +``` +arcface_paddle/MS1M_v3 +|_ images +| |_ 00000001.jpg +| |_ ... +| |_ 05179510.jpg +|_ label.txt +|_ agedb_30.bin +|_ cfp_ff.bin +|_ cfp_fp.bin +|_ lfw.bin +``` -| Model | Description | -| :--------------- | :------------- | +Label file format is as follows. -| [resnet50_distarcface_ms1m_arcface](https://plsc.bj.bcebos.com/pretrained_model/resnet50_distarcface_ms1mv2.tar.gz) | Trained with the ResNet50 network on the MS1M-ArcFace dataset with loss_type 'dist_arcface'; the pretrained model reaches 0.99817 validation accuracy on the lfw validation set. | +``` +# delimiter: "\t" +# the following is the content of label.txt +images/00000001.jpg 0 +... +``` -### Training Accuracy +If you want to use a custom dataset, you can arrange your data according to the above format. -| Model | Train set | lfw | agedb\_30 | cfp\_ff | cfp\_fp | MegaFace (Id/Ver) | -| :--------- | :------------ | :------ | :------ | :------ | :------ | :-------------- | +### 3.4 Transform between original image files and bin files -| ResNet50 | MS1M-ArcFace | 0.99817 | 0.99827 | 0.99857 | 0.96314 | 0.98000/0.99300 | +If you want to convert original image files to `bin` files that can be used directly in the training process, you can use the following command to finish the conversion. -| ResNet50 | CASIA | 0.98950 | 0.90950 | 0.99057 | 0.91500 | N/A | +```shell +python tools/convert_image_bin.py --image_path="your/input/image/path" --bin_path="your/output/bin/path" --mode="image2bin" +``` -Note: the models above were trained with loss_type 'dist_arcface'. For more about ArcFace, please refer to [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698) +If you want to convert `bin` files to original image files, you can use the following command to finish the conversion.
+ +```shell +python tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_path="your/output/image/path" --mode="bin2image" +``` + +## 4. Training + +### 4.1 Single node, 8 GPUs: + +#### Static Mode + +```bash +sh scripts/train_static.sh +``` + +#### Dynamic Mode + +```bash +sh scripts/train_dynamic.sh +``` + + +During training, you can view loss changes in real time through `VisualDL`. For more information, please refer to [VisualDL](https://github.com/PaddlePaddle/VisualDL/). + + +## 5. Model evaluation + +The model evaluation process can be started as follows. + +#### Static Mode + +```bash +sh scripts/validation_static.sh +``` + +#### Dynamic Mode + +```bash +sh scripts/validation_dynamic.sh +``` + +## 6. Export model +PaddlePaddle supports inference using prediction engines. First, you should export the inference model. + +#### Static Mode + +```bash +sh scripts/export_static.sh +``` + +#### Dynamic Mode + +```bash +sh scripts/export_dynamic.sh +``` + +We also support exporting to an ONNX model; you only need to set `--export_type onnx`. + +## 7. Model inference + +The model inference process supports both the Paddle inference model and the ONNX model. + +```bash +sh scripts/inference.sh +``` + +## 8. Model performance + +### 8.1 Performance on IJB-C and Verification Datasets + +**Configuration:** + * GPU: 8 NVIDIA Tesla V100 32G + * Precision: AMP + * BatchSize: 128/1024 + +| Mode | Datasets | backbone | Ratio | IJBC(1e-05) | IJBC(1e-04) | agedb30 | cfp_fp | lfw | log | +| ------- | :------: | :------- | ----- | :---------- | :---------- | :------ | :----- | :--- | :--- | +| Static | MS1MV3 | r50 | 0.1 | | | | | | | +| Static | MS1MV3 | r50 | 1.0 | | | | | | | +| Dynamic | MS1MV3 | r50 | 0.1 | | | | | | | +| Dynamic | MS1MV3 | r50 | 1.0 | | | | | | | + + +### 8.2 Maximum Number of Identities + +**Configuration:** + * GPU: 8 NVIDIA Tesla V100 32G + * Precision: AMP + * BatchSize: 64/512 + * SampleRatio: 0.1 + +| Mode | Res50 | Res100 | +| ------------------------- | ---------------------------- | ---------------------------- | +| Oneflow | | | +| PyTorch | | | +| Paddle (static) | | | +| Paddle (dynamic) | | | + + +## 9. Demo + +Combined with the face detection model, we can complete the whole face recognition process. + +First, use the following commands to download the index gallery, demo image and font file for visualization. + + +```bash +# Index library for the recognition process +wget https://raw.githubusercontent.com/littletomatodonkey/insight-face-paddle/main/demo/friends/index.bin +# Demo image +wget https://raw.githubusercontent.com/littletomatodonkey/insight-face-paddle/main/demo/friends/query/friends2.jpg +# Font file for visualization +wget https://raw.githubusercontent.com/littletomatodonkey/insight-face-paddle/main/SourceHanSansCN-Medium.otf +``` + +The demo image is shown as follows. + +
+![demo image](./friends2.jpg)
+ + +Use the following command to run the whole face recognition demo. + +```shell +# detection + recognition process +python tools/test_recognition.py --det --rec --index=index.bin --input=friends2.jpg --output="./output" +``` + +The final result is saved in the folder `output/`, which is shown as follows. + +
+[recognition result image: saved in the `./output/` directory]
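+Under the hood, matching a query face against the index gallery comes down to comparing L2-normalized embeddings by cosine similarity. The sketch below is illustrative only (the function name, threshold value and array shapes are assumptions, not the actual code in `tools/test_recognition.py`):
+
+```python
+import numpy as np
+
+def match(query_emb, gallery_embs, labels, threshold=0.4):
+    """Return (label, similarity) of the best gallery match, or (None, sim)."""
+    # L2-normalize so that a dot product equals cosine similarity
+    q = query_emb / np.linalg.norm(query_emb)
+    g = gallery_embs / np.linalg.norm(gallery_embs, axis=1, keepdims=True)
+    sims = g @ q  # one similarity score per gallery entry
+    best = int(np.argmax(sims))
+    if sims[best] < threshold:
+        return None, float(sims[best])
+    return labels[best], float(sims[best])
+```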
+ + +For more details about parameter explanations, index gallery construction and whl package inference, please refer to [Whl package inference tutorial](https://github.com/littletomatodonkey/insight-face-paddle). diff --git a/codestyle/.gitignore b/codestyle/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/codestyle/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/codestyle/clang_format.hook b/codestyle/clang_format.hook new file mode 100755 index 0000000..1d92821 --- /dev/null +++ b/codestyle/clang_format.hook @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +readonly VERSION="3.8" + +version=$(clang-format -version) + +if ! [[ $version == *"$VERSION"* ]]; then + echo "clang-format version check failed." + echo "a version containing '$VERSION' is needed, but got '$version'" + echo "you can install the right version and make a soft-link in your '\$PATH' env" + exit -1 +fi + +clang-format $@ diff --git a/tools/codestyle/copyright.hook b/codestyle/copyright.hook similarity index 100% rename from tools/codestyle/copyright.hook rename to codestyle/copyright.hook diff --git a/codestyle/cpplint_pre_commit.hook b/codestyle/cpplint_pre_commit.hook new file mode 100755 index 0000000..c90bf29 --- /dev/null +++ b/codestyle/cpplint_pre_commit.hook @@ -0,0 +1,27 @@ +#!/bin/bash + +TOTAL_ERRORS=0 +if [[ ! $TRAVIS_BRANCH ]]; then + # install cpplint on local machine. + if [[ ! $(which cpplint) ]]; then + pip install cpplint + fi + # diff files on local machine. + files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}') +else + # diff files between PR and latest commit on Travis CI. + branch_ref=$(git rev-parse "$TRAVIS_BRANCH") + head_ref=$(git rev-parse HEAD) + files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}') +fi +# The trick to remove deleted files: https://stackoverflow.com/a/2413151 +for file in $files; do + if [[ $file =~ ^(patches/.*) ]]; then + continue; + else + cpplint --filter=-readability/fn_size,-build/include_what_you_use,-build/c++11 $file; + TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); + fi +done + +exit $TOTAL_ERRORS diff --git a/codestyle/docstring_checker.py b/codestyle/docstring_checker.py new file mode 100644 index 0000000..823d947 --- /dev/null +++ b/codestyle/docstring_checker.py @@ -0,0 +1,349 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""DocstringChecker is used to check python doc string's style.""" + +import six +import astroid + +from pylint.checkers import BaseChecker, utils +from pylint.interfaces import IAstroidChecker + +from collections import defaultdict +import re + + +def register(linter): + """Register checkers.""" + linter.register_checker(DocstringChecker(linter)) + + +class Docstring(object): + """Docstring class holds the parsed doc string elements.
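+    It expects the section layout that parse() recognizes, for example
+    (illustrative):
+        Args:
+            scale (int): the scaling factor.
+        Returns:
+            The scaled value.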
+ """ + + def __init__(self): + self.d = defaultdict(list) #name->[] + self.clear() + + def clear(self): + self.d['Args'] = [] + self.d['Examples'] = [] + self.d['Returns'] = [] + self.d['Raises'] = [] + self.args = {} #arg_name->arg_type + + def get_level(self, string, indent=' '): + level = 0 + unit_size = len(indent) + while string[:unit_size] == indent: + string = string[unit_size:] + level += 1 + + return level + + def parse(self, doc): + """parse gets sections from doc + Such as Args, Returns, Raises, Examples s + Args: + doc (string): is the astroid node doc string. + Returns: + True if doc is parsed successfully. + """ + self.clear() + + lines = doc.splitlines() + state = ("others", -1) + for l in lines: + c = l.strip() + if len(c) <= 0: + continue + + level = self.get_level(l) + if c.startswith("Args:"): + state = ("Args", level) + elif c.startswith("Returns:"): + state = ("Returns", level) + elif c.startswith("Raises:"): + state = ("Raises", level) + elif c.startswith("Examples:"): + state = ("Examples", level) + else: + if level > state[1]: + self.d[state[0]].append(c) + continue + + state = ("others", -1) + self.d[state[0]].append(c) + + self._arg_with_type() + return True + + def get_returns(self): + return self.d['Returns'] + + def get_raises(self): + return self.d['Raises'] + + def get_examples(self): + return self.d['Examples'] + + def _arg_with_type(self): + + for t in self.d['Args']: + m = re.search(r'([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t) + if m: + self.args[m.group(1)] = m.group(2) + + return self.args + + +class DocstringChecker(BaseChecker): + """DosstringChecker is pylint checker to + check docstring style. + """ + __implements__ = (IAstroidChecker, ) + + POSITIONAL_MESSAGE_ID = 'str-used-on-positional-format-argument' + KEYWORD_MESSAGE_ID = 'str-used-on-keyword-format-argument' + + name = 'doc-string-checker' + symbol = "doc-string" + priority = -1 + msgs = { + 'W9001': ('One line doc string on > 1 lines', symbol + "-one-line", + 'Used when a short doc string is on multiple lines'), + 'W9002': + ('Doc string does not end with "." period', symbol + "-end-with", + 'Used when a doc string does not end with a period'), + 'W9003': + ('All args with their types must be mentioned in doc string %s', + symbol + "-with-all-args", + 'Used when not all arguments are in the doc string '), + 'W9005': ('Missing docstring or docstring is too short', + symbol + "-missing", 'Add docstring longer >=10'), + 'W9006': ('Docstring indent error, use 4 space for indent', + symbol + "-indent-error", 'Use 4 space for indent'), + 'W9007': ('You should add `Returns` in comments', + symbol + "-with-returns", + 'There should be a `Returns` section in comments'), + 'W9008': ('You should add `Raises` section in comments', + symbol + "-with-raises", + 'There should be a `Raises` section in comments'), + } + options = () + + def visit_functiondef(self, node): + """visit_functiondef checks Function node docstring style. + Args: + node (astroid.node): The visiting node. + Returns: + True if successful other wise False. 
+ """ + + self.check_doc_string(node) + + if node.tolineno - node.fromlineno <= 10: + return True + + if not node.doc: + return True + + doc = Docstring() + doc.parse(node.doc) + + self.all_args_in_doc(node, doc) + self.with_returns(node, doc) + self.with_raises(node, doc) + + def visit_module(self, node): + self.check_doc_string(node) + + def visit_classdef(self, node): + self.check_doc_string(node) + + def check_doc_string(self, node): + self.missing_doc_string(node) + self.one_line(node) + self.has_period(node) + self.indent_style(node) + + def missing_doc_string(self, node): + if node.name.startswith("__") or node.name.startswith("_"): + return True + if node.tolineno - node.fromlineno <= 10: + return True + + if node.doc is None or len(node.doc) < 10: + self.add_message('W9005', node=node, line=node.fromlineno) + return False + + # FIXME(gongwb): give the docstring line-no + def indent_style(self, node, indent=4): + """indent_style checks docstring's indent style + Args: + node (astroid.node): The visiting node. + indent (int): The default indent of style + Returns: + True if successful other wise False. + """ + if node.doc is None: + return True + + doc = node.doc + lines = doc.splitlines() + line_num = 0 + + for l in lines: + if line_num == 0: + continue + cur_indent = len(l) - len(l.lstrip()) + if cur_indent % indent != 0: + self.add_message('W9006', node=node, line=node.fromlineno) + return False + line_num += 1 + + return True + + def one_line(self, node): + """one_line checks if docstring (len < 40) is on one line. + Args: + node (astroid.node): The node visiting. + Returns: + True if successful otherwise False. + """ + + doc = node.doc + if doc is None: + return True + + if len(doc) > 40: + return True + elif sum(doc.find(nl) for nl in ('\n', '\r', '\n\r')) == -3: + return True + else: + self.add_message('W9001', node=node, line=node.fromlineno) + return False + + return True + + def has_period(self, node): + """has_period checks if one line doc end-with '.' . + Args: + node (astroid.node): the node is visiting. + Returns: + True if successful otherwise False. + """ + if node.doc is None: + return True + + if len(node.doc.splitlines()) > 1: + return True + + if not node.doc.strip().endswith('.'): + self.add_message('W9002', node=node, line=node.fromlineno) + return False + + return True + + def with_raises(self, node, doc): + """with_raises checks if one line doc end-with '.' . + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object. + Returns: + True if successful otherwise False. + """ + + find = False + for t in node.body: + if not isinstance(t, astroid.Raise): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_raises()) == 0: + self.add_message('W9008', node=node, line=node.fromlineno) + return False + + return True + + def with_returns(self, node, doc): + """with_returns checks if docstring comments what are returned . + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object. + Returns: + True if successful otherwise False. 
+ """ + + if node.name.startswith("__") or node.name.startswith("_"): + return True + find = False + for t in node.body: + if not isinstance(t, astroid.Return): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_returns()) == 0: + self.add_message('W9007', node=node, line=node.fromlineno) + return False + + return True + + def all_args_in_doc(self, node, doc): + """all_args_in_doc checks if arguments are mentioned in doc + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object + Returns: + True if successful otherwise False. + """ + if node.name.startswith("__") or node.name.startswith("_"): + return True + args = [] + for arg in node.args.get_children(): + if (not isinstance(arg, astroid.AssignName)) \ + or arg.name == "self": + continue + args.append(arg.name) + + if len(args) <= 0: + return True + + parsed_args = doc.args + args_not_documented = set(args) - set(parsed_args) + if len(args) > 0 and len(parsed_args) <= 0: + self.add_message( + 'W9003', + node=node, + line=node.fromlineno, + args=list(args_not_documented)) + return False + + for t in args: + if t not in parsed_args: + self.add_message( + 'W9003', node=node, line=node.fromlineno, args=[t, ]) + return False + + return True diff --git a/tools/codestyle/pylint_pre_commit.hook b/codestyle/pylint_pre_commit.hook similarity index 100% rename from tools/codestyle/pylint_pre_commit.hook rename to codestyle/pylint_pre_commit.hook diff --git a/codestyle/test_docstring_checker.py b/codestyle/test_docstring_checker.py new file mode 100644 index 0000000..b05983d --- /dev/null +++ b/codestyle/test_docstring_checker.py @@ -0,0 +1,232 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import docstring_checker +import pylint.testutils +import astroid +import pytest +import sys + + +class TestDocstring(pylint.testutils.CheckerTestCase): + CHECKER_CLASS = docstring_checker.DocstringChecker + + def test_one_line(self): + func_node = astroid.extract_node(''' + def test(): + """get + news. + """ + if True: + return 5 + return 5 + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9001' == got[0][0] + + def test_one_line_1(self): + func_node = astroid.extract_node(''' + def test(): + """get news""" + if True: + return 5 + return 5 + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9002' == got[0][0] + + def test_args(self): + func_node = astroid.extract_node(''' + def test(scale, mean): + """get news. + Args: + scale (int): scale is the number. 
+ """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9003' == got[0][0] + + def test_missing(self): + func_node = astroid.extract_node(''' + def test(): + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9005' == got[0][0] + + def test_indent(self): + func_node = astroid.extract_node(''' + def test(): + """ get get get get get get get get + get get get get get get get get. + """ + pass + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9006' == got[0][0] + + def test_with_resturns(self): + func_node = astroid.extract_node(''' + def test(): + """get news. + Args: + scale (int): scale is the number. + """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + return mean + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9007' == got[0][0] + + def test_with_raises(self): + func_node = astroid.extract_node(''' + def test(): + """get news. + Args: + scale (int): scale is the number. + """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + raise ValueError('A very specific bad thing happened.') + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9008' == got[0][0] + + def test_no_message(self): + p = ''' +def fc(input, + size, + num_flatten_dims=1, + param_attr=None, + bias_attr=None, + act=None, + name=None): + """ + **Fully Connected Layer** + The fully connected layer can take multiple tensors as its inputs. It + creates a variable called weights for each input tensor, which represents + a fully connected weight matrix from each input unit to each output unit. + The fully connected layer multiplies each input tensor with its coresponding + weight to produce an output Tensor. If multiple input tensors are given, + the results of multiple multiplications will be sumed up. If bias_attr is + not None, a bias variable will be created and added to the output. Finally, + if activation is not None, it will be applied to the output as well. + This process can be formulated as follows: + + Args: + input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of + the input tensor(s) is at least 2. + size(int): The number of output units in this layer. + num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than + two dimensions. If this happens, the multidimensional tensor will first be flattened + into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input + tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) + dimensions will be flatten to form the first dimension of the final matrix (height of + the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to + form the second dimension of the final matrix (width of the matrix). 
For example, suppose + `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. + Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. + param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable + parameters/weights of this layer. + bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias + of this layer. If it is set to None, no bias will be added to the output units. + act (str, default None): Activation to be applied to the output of this layer. + name (str, default None): The name of this layer. + Returns: + A tensor variable storing the transformation result. + Raises: + ValueError: If rank of the input tensor is less than 2. + Examples: + .. code-block:: python + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + fc = fluid.layers.fc(input=data, size=1000, act="tanh") + """ + raise ValueError('A very specific bad thing happened.') + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + return size + ''' + + func_node = astroid.extract_node(p) + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 0 diff --git a/plsc/utils/__init__.py b/configs/__init__.py similarity index 89% rename from plsc/utils/__init__.py rename to configs/__init__.py index d0c32e2..185a92b 100644 --- a/plsc/utils/__init__.py +++ b/configs/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/configs/argparser.py b/configs/argparser.py new file mode 100644 index 0000000..2704319 --- /dev/null +++ b/configs/argparser.py @@ -0,0 +1,276 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
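+# How configuration is resolved, sketched with an illustrative command (the
+# exact training entry point may vary): configs/config.py provides the global
+# defaults, the job config passed via --config_file overrides them through
+# cfg.update(job_cfg) in get_config() below, and explicit command-line flags
+# override both, e.g.:
+#
+#     python tools/train.py --config_file configs/ms1mv3_r50.py --batch_size 64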
+ + import os + import logging + import argparse + import importlib + + + def print_args(args): + logging.info('--------args----------') + for k in list(vars(args).keys()): + logging.info('%s: %s' % (k, vars(args)[k])) + logging.info('------------------------\n') + + + def str2bool(v): + return str(v).lower() in ("true", "t", "1") + + + def tostrlist(v): + if isinstance(v, list): + return v + elif isinstance(v, str): + return [e.strip() for e in v.split(',')] + + + def tointlist(v): + if isinstance(v, list): + return v + elif isinstance(v, str): + return [int(e.strip()) for e in v.split(',')] + + + def get_config(config_file): + assert config_file.startswith( + 'configs/'), 'config file setting must start with configs/' + temp_config_name = os.path.basename(config_file) + temp_module_name = os.path.splitext(temp_config_name)[0] + config = importlib.import_module("configs.config") + cfg = config.config + config = importlib.import_module("configs.%s" % temp_module_name) + job_cfg = config.config + cfg.update(job_cfg) + if cfg.output is None: + cfg.output = os.path.join('work_dirs', temp_module_name) + return cfg + + + class UserNamespace(object): + pass + + + def parse_args(): + + parser = argparse.ArgumentParser(description='Paddle Face Training') + user_namespace = UserNamespace() + parser.add_argument( + '--config_file', type=str, required=True, help='config file path') + parser.parse_known_args(namespace=user_namespace) + cfg = get_config(user_namespace.config_file) + + # Model setting + parser.add_argument( + '--is_static', + type=str2bool, + default=cfg.is_static, + help='whether to use static mode') + parser.add_argument( + '--backbone', type=str, default=cfg.backbone, help='backbone network') + parser.add_argument( + '--classifier', + type=str, + default=cfg.classifier, + help='classification network') + parser.add_argument( + '--embedding_size', + type=int, + default=cfg.embedding_size, + help='embedding size') + parser.add_argument( + '--model_parallel', + type=str2bool, + default=cfg.model_parallel, + help='whether to use model parallel') + parser.add_argument( + '--sample_ratio', + type=float, + default=cfg.sample_ratio, + help='sample ratio, use Partial FC sampling if the sample ratio is less than 1.0') + parser.add_argument( + '--loss', type=str, default=cfg.loss, help='loss function') + parser.add_argument( + '--dropout', + type=float, + default=cfg.dropout, + help='probability of dropout') + + # AMP setting + parser.add_argument( + '--fp16', + type=str2bool, + default=cfg.fp16, + help='whether to use fp16 training') + parser.add_argument( + '--init_loss_scaling', + type=float, + default=cfg.init_loss_scaling, + help='The initial loss scaling factor.') + parser.add_argument( + '--incr_every_n_steps', + type=int, + default=cfg.incr_every_n_steps, + help='Increases loss scaling every n consecutive steps with finite gradients.' + ) + parser.add_argument( + '--decr_every_n_nan_or_inf', + type=int, + default=cfg.decr_every_n_nan_or_inf, + help='Decreases loss scaling every n accumulated steps with nan or inf gradients.' + ) + parser.add_argument( + '--incr_ratio', + type=float, + default=cfg.incr_ratio, + help='The multiplier to use when increasing the loss scaling.') + parser.add_argument( + '--decr_ratio', + type=float, + default=cfg.decr_ratio, + help='The less-than-one-multiplier to use when decreasing the loss scaling.'
+ ) + parser.add_argument( + '--use_dynamic_loss_scaling', + type=str2bool, + default=cfg.use_dynamic_loss_scaling, + help='Whether to use dynamic loss scaling.') + parser.add_argument( + '--custom_white_list', + type=tostrlist, + default=cfg.custom_white_list, + help='fp16 custom white list.') + parser.add_argument( + '--custom_black_list', + type=tostrlist, + default=cfg.custom_black_list, + help='fp16 custom black list.') + + # Optimizer setting + parser.add_argument( + '--lr', type=float, default=cfg.lr, help='learning rate') + parser.add_argument( + '--lr_decay', + type=float, + default=cfg.lr_decay, + help='learning rate decay factor') + parser.add_argument( + '--weight_decay', + type=float, + default=cfg.weight_decay, + help='weight decay') + parser.add_argument( + '--momentum', type=float, default=cfg.momentum, help='sgd momentum') + parser.add_argument( + '--train_unit', + type=str, + default=cfg.train_unit, + help='train unit, "step" or "epoch"') + parser.add_argument( + '--warmup_num', + type=int, + default=cfg.warmup_num, + help='warmup num according train unit') + parser.add_argument( + '--train_num', + type=int, + default=cfg.train_num, + help='train num according train unit') + parser.add_argument( + '--decay_boundaries', + type=tointlist, + default=cfg.decay_boundaries, + help='piecewise decay boundaries') + + # Train dataset setting + parser.add_argument( + '--use_synthetic_dataset', + type=str2bool, + default=cfg.use_synthetic_dataset, + help='whether to use synthetic dataset') + parser.add_argument( + '--dataset', type=str, default=cfg.dataset, help='train dataset name') + parser.add_argument( + '--data_dir', + type=str, + default=cfg.data_dir, + help='train dataset directory') + parser.add_argument( + '--label_file', + type=str, + default=cfg.label_file, + help='train label file name, each line split by "\t"') + parser.add_argument( + '--is_bin', + type=str2bool, + default=cfg.is_bin, + help='whether the train data is bin or original image file') + parser.add_argument( + '--num_classes', + type=int, + default=cfg.num_classes, + help='classes of train dataset') + parser.add_argument( + '--batch_size', + type=int, + default=cfg.batch_size, + help='batch size of each rank') + parser.add_argument( + '--num_workers', + type=int, + default=cfg.num_workers, + help='the number workers of DataLoader') + + # Validation dataset setting + parser.add_argument( + '--do_validation_while_train', + type=str2bool, + default=cfg.do_validation_while_train, + help='do validation while train') + parser.add_argument( + '--validation_interval_step', + type=int, + default=cfg.validation_interval_step, + help='validation interval step') + parser.add_argument( + '--val_targets', + type=tostrlist, + default=cfg.val_targets, + help='val targets, list or str split by comma') + + # IO setting + parser.add_argument( + '--logdir', type=str, default=cfg.logdir, help='log dir') + parser.add_argument( + '--log_interval_step', + type=int, + default=cfg.log_interval_step, + help='log interval step') + parser.add_argument( + '--output', type=str, default=cfg.output, help='output dir') + parser.add_argument( + '--resume', type=str2bool, default=cfg.resume, help='model resuming') + parser.add_argument( + '--checkpoint_dir', + type=str, + default=cfg.checkpoint_dir, + help='checkpoint direcotry') + parser.add_argument( + '--max_num_last_checkpoint', + type=int, + default=cfg.max_num_last_checkpoint, + help='the maximum number of lastest checkpoint to keep') + + args = parser.parse_args(namespace=user_namespace) 
diff --git a/configs/config.py b/configs/config.py
new file mode 100644
index 0000000..1c96e71
--- /dev/null
+++ b/configs/config.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from easydict import EasyDict as edict
+
+config = edict()
+config.is_static = True
+config.backbone = 'FresResNet100'
+config.classifier = 'LargeScaleClassifier'
+config.embedding_size = 512
+config.model_parallel = True
+config.sample_ratio = 0.1
+config.loss = 'ArcFace'
+config.dropout = 0.0
+
+config.fp16 = True
+config.init_loss_scaling = 1.0
+config.incr_every_n_steps = 2000
+config.decr_every_n_nan_or_inf = 1
+config.incr_ratio = 2.0
+config.decr_ratio = 0.5
+config.use_dynamic_loss_scaling = True
+config.custom_white_list = []
+config.custom_black_list = ['margin_cross_entropy']
+
+config.lr = 0.1  # for global batch size = 512
+config.lr_decay = 0.1
+config.weight_decay = 5e-4
+config.momentum = 0.9
+config.train_unit = 'step'  # 'step' or 'epoch'
+config.warmup_num = 1000
+config.train_num = 180000
+config.decay_boundaries = [100000, 140000, 160000]
+
+config.use_synthetic_dataset = False
+config.dataset = "MS1M_v3"
+config.data_dir = "./MS1M_v3"
+config.label_file = "./MS1M_v3/label.txt"
+config.is_bin = False
+config.num_classes = 93431  # 85742 for MS1M_v2, 93431 for MS1M_v3
+config.batch_size = 64  # per GPU; global batch size 512 with 8 GPUs
+config.num_workers = 8
+
+config.do_validation_while_train = True
+config.validation_interval_step = 2000
+config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
+
+config.logdir = './log'
+config.log_interval_step = 10
+config.output = './MS1M_v3_arcface'
+config.resume = False
+config.checkpoint_dir = None
+config.max_num_last_checkpoint = 3
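lr, lr_decay and decay_boundaries together describe a piecewise-constant schedule: the rate starts at lr and is multiplied by lr_decay at each boundary. A sketch of how such a schedule could be materialized with Paddle — the actual optimizer wiring in this patch lives in dynamic/train.py and static/train.py, so this snippet is illustrative only:

    import paddle

    lr, lr_decay = 0.1, 0.1
    boundaries = [100000, 140000, 160000]
    values = [lr * lr_decay**i for i in range(len(boundaries) + 1)]
    # values == [0.1, 0.01, 0.001, 0.0001], applied on the step intervals
    # [0, 100k), [100k, 140k), [140k, 160k), [160k, ...)
    scheduler = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=boundaries, values=values)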
diff --git a/configs/ms1mv3_r100.py b/configs/ms1mv3_r100.py
new file mode 100644
index 0000000..75d200d
--- /dev/null
+++ b/configs/ms1mv3_r100.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from easydict import EasyDict as edict
+
+config = edict()
+config.is_static = True
+config.backbone = 'FresResNet100'
+config.classifier = 'LargeScaleClassifier'
+config.embedding_size = 512
+config.model_parallel = True
+config.sample_ratio = 0.1
+config.loss = 'ArcFace'
+config.dropout = 0.0
+
+config.lr = 0.1  # for global batch size = 512
+config.lr_decay = 0.1
+config.weight_decay = 5e-4
+config.momentum = 0.9
+config.train_unit = 'epoch'  # 'step' or 'epoch'
+config.warmup_num = 0
+config.train_num = 25
+config.decay_boundaries = [10, 16, 22]
+
+config.use_synthetic_dataset = False
+config.dataset = "MS1M_v3"
+config.data_dir = "./MS1M_v3"
+config.label_file = "./MS1M_v3/label.txt"
+config.is_bin = False
+config.num_classes = 93431  # 85742 for MS1M_v2, 93431 for MS1M_v3
+config.batch_size = 128  # per GPU; global batch size 1024 with 8 GPUs
+config.num_workers = 8
+
+config.do_validation_while_train = True
+config.validation_interval_step = 2000
+config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
+
+config.logdir = './log'
+config.log_interval_step = 100
+config.output = './MS1M_v3_arcface'
+config.resume = False
+config.checkpoint_dir = None
+config.max_num_last_checkpoint = 1
diff --git a/configs/ms1mv3_r50.py b/configs/ms1mv3_r50.py
new file mode 100644
index 0000000..e5f556c
--- /dev/null
+++ b/configs/ms1mv3_r50.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from easydict import EasyDict as edict
+
+config = edict()
+config.is_static = True
+config.backbone = 'FresResNet50'
+config.classifier = 'LargeScaleClassifier'
+config.embedding_size = 512
+config.model_parallel = True
+config.sample_ratio = 0.1
+config.loss = 'ArcFace'
+config.dropout = 0.0
+
+config.lr = 0.1  # for global batch size = 512
+config.lr_decay = 0.1
+config.weight_decay = 5e-4
+config.momentum = 0.9
+config.train_unit = 'epoch'  # 'step' or 'epoch'
+config.warmup_num = 0
+config.train_num = 25
+config.decay_boundaries = [10, 16, 22]
+
+config.use_synthetic_dataset = False
+config.dataset = "MS1M_v3"
+config.data_dir = "./MS1M_v3"
+config.label_file = "./MS1M_v3/label.txt"
+config.is_bin = False
+config.num_classes = 93431  # 85742 for MS1M_v2, 93431 for MS1M_v3
+config.batch_size = 128  # per GPU; global batch size 1024 with 8 GPUs
+config.num_workers = 8
+
+config.do_validation_while_train = True
+config.validation_interval_step = 2000
+config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
+
+config.logdir = './log'
+config.log_interval_step = 100
+config.output = './MS1M_v3_arcface'
+config.resume = False
+config.checkpoint_dir = None
+config.max_num_last_checkpoint = 1
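Unlike the base config above, both presets train by epoch (train_unit = 'epoch'), so warmup_num, train_num and decay_boundaries count epochs. A hedged sketch of the epoch-to-step conversion a step-based scheduler would need; the sample count is the MS1M_v3 size that SyntheticDataset below also hard-codes, and the variable names are illustrative:

    num_samples = 5179510              # MS1M_v3 image count
    global_batch_size = 128 * 8        # per-GPU batch size x 8 GPUs
    steps_per_epoch = (num_samples + global_batch_size - 1) // global_batch_size
    boundaries_in_steps = [e * steps_per_epoch for e in [10, 16, 22]]
    # -> decay at roughly steps 50590, 80944 and 111298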
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000..c97f8e4
--- /dev/null
+++ b/datasets/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .common_dataset import CommonDataset, SyntheticDataset, load_bin
diff --git a/datasets/common_dataset.py b/datasets/common_dataset.py
new file mode 100644
index 0000000..e1d5a15
--- /dev/null
+++ b/datasets/common_dataset.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pickle
+import os
+import cv2
+import six
+import random
+import paddle
+import numpy as np
+import logging
+from PIL import Image
+from io import BytesIO
+
+from datasets.kv_helper import read_img_from_bin
+
+
+def transform(img):
+    # random horizontal flip
+    if random.randint(0, 1) == 0:
+        img = cv2.flip(img, 1)
+    # normalize to mean 0.5, std 0.5
+    img = (img - 127.5) * 0.00784313725
+    # BGR2RGB
+    img = img[:, :, ::-1]
+    img = img.transpose((2, 0, 1))
+    return img
+
+
+class CommonDataset(paddle.io.Dataset):
+    def __init__(self, root_dir, label_file, fp16=False, is_bin=True):
+        super(CommonDataset, self).__init__()
+        self.root_dir = root_dir
+        self.label_file = label_file
+        self.fp16 = fp16
+        with open(label_file, "r") as fin:
+            self.full_lines = fin.readlines()
+
+        self.delimiter = "\t"
+        self.is_bin = is_bin
+
+        self.num_samples = len(self.full_lines)
+        logging.info("finished reading label file, total num: {}"
+                     .format(self.num_samples))
+
+    def __getitem__(self, idx):
+
+        line = self.full_lines[idx]
+
+        img_path, label = line.strip().split(self.delimiter)
+        img_path = os.path.join(self.root_dir, img_path)
+        if self.is_bin:
+            img = read_img_from_bin(img_path)
+        else:
+            img = cv2.imread(img_path)
+
+        img = transform(img)
+
+        img = img.astype('float16' if self.fp16 else 'float32')
+        label = np.int32(label)
+
+        return img, label
+
+    def __len__(self):
+        return self.num_samples
+
+
+class SyntheticDataset(paddle.io.Dataset):
+    def __init__(self, num_classes, fp16=False):
+        super(SyntheticDataset, self).__init__()
+        self.num_classes = num_classes
+        self.fp16 = fp16
+        self.label_list = np.random.randint(
+            0, num_classes, (5179510, ), dtype=np.int32)
+        self.num_samples = len(self.label_list)
+
+    def __getitem__(self, idx):
+        label = self.label_list[idx]
+        img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8)
+        img = transform(img)
+
+        img = img.astype('float16' if self.fp16 else 'float32')
+        label = np.int32(label)
+
+        return img, label
+
+    def __len__(self):
+        return self.num_samples
+
+
+# returns numpy arrays
+def load_bin(path, image_size):
+    if six.PY2:
+        bins, issame_list = pickle.load(open(path, 'rb'))
+    else:
+        bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
+    data_list = []
+    for flip in [0, 1]:
+        data = np.empty(
+            (len(issame_list) * 2, 3, image_size[0], image_size[1]))
+        data_list.append(data)
+    for i in range(len(issame_list) * 2):
+        _bin = bins[i]
+        if six.PY2:
+            if not isinstance(_bin, six.string_types):
+                _bin = _bin.tostring()
+            # io.BytesIO also accepts PY2 byte strings; StringIO was never imported
+            img_ori = Image.open(BytesIO(_bin))
+        else:
+            img_ori = Image.open(BytesIO(_bin))
+        for flip in [0, 1]:
+            img = img_ori.copy()
+            if flip == 1:
+                img = img.transpose(Image.FLIP_LEFT_RIGHT)
+            if img.mode != 'RGB':
+                img = img.convert('RGB')
+            img = np.array(img).astype('float32').transpose((2, 0, 1))
+            img = (img - 127.5) * 0.00784313725
+            data_list[flip][i][:] = img
+        if i % 1000 == 0:
+            print('loading bin', i)
+    print(data_list[0].shape)
+    return data_list, issame_list
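CommonDataset yields (CHW float image normalized to [-1, 1], int32 label) pairs, so it plugs straight into paddle.io.DataLoader. A minimal wiring sketch with example paths — the real entry points in this patch do the equivalent in dynamic/train.py and static/train.py:

    import paddle
    from datasets import CommonDataset

    dataset = CommonDataset(
        root_dir="./MS1M_v3", label_file="./MS1M_v3/label.txt", is_bin=False)
    loader = paddle.io.DataLoader(
        dataset, batch_size=128, shuffle=True, num_workers=8)
    for img, label in loader:
        # img: [N, 3, 112, 112] float32 for MS1M images; label: [N] int32
        break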
diff --git a/datasets/kv_helper.py b/datasets/kv_helper.py
new file mode 100644
index 0000000..43bff7c
--- /dev/null
+++ b/datasets/kv_helper.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pickle
+import struct
+import random
+import multiprocessing
+import numpy as np
+import cv2
+import json
+
+
+def readkv(f):
+    """readkv"""
+    keylendata = f.read(4)
+    if len(keylendata) != 4:
+        return None
+    keylen = struct.unpack('I', keylendata)[0]
+    if keylen > 5000:
+        raise Exception('wrong key len ' + str(keylen))
+    key = f.read(keylen)
+    valuelen = struct.unpack('I', f.read(4))[0]
+    value = f.read(valuelen)
+    return key, value
+
+
+def writekv(f, k, v, flush=True):
+    """writekv"""
+    f.write(struct.pack('I', len(k)))
+    f.write(k)
+    f.write(struct.pack('I', len(v)))
+    f.write(v)
+    if flush:
+        f.flush()
+    return
+
+
+def trans_img_to_bin(img_name, output_path):
+    with open(img_name, "rb") as fin:
+        img = fin.read()
+    key = os.path.split(img_name)[-1]
+    with open(output_path, "wb") as fout:
+        writekv(fout, key.encode(), pickle.dumps(img, -1))
+    return
+
+
+def read_img_from_bin(input_path):
+    # the file format can hold many key-value pairs, but in fact it stores just one.
+    with open(input_path, "rb") as fin:
+        r = readkv(fin)
+        assert r is not None
+        _, value = r
+        value = pickle.loads(value)
+        value = np.frombuffer(value, dtype='uint8')
+        img = cv2.imdecode(value, 1)
+        return img
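The container is a length-prefixed key-value record: a 4-byte key length (struct 'I'), the key bytes, a 4-byte value length, then the pickled value. A round-trip sketch using only the helpers above (file names are examples):

    from datasets.kv_helper import trans_img_to_bin, read_img_from_bin

    # pack one JPEG into a single-record bin file, then decode it back
    trans_img_to_bin("00000001.jpg", "00000001.bin")
    img = read_img_from_bin("00000001.bin")  # BGR numpy array via cv2.imdecode
    print(img.shape)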
diff --git a/demo/custom_reader.py b/demo/custom_reader.py
deleted file mode 100644
index 1f7f544..0000000
--- a/demo/custom_reader.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# This demo shows how to use user-defined training dataset.
-# The following steps are needed to use user-defined training datasets:
-# 1. Build a reader, which preprocess images and yield a sample in the
-#    format (data, label) each time, where data is the decoded image data;
-# 2. Batch the above samples;
-# 3. Set the reader to use during training to the above batch reader.
-
-import argparse
-
-import paddle
-from plsc import Entry
-from plsc.utils import jpeg_reader as reader
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--model_save_dir",
-                    type=str,
-                    default="./saved_model",
-                    help="Directory to save models.")
-parser.add_argument("--data_dir",
-                    type=str,
-                    default="./data",
-                    help="Directory for datasets.")
-parser.add_argument("--num_epochs",
-                    type=int,
-                    default=2,
-                    help="Number of epochs to run.")
-parser.add_argument("--loss_type",
-                    type=str,
-                    default='arcface',
-                    help="Loss type to use.")
-args = parser.parse_args()
-
-
-def main():
-    global args
-    ins = Entry()
-    ins.set_model_save_dir(args.model_save_dir)
-    ins.set_dataset_dir(args.data_dir)
-    ins.set_train_epochs(args.num_epochs)
-    ins.set_loss_type(args.loss_type)
-    # 1. Build a reader, which yield a sample in the format (data, label)
-    #    each time, where data is the decoded image data;
-    train_reader = reader.arc_train(args.data_dir,
-                                    ins.num_classes)
-    # 2. Batch the above samples;
-    batched_train_reader = paddle.batch(train_reader,
-                                        ins.train_batch_size)
-    # 3. Set the reader to use during training to the above batch reader.
-    ins.train_reader = batched_train_reader
-
-    ins.train()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 69fe55e..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-SOURCEDIR     = source
-BUILDDIR      = build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/markdown2rst.py b/docs/markdown2rst.py
deleted file mode 100644
index cc48829..0000000
--- a/docs/markdown2rst.py
+++ /dev/null
@@ -1,720 +0,0 @@
-#!/usr/bin/env python3
-"""
-    markdown to rst
-"""
-# -*- coding: utf-8 -*-
-
-from __future__ import print_function, unicode_literals
-import os
-import os.path
-import re
-import sys
-from argparse import ArgumentParser, Namespace
-
-from docutils import statemachine, nodes, io, utils
-from docutils.parsers import rst
-from docutils.core import ErrorString
-from docutils.utils import SafeString, column_width
-import mistune
-
-if sys.version_info < (3, ):
-    from codecs import open as _open
-    from urlparse import urlparse
-else:
-    _open = open
-    from urllib.parse import urlparse
-
-__version__ = '0.2.1'
-_is_sphinx = False
-prolog = '''\
-.. role:: raw-html-m2r(raw)
-   :format: html
-
-'''
-
-# for command-line use
-parser = ArgumentParser()
-options = Namespace()
-parser.add_argument(
-    'input_file', nargs='*', help='files to convert to reST format')
-parser.add_argument(
-    '--overwrite',
-    action='store_true',
-    default=False,
-    help='overwrite output file without confirmaion')
-parser.add_argument(
-    '--dry-run',
-    action='store_true',
-    default=False,
-    help='print conversion result and not save output file')
-parser.add_argument(
-    '--no-underscore-emphasis',
-    action='store_true',
-    default=False,
-    help='do not use underscore (_) for emphasis')
-parser.add_argument(
-    '--parse-relative-links',
-    action='store_true',
-    default=False,
-    help='parse relative links into ref or doc directives')
-parser.add_argument(
-    '--anonymous-references',
-    action='store_true',
-    default=False,
-    help='use anonymous references in generated rst')
-parser.add_argument(
-    '--disable-inline-math',
-    action='store_true',
-    default=False,
-    help='disable parsing inline math')
-
-
-def parse_options():
-    """parse_options"""
-    parser.parse_known_args(namespace=options)
-
-
-class RestBlockGrammar(mistune.BlockGrammar):
-    """RestBlockGrammar"""
-    directive = re.compile(
-        r'^( *\.\..*?)\n(?=\S)',
-        re.DOTALL | re.MULTILINE, )
-    oneline_directive = re.compile(
-        r'^( *\.\..*?)$',
-        re.DOTALL | re.MULTILINE, )
-    rest_code_block = re.compile(
-        r'^::\s*$',
-        re.DOTALL | re.MULTILINE, )
-
-
-class RestBlockLexer(mistune.BlockLexer):
-    """RestBlockLexer"""
-    grammar_class = RestBlockGrammar
-    default_rules = [
-        'directive',
-        'oneline_directive',
-        'rest_code_block',
-    ] + mistune.BlockLexer.default_rules
-
-    def parse_directive(self, m):
-        """parse_directive"""
-        self.tokens.append({
-            'type': 'directive',
-            'text': m.group(1),
-        })
-
-    def parse_oneline_directive(self, m):
-        """parse_oneline_directive"""
-        # reuse directive output
-        self.tokens.append({
-            'type': 'directive',
-            'text': m.group(1),
-        })
-
-    def parse_rest_code_block(self, m):
-        """parse_rest_code_block"""
-        self.tokens.append({'type': 'rest_code_block', })
-
-
-class RestInlineGrammar(mistune.InlineGrammar):
-    """RestInlineGrammar"""
-    image_link = re.compile(
-        r'\[!\[(?P<alt>.*?)\]\((?P<url>.*?)\).*?\]\((?P<target>.*?)\)')
-    rest_role = re.compile(r':.*?:`.*?`|`[^`]+`:.*?:')
-    rest_link = re.compile(r'`[^`]*?`_')
-    inline_math = re.compile(r'.*\$(.*)?\$')
-    eol_literal_marker = re.compile(r'(\s+)?::\s*$')
-    # add colon and space as special text
-    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')
-    # __word__ or **word**
-    double_emphasis = re.compile(
-        r'^([_*]){2}(?P<text>[\s\S]+?)\1{2}(?!\1)'  # ** or __ word
-    )
-    # _word_ or *word*
-    emphasis = re.compile(r'^\b_((?:__|[^_])+?)_\b'  # _word_
-                          r'|'
-                          r'^\*(?P<text>(?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
-                          )
-
-    def no_underscore_emphasis(self):
-        """no_underscore_emphasis"""
-        self.double_emphasis = re.compile(
-            r'^\*{2}(?P<text>[\s\S]+?)\*{2}(?!\*)'  # **word**
-        )
-        self.emphasis = re.compile(
-            r'^\*(?P<text>(?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
-        )
-
-
-class RestInlineLexer(mistune.InlineLexer):
-    """RestInlineLexer"""
-    grammar_class = RestInlineGrammar
-    default_rules = [
-        'image_link',
-        'rest_role',
-        'rest_link',
-        'eol_literal_marker',
-    ] + mistune.InlineLexer.default_rules
-
-    def __init__(self, *args, **kwargs):
-        no_underscore_emphasis = kwargs.pop('no_underscore_emphasis', False)
-        disable_inline_math = kwargs.pop('disable_inline_math', False)
-        super(RestInlineLexer, self).__init__(*args, **kwargs)
-        if not _is_sphinx:
-            parse_options()
-        if no_underscore_emphasis or getattr(options, 'no_underscore_emphasis',
-                                             False):
-            self.rules.no_underscore_emphasis()
-        inline_maths = 'inline_math' in self.default_rules
-        if disable_inline_math or getattr(options, 'disable_inline_math',
-                                          False):
-            if inline_maths:
-                self.default_rules.remove('inline_math')
-        elif not inline_maths:
-            self.default_rules.insert(0, 'inline_math')
-
-    def output_double_emphasis(self, m):
-        """output_double_emphasis"""
-        # may include code span
-        text = self.output(m.group('text'))
-        return self.renderer.double_emphasis(text)
-
-    def output_emphasis(self, m):
-        """output_emphasis"""
-        # may include code span
-        text = self.output(m.group('text') or m.group(1))
-        return self.renderer.emphasis(text)
-
-    def output_image_link(self, m):
-        """Pass through rest role."""
-        return self.renderer.image_link(
-            m.group('url'), m.group('target'), m.group('alt'))
-
-    def output_rest_role(self, m):
-        """Pass through rest role."""
-        return self.renderer.rest_role(m.group(0))
-
-    def output_rest_link(self, m):
-        """Pass through rest link."""
-        return self.renderer.rest_link(m.group(0))
-
-    def output_inline_math(self, m):
-        """Pass through rest link."""
-        return self.renderer.inline_math(m.group(0))
-
-    def output_eol_literal_marker(self, m):
-        """Pass through rest link."""
-        marker = ':' if m.group(1) is None else ''
-        return self.renderer.eol_literal_marker(marker)
-
-
-class RestRenderer(mistune.Renderer):
-    """RestRenderer"""
-    _include_raw_html = False
-    list_indent_re = re.compile(r'^(\s*(#\.|\*)\s)')
-    indent = ' ' * 3
-    list_marker = '{#__rest_list_mark__#}'
-    hmarks = {
-        1: '=',
-        2: '-',
-        3: '^',
-        4: '~',
-        5: '"',
-        6: '#',
-    }
-
-    def __init__(self, *args, **kwargs):
-        self.parse_relative_links = kwargs.pop('parse_relative_links', False)
-        self.anonymous_references = kwargs.pop('anonymous_references', False)
-        super(RestRenderer, self).__init__(*args, **kwargs)
-        if not _is_sphinx:
-            parse_options()
-            if getattr(options, 'parse_relative_links', False):
-                self.parse_relative_links = options.parse_relative_links
-            if getattr(options, 'anonymous_references', False):
-                self.anonymous_references = options.anonymous_references
-
-    def _indent_block(self, block):
-        return '\n'.join(self.indent + line if line else ''
-                         for line in block.splitlines())
-
-    def _raw_html(self, html):
-        self._include_raw_html = True
-        return '\ :raw-html-m2r:`{}`\ '.format(html)
-
-    def block_code(self, code, lang=None):
-        """block_code"""
-        if lang == 'math':
-            first_line = '\n.. math::\n\n'
-        elif lang:
-            first_line = '\n.. code-block:: {}\n\n'.format(lang)
-        elif _is_sphinx:
-            first_line = '\n.. code-block:: guess\n\n'
-        else:
-            first_line = '\n.. code-block::\n\n'
-        return first_line + self._indent_block(code) + '\n'
-
-    def block_quote(self, text):
-        """block_quote"""
-        # text includes some empty line
-        return '\n..\n\n{}\n\n'.format(self._indent_block(text.strip('\n')))
-
-    def block_html(self, html):
-        """Rendering block level pure html content.
-
-        :param html: text content of the html snippet.
-        """
-        return '\n\n.. raw:: html\n\n' + self._indent_block(html) + '\n\n'
-
-    def header(self, text, level, raw=None):
-        """Rendering header/heading tags like ``<h1>`` ``<h2>``.
-
-        :param text: rendered text content for the header.
-        :param level: a number for the header level, for example: 1.
-        :param raw: raw text content of the header.
-        """
-        return '\n{0}\n{1}\n'.format(text,
-                                     self.hmarks[level] * column_width(text))
-
-    def hrule(self):
-        """Rendering method for ``<hr>`` tag."""
-        return '\n----\n'
-
-    def list(self, body, ordered=True):
-        """Rendering list tags like ``<ul>