Release a Chinese MRP model with Mengzi PLM

hankcs · hankcs · commit 9b3a786ea514 · 2022-04-15T12:49:09.000-04:00
diff --git a/docs/references.bib b/docs/references.bib
@@ -1,13 +1,21 @@
 %% This BibTeX bibliography file was created using BibDesk.
 %% https://bibdesk.sourceforge.io/
 
-%% Created for hankcs at 2022-04-12 22:36:24 -0400 
+%% Created for hankcs at 2022-04-15 10:32:15 -0400 
 
 
 %% Saved with string encoding Unicode (UTF-8) 
 
 
 
+@article{zhang2021mengzi,
+	author = {Zhang, Zhuosheng and Zhang, Hanqing and Chen, Keming and Guo, Yuhang and Hua, Jingyun and Wang, Yulong and Zhou, Ming},
+	date-added = {2022-04-15 10:32:14 -0400},
+	date-modified = {2022-04-15 10:32:14 -0400},
+	journal = {arXiv preprint arXiv:2110.06696},
+	title = {Mengzi: Towards Lightweight yet Ingenious Pre-trained Models for Chinese},
+	year = {2021}}
+
 @inproceedings{samuel-straka-2020-ufal,
 	abstract = {We present PERIN, a novel permutation-invariant approach to sentence-to-graph semantic parsing. PERIN is a versatile, cross-framework and language independent architecture for universal modeling of semantic structures. Our system participated in the CoNLL 2020 shared task, Cross-Framework Meaning Representation Parsing (MRP 2020), where it was evaluated on five different frameworks (AMR, DRG, EDS, PTG and UCCA) across four languages. PERIN was one of the winners of the shared task. The source code and pretrained models are available at http://www.github.com/ufal/perin.},
 	address = {Online},
diff --git a/hanlp/pretrained/amr.py b/hanlp/pretrained/amr.py
@@ -34,5 +34,13 @@
 provided as inputs. 
 '''
 
+MRP2020_AMR_ZHO_MENGZI_BASE = 'http://download.hanlp.com/amr/extra/amr-zho-mengzi-base_20220415_101941.zip'
+'''A Chinese Permutation-invariant Semantic Parser (:cite:`samuel-straka-2020-ufal`) trained on the MRP2020 
+Chinese AMR corpus using Mengzi BERT base (:cite:`zhang2021mengzi`). Its performance on the dev set is 
+``{amr-zho [tops F1: 85.43%][anchors F1: 93.41%][labels F1: 87.68%][properties F1: 82.02%][edges F1: 73.17%]
+[attributes F1: 0.00%][all F1: 84.11%]}``. Test set performance is unknown since the test set is not released to the 
+public. 
+'''
+
 # Will be filled up during runtime
 ALL = {}
diff --git a/hanlp/version.py b/hanlp/version.py
@@ -2,7 +2,7 @@
 # Author: hankcs
 # Date: 2019-12-28 19:26
 
-__version__ = '2.1.0-beta.23'
+__version__ = '2.1.0-beta.24'
 """HanLP version"""
 
 
diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
     'amr': [
         'penman==1.2.1',
         'networkx>=2.5.1',
-        'perin-parser>=0.0.10',
+        'perin-parser>=0.0.12',
     ],
     'tf': [
         'fasttext-wheel==0.9.2',