Skip to content

Commit 9b3a786

Browse files
committed
Release a Chinese MRP model with Mengzi PLM
1 parent 15bb02f commit 9b3a786

File tree

4 files changed

+19
-3
lines changed

4 files changed

+19
-3
lines changed

docs/references.bib

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
11
%% This BibTeX bibliography file was created using BibDesk.
22
%% https://bibdesk.sourceforge.io/
33
4-
%% Created for hankcs at 2022-04-12 22:36:24 -0400
4+
%% Created for hankcs at 2022-04-15 10:32:15 -0400
55
66
77
%% Saved with string encoding Unicode (UTF-8)
88
99
1010
11+
@article{zhang2021mengzi,
12+
author = {Zhang, Zhuosheng and Zhang, Hanqing and Chen, Keming and Guo, Yuhang and Hua, Jingyun and Wang, Yulong and Zhou, Ming},
13+
date-added = {2022-04-15 10:32:14 -0400},
14+
date-modified = {2022-04-15 10:32:14 -0400},
15+
journal = {arXiv preprint arXiv:2110.06696},
16+
title = {Mengzi: Towards Lightweight yet Ingenious Pre-trained Models for Chinese},
17+
year = {2021}}
18+
1119
@inproceedings{samuel-straka-2020-ufal,
1220
abstract = {We present PERIN, a novel permutation-invariant approach to sentence-to-graph semantic parsing. PERIN is a versatile, cross-framework and language independent architecture for universal modeling of semantic structures. Our system participated in the CoNLL 2020 shared task, Cross-Framework Meaning Representation Parsing (MRP 2020), where it was evaluated on five different frameworks (AMR, DRG, EDS, PTG and UCCA) across four languages. PERIN was one of the winners of the shared task. The source code and pretrained models are available at http://www.github.com/ufal/perin.},
1321
address = {Online},

hanlp/pretrained/amr.py

+8
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,13 @@
3434
provided as inputs.
3535
'''
3636

37+
MRP2020_AMR_ZHO_MENGZI_BASE = 'http://download.hanlp.com/amr/extra/amr-zho-mengzi-base_20220415_101941.zip'
38+
'''A Chinese Permutation-invariant Semantic Parser (:cite:`samuel-straka-2020-ufal`) trained on the MRP2020
39+
Chinese AMR corpus using Mengzi BERT base (:cite:`zhang2021mengzi`). Its performance on the dev set is
40+
``{amr-zho [tops F1: 85.43%][anchors F1: 93.41%][labels F1: 87.68%][properties F1: 82.02%][edges F1: 73.17%]
41+
[attributes F1: 0.00%][all F1: 84.11%]}``. Test set performance is unknown since the test set is not released to the
42+
public.
43+
'''
44+
3745
# Will be filled up during runtime
3846
ALL = {}

hanlp/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Author: hankcs
33
# Date: 2019-12-28 19:26
44

5-
__version__ = '2.1.0-beta.23'
5+
__version__ = '2.1.0-beta.24'
66
"""HanLP version"""
77

88

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
'amr': [
1616
'penman==1.2.1',
1717
'networkx>=2.5.1',
18-
'perin-parser>=0.0.10',
18+
'perin-parser>=0.0.12',
1919
],
2020
'tf': [
2121
'fasttext-wheel==0.9.2',

0 commit comments

Comments
 (0)