Skip to content

Commit 66d5711

Browse files
zRzRzRzRzRzRzR, zucchini-nlp, ArthurZucker
authored
GLM-V update with new processor (#42122)
* init * update * add * Update video_processing_glm46v.py * update doc * Update modular_glm46v.py * 2 * Update processing_glm46v.py * 21 * Update check_repo.py * Update check_repo.py * Update test_processor_glm46v.py * Update modeling_auto.py * update * Update glm46v.md * Update configuration_auto.py * 2 * update with glm46v import * uppercase * upload * upload * upload with modular * 1 * - * update * 1 * 2 * 1 * 2 * 2 * 1 * update config * 1 * update as automoel * 1 * try remove * delete * delete * test * update * 1 * Update modular_glm46v.py * Update test_modeling_glm46v.py * update 1513 * 1 * use PreTrainedConfig * Update modular_glm46v.py * Update configuration_glm46v.py * model_type = "glm46v" * remove glm46v_text * Update image_processing_auto.py * 1 * update readme * GLM-4.6V * update * update * Update __init__.py * update * update doc * Update check_docstrings.py * update doc * fix copies for tied weight keys! * more fixup --------- Co-authored-by: Raushan Turganbay <[email protected]> Co-authored-by: Arthur <[email protected]> Co-authored-by: Arthur <[email protected]>
1 parent 8598421 commit 66d5711

36 files changed

+3928
-226
lines changed

docs/source/en/_toctree.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,8 @@
10661066
title: Gemma3n
10671067
- local: model_doc/git
10681068
title: GIT
1069+
- local: model_doc/glm46v
1070+
title: Glm46V
10691071
- local: model_doc/glm4v
10701072
title: glm4v
10711073
- local: model_doc/glm4v_moe

docs/source/en/model_doc/glm46v.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# GLM-4.6V
2+
3+
## Glm46VConfig
4+
5+
[[autodoc]] Glm46VConfig
6+
7+
## Glm46VImageProcessor
8+
9+
[[autodoc]] Glm46VImageProcessor
10+
- preprocess
11+
12+
## Glm46VVideoProcessor
13+
14+
[[autodoc]] Glm46VVideoProcessor
15+
- preprocess
16+
17+
## Glm46VImageProcessorFast
18+
19+
[[autodoc]] Glm46VImageProcessorFast
20+
- preprocess
21+
22+
## Glm46VProcessor
23+
24+
[[autodoc]] Glm46VProcessor
25+
26+
## Glm46VModel
27+
28+
[[autodoc]] Glm46VModel
29+
- forward
30+
31+
## Glm46VForConditionalGeneration
32+
33+
[[autodoc]] Glm46VForConditionalGeneration
34+
- forward

docs/source/en/model_doc/glm4v.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ print(output_text)
170170

171171
[[autodoc]] Glm4vConfig
172172

173+
174+
## Glm4vVisionConfig
175+
176+
[[autodoc]] Glm4vVisionConfig
177+
173178
## Glm4vTextConfig
174179

175180
[[autodoc]] Glm4vTextConfig
@@ -193,6 +198,11 @@ print(output_text)
193198

194199
[[autodoc]] Glm4vProcessor
195200

201+
## Glm4vVisionModel
202+
203+
[[autodoc]] Glm4vVisionModel
204+
- forward
205+
196206
## Glm4vTextModel
197207

198208
[[autodoc]] Glm4vTextModel

docs/source/en/model_doc/glm4v_moe.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ rendered properly in your Markdown viewer.
2222
<img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white"> </div>
2323
</div>
2424

25-
# Glm4vMoe
25+
# Glm4vMoe
2626

2727
## Overview
2828

@@ -48,10 +48,20 @@ The model also introduces a **Thinking Mode** switch, allowing users to balance
4848

4949
[[autodoc]] Glm4vMoeConfig
5050

51+
52+
## Glm4vMoeVisionConfig
53+
54+
[[autodoc]] Glm4vMoeVisionConfig
55+
5156
## Glm4vMoeTextConfig
5257

5358
[[autodoc]] Glm4vMoeTextConfig
5459

60+
## Glm4vMoeVisionModel
61+
62+
[[autodoc]] Glm4vMoeVisionModel
63+
- forward
64+
5565
## Glm4vMoeTextModel
5666

5767
[[autodoc]] Glm4vMoeTextModel
@@ -65,4 +75,4 @@ The model also introduces a **Thinking Mode** switch, allowing users to balance
6575
## Glm4vMoeForConditionalGeneration
6676

6777
[[autodoc]] Glm4vMoeForConditionalGeneration
68-
- forward
78+
- forward

src/transformers/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@
142142
from .git import *
143143
from .glm import *
144144
from .glm4 import *
145+
from .glm4v import *
146+
from .glm4v_moe import *
147+
from .glm46v import *
145148
from .glpn import *
146149
from .got_ocr2 import *
147150
from .gpt2 import *

src/transformers/models/auto/configuration_auto.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,14 @@
172172
("git", "GitConfig"),
173173
("glm", "GlmConfig"),
174174
("glm4", "Glm4Config"),
175+
("glm46v", "Glm46VConfig"),
175176
("glm4_moe", "Glm4MoeConfig"),
176177
("glm4v", "Glm4vConfig"),
177178
("glm4v_moe", "Glm4vMoeConfig"),
178179
("glm4v_moe_text", "Glm4vMoeTextConfig"),
180+
("glm4v_moe_vision", "Glm4vMoeVisionConfig"),
179181
("glm4v_text", "Glm4vTextConfig"),
182+
("glm4v_vision", "Glm4vVisionConfig"),
180183
("glpn", "GLPNConfig"),
181184
("got_ocr2", "GotOcr2Config"),
182185
("gpt-sw3", "GPT2Config"),
@@ -620,11 +623,14 @@
620623
("git", "GIT"),
621624
("glm", "GLM"),
622625
("glm4", "GLM4"),
626+
("glm46v", "Glm46V"),
623627
("glm4_moe", "Glm4MoE"),
624628
("glm4v", "GLM4V"),
625629
("glm4v_moe", "GLM4VMOE"),
626630
("glm4v_moe_text", "GLM4VMOE"),
631+
("glm4v_moe_vision", "Glm4vMoeVisionModel"),
627632
("glm4v_text", "GLM4V"),
633+
("glm4v_vision", "Glm4vVisionModel"),
628634
("glpn", "GLPN"),
629635
("got_ocr2", "GOT-OCR2"),
630636
("gpt-sw3", "GPT-Sw3"),
@@ -983,6 +989,8 @@
983989
("gemma3n_audio", "gemma3n"),
984990
("gemma3n_text", "gemma3n"),
985991
("gemma3n_vision", "gemma3n"),
992+
("glm4v_vision", "glm4v"),
993+
("glm4v_moe_vision", "glm4v_moe"),
986994
("glm4v_text", "glm4v"),
987995
("glm4v_moe_text", "glm4v_moe"),
988996
("idefics3_vision", "idefics3"),

src/transformers/models/auto/image_processing_auto.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
("gemma3", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
110110
("gemma3n", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
111111
("git", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
112+
("glm46v", ("Glm46VImageProcessor", "Glm46VImageProcessorFast")),
112113
("glm4v", ("Glm4vImageProcessor", "Glm4vImageProcessorFast")),
113114
("glpn", ("GLPNImageProcessor", "GLPNImageProcessorFast")),
114115
("got_ocr2", ("GotOcr2ImageProcessor", "GotOcr2ImageProcessorFast")),

src/transformers/models/auto/modeling_auto.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,14 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
175175
("git", "GitModel"),
176176
("glm", "GlmModel"),
177177
("glm4", "Glm4Model"),
178+
("glm46v", "Glm46VModel"),
178179
("glm4_moe", "Glm4MoeModel"),
179180
("glm4v", "Glm4vModel"),
180181
("glm4v_moe", "Glm4vMoeModel"),
181182
("glm4v_moe_text", "Glm4vMoeTextModel"),
183+
("glm4v_moe_vision", "Glm4vMoeVisionModel"),
182184
("glm4v_text", "Glm4vTextModel"),
185+
("glm4v_vision", "Glm4vVisionModel"),
183186
("glpn", "GLPNModel"),
184187
("got_ocr2", "GotOcr2Model"),
185188
("gpt-sw3", "GPT2Model"),
@@ -1032,6 +1035,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
10321035
("gemma3", "Gemma3ForConditionalGeneration"),
10331036
("gemma3n", "Gemma3nForConditionalGeneration"),
10341037
("git", "GitForCausalLM"),
1038+
("glm46v", "Glm46VForConditionalGeneration"),
10351039
("glm4v", "Glm4vForConditionalGeneration"),
10361040
("glm4v_moe", "Glm4vMoeForConditionalGeneration"),
10371041
("got_ocr2", "GotOcr2ForConditionalGeneration"),

src/transformers/models/auto/processing_auto.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
("gemma3", "Gemma3Processor"),
7676
("gemma3n", "Gemma3nProcessor"),
7777
("git", "GitProcessor"),
78+
("glm46v", "Glm46VProcessor"),
7879
("glm4v", "Glm4vProcessor"),
7980
("glm4v_moe", "Glm4vProcessor"),
8081
("got_ocr2", "GotOcr2Processor"),

src/transformers/models/auto/tokenization_auto.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@
308308
("git", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
309309
("glm", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
310310
("glm4", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
311+
("glm46v", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
311312
("glm4_moe", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
312313
("glm4v", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
313314
("glm4v_moe", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),

0 commit comments

Comments
 (0)