Commit b0b4845

Update Transformers to 4.52

For Transformers versions higher than 4.36, models need a build() function to ensure they load correctly. This commit adds those functions.
1 parent b73ae20
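To illustrate the pattern this commit adds, here is a minimal, self-contained sketch (simplified, assumed names; the real implementations are in src/bicleaner_ai/layers.py and src/bicleaner_ai/models.py below). In Transformers versions above 4.36, TF weights are created through explicit build() methods under per-sublayer name scopes rather than through a dummy forward pass, so custom layers have to build their sublayers themselves:

```python
import tensorflow as tf

class Head(tf.keras.layers.Layer):
    """Toy two-layer classification head with an explicit build()."""
    def __init__(self, hidden_size=768, head_hidden_size=2048, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.head_hidden_size = head_hidden_size
        self.dense = tf.keras.layers.Dense(head_hidden_size, name="dense")
        self.out_proj = tf.keras.layers.Dense(2, name="out_proj")

    def call(self, features, training=False):
        return self.out_proj(self.dense(features[:, 0, :]))

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        # Create each sublayer's weights under its own name scope so a
        # checkpoint can later be restored by name, with no forward pass.
        with tf.name_scope(self.dense.name):
            self.dense.build([None, None, self.hidden_size])
        with tf.name_scope(self.out_proj.name):
            self.out_proj.build([None, None, self.head_hidden_size])

head = Head(name="classifier")
head.build()  # weights exist even though the layer never saw an input
print([w.shape for w in head.weights])
# [(768, 2048), (2048,), (2048, 2), (2,)]
```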

4 files changed: 37 additions, 4 deletions

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Bicleaner AI 3.3.0:
+### Changed:
+- Update Transformers to 4.52
+- Add build methods to BCAI XLMR custom layers to load the models correctly.
+
 ## Bicleaner AI 3.2.0:
 ### Added:
 - Option to require GPU.
```

pyproject.toml

Lines changed: 4 additions & 4 deletions
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "bicleaner-ai"
-version = "3.2.1"
+version = "3.3.0"
 license = {file = "LICENSE"}
 authors = [
     { "name" = "Prompsit Language Engineering", "email" = "[email protected]" }
@@ -19,15 +19,15 @@ dependencies = [
     "toolwrapper",
     "joblib",
     "sacremoses",
-    "bicleaner-hardrules==2.10.4",
+    "bicleaner-hardrules==2.10.6",
     "sentencepiece",
     "protobuf==3.20.3",
     "tensorflow>=2.6.5,<2.16",
     "bicleaner-ai-glove==0.2.1",
     "fuzzywuzzy",
     "python-Levenshtein",
-    "transformers==4.36.1",
-    "huggingface-hub>=0.22,<0.23",
+    "transformers==4.52.4",
+    "huggingface-hub>=0.30,<0.31",
     "zstandard",
     "psutil",
     "regex",
```

src/bicleaner_ai/layers.py

Lines changed: 18 additions & 0 deletions
```diff
@@ -88,6 +88,7 @@ def __init__(self, config, **kwargs):
             kernel_initializer=get_initializer(config.initializer_range),
             name="out_proj"
         )
+        self.config = config
 
     def call(self, features, training=False):
         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
@@ -96,3 +97,20 @@ def call(self, features, training=False):
         x = self.dropout(x, training=training)
         x = self.out_proj(x)
         return x
+
+    def build(self, input_shape=None):
+        if self.built:
+            return
+        self.built = True
+        if getattr(self, "dense", None) is not None:
+            with tf.name_scope(self.dense.name):
+                # Build with config.hidden_size, not config.head_hidden_size:
+                # the last input dimension of self.dense is hidden_size
+                # (768, the size of the last XLM-R hidden state), while
+                # head_hidden_size (typically 2048) is only its number of
+                # units, so the dense kernel has shape
+                # (hidden_size, head_hidden_size), i.e. (768, 2048).
+                self.dense.build([None, None, self.config.hidden_size])
+        if getattr(self, "out_proj", None) is not None:
+            with tf.name_scope(self.out_proj.name):
+                self.out_proj.build([None, None, self.config.head_hidden_size])
```
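
As a sanity check on the comment above (a standalone snippet, assuming the TensorFlow <2.16 pin from pyproject.toml): a Dense layer's kernel is shaped (input_dim, units), so the head's dense layer must be built with hidden_size as its input dimension even though head_hidden_size is its unit count.

```python
import tensorflow as tf

hidden_size, head_hidden_size = 768, 2048
dense = tf.keras.layers.Dense(head_hidden_size, name="dense")
# build() receives the *input* shape; only its last dimension matters.
dense.build([None, None, hidden_size])
print(dense.kernel.shape)  # (768, 2048): (input_dim, units)
```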

src/bicleaner_ai/models.py

Lines changed: 10 additions & 0 deletions
```diff
@@ -646,3 +646,13 @@ def __init__(self, config, *inputs, **kwargs):
         self.classifier = BicleanerAIClassificationHead(config,
                                                         name=name)
 
+    def build(self, input_shape=None):
+        if self.built:
+            return
+        self.built = True
+        if getattr(self, "roberta", None) is not None:
+            with tf.name_scope(self.roberta.name):
+                self.roberta.build(None)
+        if getattr(self, "classifier", None) is not None:
+            with tf.name_scope(self.classifier.name):
+                self.classifier.build(None)
```
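
The model-level build() only delegates: the backbone and the classification head know their weight shapes from the config, so each can be built with input_shape=None before any data is seen. A toy analogue of that delegation (simplified, assumed classes; the real sublayers are the TF XLM-R main layer and BicleanerAIClassificationHead):

```python
import tensorflow as tf

class ToyEncoder(tf.keras.layers.Layer):
    """Stand-in for the XLM-R backbone: its weight shapes come from
    configuration alone, so it can build without an input shape."""
    def __init__(self, hidden_size=768, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        self.kernel = self.add_weight(
            name="kernel", shape=(self.hidden_size, self.hidden_size))

class ToyModel(tf.keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.roberta = ToyEncoder(name="roberta")

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        # Delegate under the sublayer's name scope, as models.py does,
        # so restored variable names line up with the checkpoint's.
        if getattr(self, "roberta", None) is not None:
            with tf.name_scope(self.roberta.name):
                self.roberta.build(None)

model = ToyModel()
model.build()  # all weights created, no forward pass required
print([w.name for w in model.weights])
```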
