Skip to content

Commit 15e511c

Browse files
committed
apply changes
1 parent f174f8a commit 15e511c

26 files changed

+65
-66
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
8686

8787
fix-copies:
8888
python utils/check_copies.py --fix_and_overwrite
89+
python utils/check_docstrings.py --fix_and_overwrite
8990
python utils/check_modular_conversion.py --fix_and_overwrite
9091
python utils/check_dummies.py --fix_and_overwrite
9192
python utils/check_pipeline_typing.py --fix_and_overwrite
9293
python utils/check_doctest_list.py --fix_and_overwrite
93-
python utils/check_docstrings.py --fix_and_overwrite
9494

9595
# Run tests for the library
9696

src/transformers/models/chameleon/image_processing_chameleon.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class ChameleonImageProcessor(BaseImageProcessor):
4949
Constructs a Chameleon image processor.
5050
5151
Args:
52-
do_resize (`bool`, *optional*, defaults to `True`):
52+
do_resize (`bool`, *optional*, defaults to True):
5353
Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by
5454
`do_resize` in the `preprocess` method.
5555
size (`dict[str, int]`, *optional*, defaults to `{"shortest_edge": 512}`):
@@ -58,19 +58,19 @@ class ChameleonImageProcessor(BaseImageProcessor):
5858
method.
5959
resample (`PILImageResampling`, *optional*, defaults to 1):
6060
Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method.
61-
do_center_crop (`bool`, *optional*, defaults to `True`):
61+
do_center_crop (`bool`, *optional*, defaults to True):
6262
Whether to center crop the image to the specified `crop_size`. Can be overridden by `do_center_crop` in the
6363
`preprocess` method.
6464
crop_size (`dict[str, int]`, *optional*, defaults to `{"height": 512, "width": 512}`):
6565
Size of the output image after applying `center_crop`. Can be overridden by `crop_size` in the `preprocess`
6666
method.
67-
do_rescale (`bool`, *optional*, defaults to `True`):
67+
do_rescale (`bool`, *optional*, defaults to True):
6868
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
6969
the `preprocess` method.
7070
rescale_factor (`int` or `float`, *optional*, defaults to 0.0078):
7171
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
7272
method.
73-
do_normalize (`bool`, *optional*, defaults to `True`):
73+
do_normalize (`bool`, *optional*, defaults to True):
7474
Whether to normalize the image. Can be overridden by `do_normalize` in the `preprocess` method.
7575
image_mean (`float` or `list[float]`, *optional*, defaults to `[1.0, 1.0, 1.0]`):
7676
Mean to use if normalizing the image. This is a float or list of floats the length of the number of
@@ -79,7 +79,7 @@ class ChameleonImageProcessor(BaseImageProcessor):
7979
Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
8080
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
8181
Can be overridden by the `image_std` parameter in the `preprocess` method.
82-
do_convert_rgb (`bool`, *optional*, defaults to `True`):
82+
do_convert_rgb (`bool`, *optional*, defaults to True):
8383
Whether to convert the image to RGB.
8484
"""
8585

src/transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# See the License for the specific language governing permissions and
2121
# limitations under the License.
2222

23+
2324
from ...configuration_utils import PretrainedConfig
2425
from ...utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
2526

@@ -60,7 +61,7 @@ class Dinov2WithRegistersConfig(BackboneConfigMixin, PretrainedConfig):
6061
The size (resolution) of each patch.
6162
num_channels (`int`, *optional*, defaults to 3):
6263
The number of input channels.
63-
qkv_bias (`bool`, *optional*, defaults to `True`):
64+
qkv_bias (`bool`, *optional*, defaults to True):
6465
Whether to add a bias to the queries, keys and values.
6566
layerscale_value (`float`, *optional*, defaults to 1.0):
6667
Initial value to use for layer scale.
@@ -80,9 +81,9 @@ class Dinov2WithRegistersConfig(BackboneConfigMixin, PretrainedConfig):
8081
many stages the model has). If unset and `out_features` is set, will default to the corresponding stages.
8182
If unset and `out_features` is unset, will default to the last stage. Must be in the
8283
same order as defined in the `stage_names` attribute.
83-
apply_layernorm (`bool`, *optional*, defaults to `True`):
84+
apply_layernorm (`bool`, *optional*, defaults to True):
8485
Whether to apply layer normalization to the feature maps in case the model is used as backbone.
85-
reshape_hidden_states (`bool`, *optional*, defaults to `True`):
86+
reshape_hidden_states (`bool`, *optional*, defaults to True):
8687
Whether to reshape the feature maps to 4D tensors of shape `(batch_size, hidden_size, height, width)` in
8788
case the model is used as backbone. If `False`, the feature maps will be 3D tensors of shape `(batch_size,
8889
seq_len, hidden_size)`.

src/transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class Dinov2WithRegistersConfig(BackboneConfigMixin, PretrainedConfig):
7474
The size (resolution) of each patch.
7575
num_channels (`int`, *optional*, defaults to 3):
7676
The number of input channels.
77-
qkv_bias (`bool`, *optional*, defaults to `True`):
77+
qkv_bias (`bool`, *optional*, defaults to True):
7878
Whether to add a bias to the queries, keys and values.
7979
layerscale_value (`float`, *optional*, defaults to 1.0):
8080
Initial value to use for layer scale.
@@ -94,9 +94,9 @@ class Dinov2WithRegistersConfig(BackboneConfigMixin, PretrainedConfig):
9494
many stages the model has). If unset and `out_features` is set, will default to the corresponding stages.
9595
If unset and `out_features` is unset, will default to the last stage. Must be in the
9696
same order as defined in the `stage_names` attribute.
97-
apply_layernorm (`bool`, *optional*, defaults to `True`):
97+
apply_layernorm (`bool`, *optional*, defaults to True):
9898
Whether to apply layer normalization to the feature maps in case the model is used as backbone.
99-
reshape_hidden_states (`bool`, *optional*, defaults to `True`):
99+
reshape_hidden_states (`bool`, *optional*, defaults to True):
100100
Whether to reshape the feature maps to 4D tensors of shape `(batch_size, hidden_size, height, width)` in
101101
case the model is used as backbone. If `False`, the feature maps will be 3D tensors of shape `(batch_size,
102102
seq_len, hidden_size)`.

src/transformers/models/falcon_h1/modeling_falcon_h1.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@
3030
import torch.nn.functional as F
3131
from torch import nn
3232

33-
from transformers.activations import ACT2FN
34-
33+
from ...activations import ACT2FN
3534
from ...cache_utils import (
3635
Cache,
3736
DynamicCache, # we need __iter__ and __len__ of pkv
@@ -503,7 +502,7 @@ def apply_mask_to_padding_states(hidden_states, attention_mask):
503502
return hidden_states
504503

505504

506-
# Adapted from transformers.models.mamba2.modeling_mamba2.Mamba2Mixer
505+
# Adapted from ..mamba2.modeling_mamba2.Mamba2Mixer
507506
class FalconH1Mixer(nn.Module):
508507
"""
509508
FalconH1Mixer is identical to classic Mamba2 mixer classes but differs on two different things
@@ -1219,7 +1218,7 @@ def compute_mup_vector(config):
12191218

12201219

12211220
@auto_docstring
1222-
# Adapted from transformers.models.jamba.modeling_jamba.JambaModel
1221+
# Adapted from ..jamba.modeling_jamba.JambaModel
12231222
class FalconH1Model(FalconH1PreTrainedModel):
12241223
def __init__(self, config: FalconH1Config):
12251224
super().__init__(config)

src/transformers/models/gemma3/configuration_gemma3.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class Gemma3TextConfig(PretrainedConfig):
6969
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
7070
rms_norm_eps (`float`, *optional*, defaults to 1e-06):
7171
The epsilon used by the rms normalization layers.
72-
use_cache (`bool`, *optional*, defaults to `True`):
72+
use_cache (`bool`, *optional*, defaults to True):
7373
Whether or not the model should return the last key/values attentions (not used by all models). Only
7474
relevant if `config.is_decoder=True`.
7575
pad_token_id (`int`, *optional*, defaults to 0):
@@ -78,11 +78,11 @@ class Gemma3TextConfig(PretrainedConfig):
7878
End of stream token id.
7979
bos_token_id (`int`, *optional*, defaults to 2):
8080
Beginning of stream token id.
81-
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
81+
tie_word_embeddings (`bool`, *optional*, defaults to True):
8282
Whether to tie weight embeddings.
8383
rope_theta (`float`, *optional*, defaults to 1000000.0):
8484
The base period of the RoPE embeddings.
85-
attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`):
85+
attention_bias (`bool`, *optional*, defaults to `False`):
8686
Whether to use a bias in the query, key, value and output projection layers during self-attention.
8787
attention_dropout (`float`, *optional*, defaults to 0.0):
8888
The dropout ratio for the attention probabilities.

src/transformers/models/gemma3/modular_gemma3.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class Gemma3TextConfig(Gemma2Config, PretrainedConfig):
9595
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
9696
rms_norm_eps (`float`, *optional*, defaults to 1e-06):
9797
The epsilon used by the rms normalization layers.
98-
use_cache (`bool`, *optional*, defaults to `True`):
98+
use_cache (`bool`, *optional*, defaults to True):
9999
Whether or not the model should return the last key/values attentions (not used by all models). Only
100100
relevant if `config.is_decoder=True`.
101101
pad_token_id (`int`, *optional*, defaults to 0):
@@ -104,11 +104,11 @@ class Gemma3TextConfig(Gemma2Config, PretrainedConfig):
104104
End of stream token id.
105105
bos_token_id (`int`, *optional*, defaults to 2):
106106
Beginning of stream token id.
107-
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
107+
tie_word_embeddings (`bool`, *optional*, defaults to True):
108108
Whether to tie weight embeddings.
109109
rope_theta (`float`, *optional*, defaults to 1000000.0):
110110
The base period of the RoPE embeddings.
111-
attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`):
111+
attention_bias (`bool`, *optional*, defaults to `False`):
112112
Whether to use a bias in the query, key, value and output projection layers during self-attention.
113113
attention_dropout (`float`, *optional*, defaults to 0.0):
114114
The dropout ratio for the attention probabilities.

src/transformers/models/got_ocr2/configuration_got_ocr2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ class GotOcr2VisionConfig(PretrainedConfig):
5757
The dropout ratio for the attention probabilities.
5858
initializer_range (`float`, *optional*, defaults to 1e-10):
5959
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
60-
qkv_bias (`bool`, *optional*, defaults to `True`):
60+
qkv_bias (`bool`, *optional*, defaults to True):
6161
Whether to add a bias to query, key, value projections.
62-
use_abs_pos (`bool`, *optional*, defaults to `True`):
62+
use_abs_pos (`bool`, *optional*, defaults to True):
6363
Whether to use absolute position embedding.
64-
use_rel_pos (`bool`, *optional*, defaults to `True`):
64+
use_rel_pos (`bool`, *optional*, defaults to True):
6565
Whether to use relative position embedding.
6666
window_size (`int`, *optional*, defaults to 14):
6767
Window size for relative position.

src/transformers/models/got_ocr2/modular_got_ocr2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ class GotOcr2VisionConfig(PretrainedConfig):
7272
The dropout ratio for the attention probabilities.
7373
initializer_range (`float`, *optional*, defaults to 1e-10):
7474
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
75-
qkv_bias (`bool`, *optional*, defaults to `True`):
75+
qkv_bias (`bool`, *optional*, defaults to True):
7676
Whether to add a bias to query, key, value projections.
77-
use_abs_pos (`bool`, *optional*, defaults to `True`):
77+
use_abs_pos (`bool`, *optional*, defaults to True):
7878
Whether to use absolute position embedding.
79-
use_rel_pos (`bool`, *optional*, defaults to `True`):
79+
use_rel_pos (`bool`, *optional*, defaults to True):
8080
Whether to use relative position embedding.
8181
window_size (`int`, *optional*, defaults to 14):
8282
Window size for relative position.

src/transformers/models/idefics3/image_processing_idefics3.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,10 @@ class Idefics3ImageProcessor(BaseImageProcessor):
253253
r"""
254254
Constructs an Idefics3 image processor.
255255
Args:
256-
do_convert_rgb (`bool`, *optional*, defaults to `True`):
256+
do_convert_rgb (`bool`, *optional*, defaults to True):
257257
Whether to convert the image to RGB. This is useful if the input image is of a different format e.g. RGBA.
258258
Only has an effect if the input image is in the PIL format.
259-
do_resize (`bool`, *optional*, defaults to `True`):
259+
do_resize (`bool`, *optional*, defaults to True):
260260
Whether to resize the image. The longest edge of the image is resized to be <= `size["longest_edge"]`, with the
261261
shortest edge resized to keep the input aspect ratio.
262262
size (`Dict`, *optional*, defaults to `{"longest_edge": 4 * 364}`):
@@ -265,16 +265,16 @@ class Idefics3ImageProcessor(BaseImageProcessor):
265265
to keep the input aspect ratio.
266266
resample (`Resampling`, *optional*, defaults to `Resampling.LANCZOS`):
267267
Resampling filter to use when resizing the image.
268-
do_image_splitting (`bool`, *optional*, defaults to `True`):
268+
do_image_splitting (`bool`, *optional*, defaults to True):
269269
Whether to split the image into sub-images concatenated with the original image. They are split into patches
270270
such that each patch has a size of `max_image_size["height"]` x `max_image_size["width"]`.
271271
max_image_size (`Dict`, *optional*, defaults to `{"longest_edge": 364}`):
272272
Maximum resolution of the patches of images accepted by the model. This is a dictionary containing the key "longest_edge".
273-
do_rescale (`bool`, *optional*, defaults to `True`):
273+
do_rescale (`bool`, *optional*, defaults to True):
274274
Whether to rescale the image. If set to `True`, the image is rescaled to have pixel values between 0 and 1.
275275
rescale_factor (`float`, *optional*, defaults to `1/255`):
276276
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
277-
do_normalize (`bool`, *optional*, defaults to `True`):
277+
do_normalize (`bool`, *optional*, defaults to True):
278278
Whether to normalize the image. If set to `True`, the image is normalized to have a mean of `image_mean` and
279279
a standard deviation of `image_std`.
280280
image_mean (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
@@ -285,7 +285,7 @@ class Idefics3ImageProcessor(BaseImageProcessor):
285285
Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
286286
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
287287
Can be overridden by the `image_std` parameter in the `preprocess` method.
288-
do_pad (`bool`, *optional*, defaults to `True`):
288+
do_pad (`bool`, *optional*, defaults to True):
289289
Whether or not to pad the images to the largest height and width in the batch and number of images per
290290
sample in the batch, such that the returned tensor is of shape (batch_size, max_num_images, num_channels, max_height, max_width).
291291
"""

src/transformers/models/instructblipvideo/configuration_instructblipvideo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121

2222

2323
from ...configuration_utils import PretrainedConfig
24-
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
2524
from ...utils import logging
2625
from ..auto import CONFIG_MAPPING, AutoConfig
26+
from ..auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
2727

2828

2929
logger = logging.get_logger(__name__)

src/transformers/models/janus/configuration_janus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class JanusVisionConfig(PretrainedConfig):
5656
`"relu"`, `"selu"`, and `"gelu_new"` are supported.
5757
mlp_ratio (`float`, *optional*, defaults to 4.0):
5858
Ratio of MLP hidden dimensionality to embedding dimensionality.
59-
attention_bias (`bool`, *optional*, defaults to `True`):
59+
attention_bias (`bool`, *optional*, defaults to True):
6060
Whether to add a bias to the queries, keys, and values in the attention layers.
6161
hidden_dropout_rate (`float`, *optional*, defaults to 0.0):
6262
The dropout probability for fully connected layers in the encoder.

src/transformers/models/janus/image_processing_janus.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class JanusImageProcessor(BaseImageProcessor):
6161
Constructs a JANUS image processor.
6262
6363
Args:
64-
do_resize (`bool`, *optional*, defaults to `True`):
64+
do_resize (`bool`, *optional*, defaults to True):
6565
Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
6666
`do_resize` parameter in the `preprocess` method.
6767
size (`dict`, *optional*, defaults to `{"height": 384, "width": 384}`):
@@ -73,13 +73,13 @@ class JanusImageProcessor(BaseImageProcessor):
7373
resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
7474
Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
7575
overridden by the `resample` parameter in the `preprocess` method.
76-
do_rescale (`bool`, *optional*, defaults to `True`):
76+
do_rescale (`bool`, *optional*, defaults to True):
7777
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
7878
`do_rescale` parameter in the `preprocess` method.
7979
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
8080
Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
8181
overridden by the `rescale_factor` parameter in the `preprocess` method.
82-
do_normalize (`bool`, *optional*, defaults to `True`):
82+
do_normalize (`bool`, *optional*, defaults to True):
8383
Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
8484
method.
8585
image_mean (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):

0 commit comments

Comments
 (0)