
Commit b77e6b6

update magc mean std and small improvements
1 parent 2d8d834 commit b77e6b6

File tree

3 files changed: +6 -6 lines changed


doctr/models/classification/magc_resnet/tensorflow.py

Lines changed: 2 additions & 2 deletions

@@ -23,8 +23,8 @@
 
 default_cfgs: Dict[str, Dict[str, Any]] = {
     'magc_resnet31': {
-        'mean': (0.5, 0.5, 0.5),
-        'std': (1., 1., 1.),
+        'mean': (0.694, 0.695, 0.693),
+        'std': (0.299, 0.296, 0.301),
         'input_shape': (32, 32, 3),
         'classes': list(VOCABS['french']),
         'url': None,
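
The previous values were identity-normalization placeholders (mean 0.5, std 1.); the new ones look like per-channel statistics measured on the pretraining data, although the commit does not say how they were computed. A minimal sketch of how such statistics are typically derived (the helper below is illustrative, not code from this repo):

import numpy as np

def channel_stats(images: np.ndarray):
    # images: float array of shape (N, H, W, 3), scaled to [0, 1]
    mean = images.mean(axis=(0, 1, 2))  # one value per channel
    std = images.std(axis=(0, 1, 2))
    return tuple(mean.round(3)), tuple(std.round(3))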

doctr/models/recognition/master/tensorflow.py

Lines changed: 1 addition & 1 deletion

@@ -169,7 +169,7 @@ def call(
         # (N, H, W, C) --> (N, H * W, C)
         feature = tf.reshape(feature, shape=(b, h * w, c))
         # add positional encoding to features
-        encoded = self.positional_encoding(feature)
+        encoded = self.positional_encoding(feature, **kwargs)
 
         out: Dict[str, tf.Tensor] = {}
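
Forwarding **kwargs here most plausibly propagates Keras's training flag into the positional-encoding layer, so that any dropout inside it is active only during training; that reading is inferred from the diff, and the class below is an illustrative stand-in, not doctr's actual PositionalEncoding. The same forwarding is applied to the decoder's positional encoding in transformer/tensorflow.py below.

import tensorflow as tf

class PositionalEncoding(tf.keras.layers.Layer):
    # stand-in for a layer whose behavior depends on training mode
    def __init__(self, dropout: float = 0.1) -> None:
        super().__init__()
        self.dropout = tf.keras.layers.Dropout(dropout)

    def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor:
        # dropout fires only when the caller forwards training=True
        return self.dropout(x, **kwargs)

out = PositionalEncoding()(tf.zeros((1, 4, 8)), training=True)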

doctr/models/recognition/transformer/tensorflow.py

Lines changed: 3 additions & 3 deletions

@@ -60,7 +60,7 @@ def scaled_dot_product_attention(
 ) -> Tuple[tf.Tensor, tf.Tensor]:
     """ Scaled Dot-Product Attention """
 
-    scores = tf.matmul(query, key, transpose_b=True) / math.sqrt(query.shape[-1])
+    scores = tf.matmul(query, tf.transpose(key, perm=[0, 1, 3, 2])) / math.sqrt(query.shape[-1])
     if mask is not None:
         scores = tf.where(mask == 0, -1e9, scores)
     p_attn = tf.nn.softmax(scores, axis=-1)
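
Both forms compute Q·Kᵀ over the last two axes of a (batch, heads, seq, d_k) tensor, so this rewrite is behavior-preserving: tf.matmul's transpose_b flag and an explicit transpose of the trailing axes are numerically equivalent. A quick check (illustrative snippet, not part of the commit):

import tensorflow as tf

q = tf.random.normal((2, 4, 10, 16))  # (batch, heads, seq, d_k)
k = tf.random.normal((2, 4, 10, 16))
a = tf.matmul(q, k, transpose_b=True)
b = tf.matmul(q, tf.transpose(k, perm=[0, 1, 3, 2]))
print(bool(tf.reduce_all(tf.abs(a - b) < 1e-5)))  # True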
@@ -88,7 +88,7 @@ class MultiHeadAttention(layers.Layer, NestedObject):
 
     def __init__(self, num_heads: int, d_model: int, dropout: float = 0.1) -> None:
         super().__init__()
-        assert d_model % num_heads == 0
+        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
 
         self.d_k = d_model // num_heads
         self.num_heads = num_heads
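
The assert guards the integer split of d_model across attention heads: each head receives d_k = d_model // num_heads dimensions, and concatenating the heads back together must rebuild exactly d_model. A worked example of the failure the new message describes (values chosen for illustration):

d_model, num_heads = 512, 6  # 512 is not divisible by 6
d_k = d_model // num_heads   # 85
print(d_k * num_heads)       # 510 != 512: heads no longer recombine to d_model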
@@ -158,7 +158,7 @@ def call(
     ) -> tf.Tensor:
 
         tgt = self.embed(tgt, **kwargs) * math.sqrt(self.d_model)
-        pos_enc_tgt = self.positional_encoding(tgt)
+        pos_enc_tgt = self.positional_encoding(tgt, **kwargs)
         output = pos_enc_tgt
 
         for i in range(self.num_layers):
