
Commit 2f564c7

philkuz authored and BernardZach committed
fix(DPT,Depth-Anything) torch.export (huggingface#34103)
* Fix torch.export issue in dpt based models
* Simplify the if statements
* Move activation definitions of zoe_depth to init()
* Add test_export for dpt and zoedepth
* add depth anything
* Remove zoedepth non-automated zoedepth changes and zoedepth test
* [run_slow] dpt, depth_anything, zoedepth

Signed-off-by: Phillip Kuznetsov <[email protected]>
1 parent 71b231d commit 2f564c7

File tree: 5 files changed (+72, -20 lines)

src/transformers/models/depth_anything/modeling_depth_anything.py

Lines changed: 8 additions & 8 deletions

@@ -224,16 +224,16 @@ def forward(self, hidden_states, size=None):
         hidden_states = hidden_states[::-1]
 
         fused_hidden_states = []
-        # first layer only uses the last hidden_state
-        size = hidden_states[1].shape[2:]
-        fused_hidden_state = self.layers[0](hidden_states[0], size=size)
-        fused_hidden_states.append(fused_hidden_state)
+        fused_hidden_state = None
 
-        # looping from the last layer to the second
-        for idx, (hidden_state, layer) in enumerate(zip(hidden_states[1:], self.layers[1:])):
-            size = hidden_states[1:][idx + 1].shape[2:] if idx != (len(hidden_states[1:]) - 1) else None
+        for idx, (hidden_state, layer) in enumerate(zip(hidden_states, self.layers)):
+            size = hidden_states[idx + 1].shape[2:] if idx != (len(hidden_states) - 1) else None
 
-            fused_hidden_state = layer(fused_hidden_state, hidden_state, size=size)
+            if fused_hidden_state is None:
+                # first layer only uses the last hidden_state
+                fused_hidden_state = layer(hidden_state, size=size)
+            else:
+                fused_hidden_state = layer(fused_hidden_state, hidden_state, size=size)
 
             fused_hidden_states.append(fused_hidden_state)

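The change is the same in all three modeling files in this commit: rather than calling self.layers[0] on its own and then looping over the ModuleList slice self.layers[1:], the fusion stage now walks the full ModuleList in one uniform loop and branches on fused_hidden_state is None. Below is a minimal toy sketch of the resulting pattern under torch.export; ToyFusionStage, its Linear layers, and the additive fusion are illustrative assumptions, not the real DPT fusion layers.

import torch
from torch import nn


class ToyFusionStage(nn.Module):
    # Toy stand-in for a DPT-style feature fusion stage. The real layers take
    # (fused_hidden_state, hidden_state) pairs and upsample; a Linear over the
    # sum is enough to show the control flow.
    def __init__(self, num_layers=4, dim=8):
        super().__init__()
        self.layers = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))

    def forward(self, hidden_states):
        hidden_states = hidden_states[::-1]
        fused_hidden_states = []
        # One uniform loop, as in the commit: no ModuleList slicing, no
        # special-cased call to self.layers[0] outside the loop.
        fused_hidden_state = None
        for hidden_state, layer in zip(hidden_states, self.layers):
            if fused_hidden_state is None:
                # first layer only uses the last hidden_state
                fused_hidden_state = layer(hidden_state)
            else:
                fused_hidden_state = layer(fused_hidden_state + hidden_state)
            fused_hidden_states.append(fused_hidden_state)
        return fused_hidden_states


# The uniform loop traces cleanly (the new tests below gate on torch >= 2.4):
example_inputs = [torch.randn(2, 8) for _ in range(4)]
exported = torch.export.export(ToyFusionStage().eval(), args=(example_inputs,))
print(type(exported))  # torch.export.ExportedProgram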
src/transformers/models/dpt/modeling_dpt.py

Lines changed: 7 additions & 6 deletions

@@ -689,12 +689,13 @@ def forward(self, hidden_states):
         hidden_states = hidden_states[::-1]
 
         fused_hidden_states = []
-        # first layer only uses the last hidden_state
-        fused_hidden_state = self.layers[0](hidden_states[0])
-        fused_hidden_states.append(fused_hidden_state)
-        # looping from the last layer to the second
-        for hidden_state, layer in zip(hidden_states[1:], self.layers[1:]):
-            fused_hidden_state = layer(fused_hidden_state, hidden_state)
+        fused_hidden_state = None
+        for hidden_state, layer in zip(hidden_states, self.layers):
+            if fused_hidden_state is None:
+                # first layer only uses the last hidden_state
+                fused_hidden_state = layer(hidden_state)
+            else:
+                fused_hidden_state = layer(fused_hidden_state, hidden_state)
             fused_hidden_states.append(fused_hidden_state)
 
         return fused_hidden_states

src/transformers/models/zoedepth/modeling_zoedepth.py

Lines changed: 7 additions & 6 deletions

@@ -185,12 +185,13 @@ def forward(self, hidden_states):
         hidden_states = hidden_states[::-1]
 
         fused_hidden_states = []
-        # first layer only uses the last hidden_state
-        fused_hidden_state = self.layers[0](hidden_states[0])
-        fused_hidden_states.append(fused_hidden_state)
-        # looping from the last layer to the second
-        for hidden_state, layer in zip(hidden_states[1:], self.layers[1:]):
-            fused_hidden_state = layer(fused_hidden_state, hidden_state)
+        fused_hidden_state = None
+        for hidden_state, layer in zip(hidden_states, self.layers):
+            if fused_hidden_state is None:
+                # first layer only uses the last hidden_state
+                fused_hidden_state = layer(hidden_state)
+            else:
+                fused_hidden_state = layer(fused_hidden_state, hidden_state)
             fused_hidden_states.append(fused_hidden_state)
 
         return fused_hidden_states

tests/models/depth_anything/test_modeling_depth_anything.py

Lines changed: 28 additions & 0 deletions

@@ -18,6 +18,7 @@
 
 from transformers import DepthAnythingConfig, Dinov2Config
 from transformers.file_utils import is_torch_available, is_vision_available
+from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
 from transformers.testing_utils import require_torch, require_vision, slow, torch_device
 
 from ...test_configuration_common import ConfigTester

@@ -290,3 +291,30 @@ def test_inference(self):
         ).to(torch_device)
 
         self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-4))
+
+    def test_export(self):
+        for strict in [True, False]:
+            with self.subTest(strict=strict):
+                if not is_torch_greater_or_equal_than_2_4:
+                    self.skipTest(reason="This test requires torch >= 2.4 to run.")
+                model = (
+                    DepthAnythingForDepthEstimation.from_pretrained("LiheYoung/depth-anything-small-hf")
+                    .to(torch_device)
+                    .eval()
+                )
+                image_processor = DPTImageProcessor.from_pretrained("LiheYoung/depth-anything-small-hf")
+                image = prepare_img()
+                inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
+
+                exported_program = torch.export.export(
+                    model,
+                    args=(inputs["pixel_values"],),
+                    strict=strict,
+                )
+                with torch.no_grad():
+                    eager_outputs = model(**inputs)
+                    exported_outputs = exported_program.module().forward(inputs["pixel_values"])
+                self.assertEqual(eager_outputs.predicted_depth.shape, exported_outputs.predicted_depth.shape)
+                self.assertTrue(
+                    torch.allclose(eager_outputs.predicted_depth, exported_outputs.predicted_depth, atol=1e-4)
+                )
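With the loop refactor in place, the export path works outside the test harness too. A hedged end-to-end sketch follows; the local image path and the bicubic resize are assumptions layered on top of the checkpoint and export call used in the test above.

import torch
from PIL import Image
from transformers import AutoImageProcessor, DepthAnythingForDepthEstimation

# Same checkpoint as the test above.
model = DepthAnythingForDepthEstimation.from_pretrained("LiheYoung/depth-anything-small-hf").eval()
processor = AutoImageProcessor.from_pretrained("LiheYoung/depth-anything-small-hf")

image = Image.open("example.jpg")  # hypothetical local image
inputs = processor(images=image, return_tensors="pt")

# Export the depth-estimation graph; this is what the commit's refactor unblocks.
# (torch.export.save(exported_program, "model.pt2") can persist it for later reuse.)
exported_program = torch.export.export(model, args=(inputs["pixel_values"],))

with torch.no_grad():
    outputs = exported_program.module()(inputs["pixel_values"])

# Resize the raw depth map back to input resolution (a common post-processing choice).
depth = torch.nn.functional.interpolate(
    outputs.predicted_depth.unsqueeze(1),
    size=image.size[::-1],  # PIL reports (width, height); interpolate expects (height, width)
    mode="bicubic",
    align_corners=False,
).squeeze()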

tests/models/dpt/test_modeling_dpt.py

Lines changed: 22 additions & 0 deletions

@@ -18,6 +18,7 @@
 
 from transformers import DPTConfig
 from transformers.file_utils import is_torch_available, is_vision_available
+from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
 from transformers.testing_utils import require_torch, require_vision, slow, torch_device
 
 from ...test_configuration_common import ConfigTester

@@ -410,3 +411,24 @@ def test_post_processing_depth_estimation(self):
         ).squeeze()
         self.assertTrue(output_enlarged.shape == expected_shape)
         self.assertTrue(torch.allclose(predicted_depth_l, output_enlarged, rtol=1e-3))
+
+    def test_export(self):
+        for strict in [True, False]:
+            with self.subTest(strict=strict):
+                if not is_torch_greater_or_equal_than_2_4:
+                    self.skipTest(reason="This test requires torch >= 2.4 to run.")
+                model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade").to(torch_device).eval()
+                image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
+                image = prepare_img()
+                inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
+
+                exported_program = torch.export.export(
+                    model,
+                    args=(inputs["pixel_values"],),
+                    strict=strict,
+                )
+                with torch.no_grad():
+                    eager_outputs = model(**inputs)
+                    exported_outputs = exported_program.module().forward(inputs["pixel_values"])
+                self.assertEqual(eager_outputs.logits.shape, exported_outputs.logits.shape)
+                self.assertTrue(torch.allclose(eager_outputs.logits, exported_outputs.logits, atol=1e-4))
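Both new test_export methods sit alongside the models' integration tests, and the [run_slow] line in the commit message asks CI to run the slow suites for dpt, depth_anything, and zoedepth. Assuming the usual slow-test gating applies, they can be exercised locally with RUN_SLOW=1 python -m pytest tests/models/dpt/test_modeling_dpt.py tests/models/depth_anything/test_modeling_depth_anything.py -k test_export (on torch < 2.4 they skip).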
