@@ -1255,7 +1255,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
12551255 }
12561256 }
12571257
1258- void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 ) {
1258+ void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 , bool is_wei_dyn = false ) {
12591259 tests::random_generator rg (GET_SUITE_NAME);
12601260 auto & engine = get_test_engine ();
12611261
@@ -1285,6 +1285,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
12851285 auto scale_data = rg.generate_random_1d <ov::float16>(ofm_num * ifm_num / scales_group_size, -4 .0f , 4 .0f );
12861286 set_values (scale_mem, scale_data);
12871287
1288+ if (is_wei_dyn) {
1289+ // ifm_num is dynamic
1290+ dyn_input_ps = is_3d ? ov::PartialShape{ -1 , -1 , -1 } : ov::PartialShape{ -1 , -1 };
1291+ }
1292+
12881293 auto in_layout = is_dynamic ? layout{ dyn_input_ps, data_types::f16 , format::bfyx }
12891294 : layout{ input_ps, data_types::f16 , format::bfyx };
12901295
@@ -1302,7 +1307,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
13021307
13031308 auto config = get_test_default_config (engine);
13041309 config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1305- config.set_property (ov::intel_gpu::optimize_data (true ));
1310+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1311+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
13061312
13071313 network network (engine, topology, config);
13081314 network.set_input_data (" input" , input_mem);
@@ -1365,13 +1371,13 @@ class fully_connected_gpu_tests: public ::testing::Test {
13651371 }
13661372
13671373
1368- void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 ) {
1374+ void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 , bool is_wei_dyn = false ) {
13691375 tests::random_generator rg (GET_SUITE_NAME);
13701376 auto & engine = get_test_engine ();
13711377 auto supports_immad = engine.get_device_info ().supports_immad ;
13721378
13731379 long int ifm_num = 256 ;
1374- long int ofm_num = 256 ;
1380+ long int ofm_num = 512 ;
13751381
13761382 auto input_mem = engine.allocate_memory ({ { batch_num, ifm_num}, data_types::f16 , format::bfyx });
13771383 auto weights_mem = engine.allocate_memory ({ {ofm_num, ifm_num}, data_types::u4, format::bfyx });
@@ -1392,6 +1398,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
13921398 auto in_layout = is_dynamic ? layout{ {-1 , ifm_num}, data_types::f16 , format::bfyx }
13931399 : layout{ {batch_num, ifm_num}, data_types::f16 , format::bfyx };
13941400
1401+ if (is_dynamic && is_wei_dyn) {
1402+ // ifm_num is dynamic
1403+ in_layout = layout{ {-1 , -1 }, data_types::f16 , format::bfyx };
1404+ }
1405+
13951406 auto dcomp_zp_name = supports_immad ? " dcomp_zp" : " " ;
13961407
13971408 auto fc_prim = fully_connected (" fc_prim" , input_info (" input" ), " weights" , " " , " scale" , dcomp_zp_name, data_types::f16 , padding (), 2 , 2 );
@@ -1409,6 +1420,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
14091420
14101421 auto config = get_test_default_config (engine);
14111422 config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1423+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1424+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
14121425
14131426 network network (engine, topology, config);
14141427 network.set_input_data (" input" , input_mem);
@@ -3324,6 +3337,32 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_cache_dynamic) {
33243337 this ->test_compressed_int4_scale_dyn_quan (true , true , 512 );
33253338}
33263339
3340+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input) {  // int4-compressed FC, dynamic batch and dynamic input-feature dim, batch 256
3341+ // scales_group_size (128, the helper's default) is spelled out so the trailing `true` binds to is_wei_dyn instead of converting to a group size of 1.
3342+ this->test_compressed_int4_scale(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch_num=*/256, /*scales_group_size=*/128, /*is_wei_dyn=*/true);
3342+ }
3343+
3344+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_cached) {  // caching variant: dynamic batch and dynamic input-feature dim, batch 260
3345+ // scales_group_size (128, the helper's default) is spelled out so the trailing `true` binds to is_wei_dyn instead of converting to a group size of 1.
3346+ this->test_compressed_int4_scale(/*is_caching_test=*/true, /*is_dynamic=*/true, /*batch_num=*/260, /*scales_group_size=*/128, /*is_wei_dyn=*/true);
3346+ }
3347+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g64) {  // batch 1, scale group size 64, fully dynamic input layout
3348+ test_compressed_int4_scale(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch_num=*/1, /*scales_group_size=*/64, /*is_wei_dyn=*/true);
3349+ }
3350+
3351+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g128) {  // batch 1, scale group size 128, fully dynamic input layout
3352+ test_compressed_int4_scale(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch_num=*/1, /*scales_group_size=*/128, /*is_wei_dyn=*/true);
3353+ }
3354+
3355+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_single_batch) {  // dyn-quan helper, batch 1, all input dims dynamic
3356+ test_compressed_int4_scale_dyn_quan(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch=*/1, /*is_wei_dyn=*/true);
3357+ }
3358+
3359+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input) {  // dyn-quan helper, batch 512, all input dims dynamic
3360+ test_compressed_int4_scale_dyn_quan(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch=*/512, /*is_wei_dyn=*/true);
3361+ }
3362+
3363+ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_unaligned) {  // dyn-quan helper, batch 511, all input dims dynamic
3364+ test_compressed_int4_scale_dyn_quan(/*is_caching_test=*/false, /*is_dynamic=*/true, /*batch=*/511, /*is_wei_dyn=*/true);
3365+ }
33273366
33283367
33293368TEST_F (fully_connected_gpu_tests, compressed_scale_bias) {
0 commit comments