From 6f3796be289e7fadb1000be50f3e6a59c6fea56f Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 17 Dec 2024 20:20:30 +0100
Subject: [PATCH] [TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32
 transformations recursively for internal MultiSubGraph body (#28050)

[TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32 transformations
recursively for internal MultiSubGraph body

The KeepPrecisionSensitiveInFP32 group of transformations
(MarkSugraphsToKeepInMixedPrecision and AlignMixedFP32FP16Types)
keeps precision-sensitive parts of a model in high precision even
when the rest of the model is converted to a different precision,
as is done in the GPU pipeline (fp32 -> fp16).

The AlignMixedFP32FP16Types transformation surrounds these
high-precision regions with the Convert nodes they need to
interoperate with the parts of the model that now run in the lower
precision.

However, this does not happen when a model is an internal body of a
MultiSubGraphOp, because the transformations are never called for it.
Fix this by calling the KeepPrecisionSensitiveInFP32 transformations
recursively, so that every internal body is converted correctly and
receives the Converts it requires.
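The essence of the change can be sketched as a standalone traversal
(a minimal sketch only: for_each_body and fn are hypothetical names,
not OpenVINO API; in the actual patch the recursion lives inside
convert_function_precision, see the diff below):

    #include <functional>
    #include <memory>

    #include "openvino/core/model.hpp"
    #include "openvino/op/util/multi_subgraph_base.hpp"

    // Visit a model and, recursively, every internal body of each
    // MultiSubGraphOp it contains (If, Loop, TensorIterator), so that
    // body-level passes such as AlignMixedFP32FP16Types reach them all.
    void for_each_body(const std::shared_ptr<ov::Model>& model,
                       const std::function<void(const std::shared_ptr<ov::Model>&)>& fn) {
        fn(model);
        for (const auto& node : model->get_ordered_ops()) {
            if (auto multi = std::dynamic_pointer_cast<ov::op::util::MultiSubGraphOp>(node)) {
                for (size_t i = 0; i < multi->get_internal_subgraphs_size(); ++i)
                    for_each_body(multi->get_function(static_cast<int>(i)), fn);
            }
        }
    }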
- Ticket: CVS-158631

Signed-off-by: Andrii Staikov

---------

Signed-off-by: Andrii Staikov
Co-authored-by: Denis Orlov
---
 .../src/transformations/convert_precision.cpp |  25 ++--
 .../align_mixed_fp32_fp16_types.cpp           |   4 -
 .../tests/utils/convert_precision.cpp         | 120 +++++++++++++++++-
 3 files changed, 132 insertions(+), 17 deletions(-)

diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index d5e96ddafc252f..bcf44c74d29f04 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -198,7 +198,8 @@ bool convert_node_input_precision(const std::shared_ptr<ov::Node>& node,
     return false;
 }
 
-bool convert_function_precision(const std::shared_ptr<ov::Model>& f,
+bool convert_function_precision(ov::pass::PassBase& pass,
+                                const std::shared_ptr<ov::Model>& f,
                                 const type_to_fuse_map& type_to_fuse,
                                 const type_to_fuse_map& type_to_extend,
                                 const precisions_map& precisions,
@@ -212,6 +213,14 @@ bool convert_function_precision(const std::shared_ptr<ov::Model>& f,
                                 bool names_compatibility_mode) {
     bool is_output_precision_changed = false;
 
+    if (skip_precision_sensitive && has_fp16_compression) {
+        pass::Manager manager(pass.get_pass_config(), "KeepPrecisionSensitiveInFP32");
+        // Mark subgraphs with disable_fp16_compression to keep them in FP32
+        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+        manager.register_pass<pass::AlignMixedFP32FP16Types>();
+        manager.run_passes(f);
+    }
+
     ov::element::TypeVector orig_result_types;
     if (!convert_input_output_precision) {
         const auto& results = f->get_results();
@@ -268,7 +277,8 @@ bool convert_function_precision(const std::shared_ptr<ov::Model>& f,
         if (auto sub_graph_node = std::dynamic_pointer_cast<ov::op::util::MultiSubGraphOp>(node)) {
             size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
             for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
-                is_changed = convert_function_precision(sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
+                is_changed = convert_function_precision(pass,
+                                                        sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
                                                         type_to_fuse,
                                                         type_to_extend,
                                                         precisions,
@@ -366,7 +376,8 @@ bool convert_precision(ov::pass::PassBase& pass,
     std::unordered_map<const ov::Node*, std::vector<Input<Node>>> const_to_internal_output;
     const auto names_compatibility_mode =
         f->has_rt_info("version") && f->get_rt_info<int64_t>("version") < 11;
-    return convert_function_precision(f,
+    return convert_function_precision(pass,
+                                      f,
                                       type_to_fuse,
                                       type_to_extend,
                                       precisions,
@@ -418,14 +429,6 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
     bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16;
 
-    if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) {
-        pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32");
-        // Mark subgraphs with disable_fp16_compression to keep them in FP32
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        manager.run_passes(f);
-    }
-
     type_to_fuse_map type_to_fuse{
         {ov::op::v0::Convert::get_type_info_static(), fuse_type_to_convert},
         {ov::op::v3::ShapeOf::get_type_info_static(), fuse_type_to_shapeof},
diff --git a/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp b/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp
index 147faad0f3acad..2b2b462bd4b87e 100644
--- a/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp
+++ b/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp
@@ -65,10 +65,6 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::Model>& model) {
             auto out_node = out_inputs.get_node()->shared_from_this();
-
-            if (std::dynamic_pointer_cast<ov::op::v0::Result>(out_node))
-                continue;
-
             // element_type of this convert will be changed automatically to f16 after
             // ConvertPrecision(f32 -> f16). It's kept here f32 to keep ov::Model validatable
             auto convert = std::make_shared<ov::op::v0::Convert>(output, out_inputs.get_element_type());
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index f4bdedf4764604..a2edb0232b40f2 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -1951,8 +1951,9 @@ TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
         auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
         auto add = std::make_shared<opset10::Add>(input_2_decompressed, eps_const);
         auto divide = std::make_shared<opset10::Divide>(input_1_decompressed, add);
+        auto conv = std::make_shared<opset10::Convert>(divide, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();
@@ -1960,6 +1961,120 @@
     ASSERT_TRUE(result.valid) << result.message;
 }
 
+static std::shared_ptr<ov::Model> make_then_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+
+    auto then_param = std::make_shared<opset10::Parameter>(el_type, PartialShape{1, 112, 112, 24});
+    auto opt_conv =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto red_mean_const = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto red_mean = std::make_shared<opset10::ReduceMean>(opt_conv, red_mean_const);
+
+    auto opt_conv_sub =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto subtract = std::make_shared<opset10::Subtract>(opt_conv_sub, red_mean);
+
+    auto power_const = opset10::Constant::create(el_type, Shape{1}, {2});
+    auto opt_conv_1 =
+        ref ? std::make_shared<opset10::Convert>(power_const, element::f32)->output(0) : power_const->output(0);
+    auto power = std::make_shared<opset10::Power>(subtract, opt_conv_1);
+
+    auto red_mean_const_1 = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto reduce_mean_1 = std::make_shared<opset10::ReduceMean>(power, red_mean_const_1);
+
+    auto add_const = opset10::Constant::create(el_type, Shape{1}, {1.001e-05});
+    auto opt_conv_2 =
+        ref ? std::make_shared<opset10::Convert>(add_const, element::f32)->output(0) : add_const->output(0);
+    auto add = std::make_shared<opset10::Add>(reduce_mean_1, opt_conv_2);
+
+    auto sqrt = std::make_shared<opset10::Sqrt>(add);
+
+    auto divide = std::make_shared<opset10::Divide>(subtract, sqrt);
+
+    auto mul_const =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float>(24, 1));  // stub values
+    auto mul_conv = std::make_shared<opset10::Convert>(mul_const, element::f32);
+    auto mul = std::make_shared<opset10::Multiply>(divide, mul_conv);
+
+    auto add_const_1 =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float>(24, 1));  // stub values
+    auto add_conv = std::make_shared<opset10::Convert>(add_const_1, element::f32);
+    auto add_1 = std::make_shared<opset10::Add>(mul, add_conv);
+
+    auto res_conv = ref ? std::make_shared<opset10::Convert>(add_1, element::f16)->output(0) : add_1->output(0);
+
+    auto then_res = std::make_shared<opset10::Result>(res_conv);
+
+    return std::make_shared<Model>(OutputVector{then_res}, ParameterVector{then_param});
+}
+
+static std::shared_ptr<ov::Model> make_else_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+    auto else_param = std::make_shared<opset10::Parameter>(el_type, ov::Shape{1, 112, 112, 24});
+    auto else_res = std::make_shared<opset10::Result>(else_param);
+
+    return std::make_shared<Model>(OutputVector{else_res}, ParameterVector{else_param});
+}
+
+TEST(TransformationTests, Convert_Precision_If_Body) {
+    shared_ptr<Model> main_model, main_model_ref;
+    pass::Manager manager;
+    {
+        auto then_body = make_then_body(false);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(false);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<opset10::Parameter>(ov::element::f32, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<opset10::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<opset10::If>(cond);
+        auto if_result = std::make_shared<opset10::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(main_model);
+    }
+
+    {
+        auto then_body = make_then_body(true);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(true);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<opset10::Parameter>(ov::element::f16, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<opset10::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<opset10::If>(cond);
+        auto if_result = std::make_shared<opset10::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model_ref = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(main_model_ref, main_model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
 TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
@@ -1994,8 +2109,9 @@ TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
         auto pow_exp_const = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
         auto pow = std::make_shared<opset10::Power>(add, pow_exp_const);
         auto mul = std::make_shared<opset10::Multiply>(input_1_decompressed, pow);
+        auto conv = std::make_shared<opset10::Convert>(mul, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();