[TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32 transformations recursively for internal MultiSubGraph body (#28050)

[TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32 transformations
recursively for internal MultiSubGraph body

The KeepPrecisionSensitiveInFP32 set of transformations
(MarkSugraphsToKeepInMixedPrecision & AlignMixedFP32FP16Types) preserves
the output precisions of those parts of a model that need to remain in
high precision, even when the model as a whole is converted to a
different precision (as is done in the GPU fp32 -> fp16 pipeline).
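
The marking itself works through runtime info on the affected nodes. As a
hedged sketch (the helper lives in OpenVINO's transformations rt_info
headers; the exact include path should be double-checked):

    // Nodes/subgraphs marked this way are skipped by fp16 compression;
    // the "disable_fp16_compression" comment in the diff below refers
    // to this attribute.
    #include "transformations/rt_info/disable_fp16_compression.hpp"

    ov::disable_fp16_compression(node);  // `node` is any std::shared_ptr<ov::Node>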

The AlignMixedFP32FP16Types transformation makes sure the high-precision
parts of the graph are surrounded with the required Converts so that they
interoperate with the other parts of the model, which now have a
different precision. However, this does not happen if the model is an
internal body of a MultiSubGraph, because the transformation is never
called for it.

Fix this by calling the KeepPrecisionSensitiveInFP32 transformations
recursively, so that each internal body is converted correctly and gets
the Converts it requires.
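
From the caller's point of view, this path is exercised as in the
following sketch (mirroring the new test added below; the ConvertPrecision
overload and type aliases are taken from that test and are illustrative
rather than normative):

    // Convert a model fp32 -> fp16 while keeping precision-sensitive
    // subgraphs, now including If/Loop/TensorIterator bodies, in fp32.
    using namespace ov;
    pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    bool keep_precision_sensitive_in_fp32 = true;
    manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                  empty_type_to_fuse_map,
                                                  keep_precision_sensitive_in_fp32);
    manager.run_passes(model);  // `model` is any std::shared_ptr<ov::Model>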

- Ticket: CVS-158631


Signed-off-by: Andrii Staikov <[email protected]>
Co-authored-by: Denis Orlov <[email protected]>
CuriousPanCake and dorloff authored Dec 17, 2024
1 parent c1c66ce commit 6f3796b
Showing 3 changed files with 132 additions and 17 deletions.
@@ -198,7 +198,8 @@ bool convert_node_input_precision(const std::shared_ptr<ov::Node>& node,
     return false;
 }
 
-bool convert_function_precision(const std::shared_ptr<Model>& f,
+bool convert_function_precision(ov::pass::PassBase& pass,
+                                const std::shared_ptr<Model>& f,
                                 const type_to_fuse_map& type_to_fuse,
                                 const type_to_fuse_map& type_to_extend,
                                 const precisions_map& precisions,
@@ -212,6 +213,14 @@ bool convert_function_precision(const std::shared_ptr<Model>& f,
                                 bool names_compatibility_mode) {
     bool is_output_precision_changed = false;
 
+    if (skip_precision_sensitive && has_fp16_compression) {
+        pass::Manager manager(pass.get_pass_config(), "KeepPrecisionSensitiveInFP32");
+        // Mark subgraphs with disable_fp16_compression to keep them in FP32
+        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+        manager.register_pass<pass::AlignMixedFP32FP16Types>();
+        manager.run_passes(f);
+    }
+
     ov::element::TypeVector orig_result_types;
     if (!convert_input_output_precision) {
         const auto& results = f->get_results();
@@ -268,7 +277,8 @@ bool convert_function_precision(const std::shared_ptr<Model>& f,
         if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::MultiSubGraphOp>(node)) {
             size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
             for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
-                is_changed = convert_function_precision(sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
+                is_changed = convert_function_precision(pass,
+                                                        sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
                                                         type_to_fuse,
                                                         type_to_extend,
                                                         precisions,
@@ -366,7 +376,8 @@ bool convert_precision(ov::pass::PassBase& pass,
     std::unordered_map<const ov::Node*, std::vector<Input<Node>>> const_to_internal_output;
 
     const auto names_compatibility_mode = f->has_rt_info("version") && f->get_rt_info<int64_t>("version") < 11;
-    return convert_function_precision(f,
+    return convert_function_precision(pass,
+                                      f,
                                       type_to_fuse,
                                       type_to_extend,
                                       precisions,
@@ -418,14 +429,6 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
 
     bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16;
 
-    if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) {
-        pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32");
-        // Mark subgraphs with disable_fp16_compression to keep them in FP32
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        manager.run_passes(f);
-    }
-
     type_to_fuse_map type_to_fuse{
         {ov::op::v0::Convert::get_type_info_static(), fuse_type_to_convert},
         {ov::op::v3::ShapeOf::get_type_info_static(), fuse_type_to_shapeof},
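
For context, If, Loop and TensorIterator all derive from
ov::op::util::MultiSubGraphOp, so the recursion above reaches each of
their bodies through the same two calls used in the diff; a minimal
sketch of that surface:

    // Enumerate the internal bodies of any multi-subgraph op (If, Loop,
    // TensorIterator) and recurse into each one.
    if (auto multi = std::dynamic_pointer_cast<ov::op::util::MultiSubGraphOp>(node)) {
        for (size_t i = 0; i < multi->get_internal_subgraphs_size(); ++i) {
            std::shared_ptr<ov::Model> body = multi->get_function(static_cast<int>(i));
            // ... run the same conversion on `body` here ...
        }
    }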
@@ -65,10 +65,6 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
             if (!out_inputs.get_element_type().is_real())
                 continue;
 
-            // todo xxx-101766: if we don't skip Results there is an error on GPU
-            if (ov::as_type_ptr<ov::op::v0::Result>(out_node))
-                continue;
-
             // element_type of this convert will be changed automatically to f16 after
             // ConvertPrecision(f32 -> f16). It's kept here f32 to keep ov::Model validatable
             auto convert = std::make_shared<ov::op::v0::Convert>(output, out_inputs.get_element_type());
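
With the Result skip removed, AlignMixedFP32FP16Types can now place a
boundary Convert in front of a body Result as well. A hedged sketch of
the pattern this produces for a precision-sensitive If branch after the
full fp32 -> fp16 pipeline (shapes and node names are illustrative;
compare the reference then-body in the test below):

    // The body parameter arrives as f16, is converted up to f32 for the
    // precision-sensitive math, and is converted back to f16 right before
    // the Result so the parent graph sees a consistent element type.
    auto body_param = std::make_shared<ov::opset10::Parameter>(ov::element::f16, ov::Shape{1, 24});
    auto to_f32 = std::make_shared<ov::opset10::Convert>(body_param, ov::element::f32);
    auto sqrt = std::make_shared<ov::opset10::Sqrt>(to_f32);  // stays in f32
    auto to_f16 = std::make_shared<ov::opset10::Convert>(sqrt, ov::element::f16);
    auto body_res = std::make_shared<ov::opset10::Result>(to_f16);  // Convert precedes the Result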
src/common/transformations/tests/utils/convert_precision.cpp (118 additions & 2 deletions)
@@ -1951,15 +1951,130 @@ TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
         auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
         auto add = std::make_shared<opset10::Add>(input_2_decompressed, eps_const);
         auto divide = std::make_shared<opset10::Divide>(input_1_decompressed, add);
+        auto conv = std::make_shared<opset10::Convert>(divide, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
 
+static std::shared_ptr<ov::Model> make_then_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+
+    auto then_param = std::make_shared<opset10::Parameter>(el_type, PartialShape{1, 112, 112, 24});
+    auto opt_conv =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto red_mean_const = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto red_mean = std::make_shared<opset10::ReduceMean>(opt_conv, red_mean_const);
+
+    auto opt_conv_sub =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto subtract = std::make_shared<opset10::Subtract>(opt_conv_sub, red_mean);
+
+    auto power_const = opset10::Constant::create(el_type, Shape{1}, {2});
+    auto opt_conv_1 =
+        ref ? std::make_shared<opset10::Convert>(power_const, element::f32)->output(0) : power_const->output(0);
+    auto power = std::make_shared<opset10::Power>(subtract, opt_conv_1);
+
+    auto red_mean_const_1 = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto reduce_mean_1 = std::make_shared<opset10::ReduceMean>(power, red_mean_const_1);
+
+    auto add_const = opset10::Constant::create(el_type, Shape{1}, {1.001e-05});
+    auto opt_conv_2 =
+        ref ? std::make_shared<opset10::Convert>(add_const, element::f32)->output(0) : add_const->output(0);
+    auto add = std::make_shared<opset10::Add>(reduce_mean_1, opt_conv_2);
+
+    auto sqrt = std::make_shared<opset10::Sqrt>(add);
+
+    auto divide = std::make_shared<opset10::Divide>(subtract, sqrt);
+
+    auto mul_const =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float16>(24, 1));  // stub values
+    auto mul_conv = std::make_shared<opset10::Convert>(mul_const, element::f32);
+    auto mul = std::make_shared<opset10::Multiply>(divide, mul_conv);
+
+    auto add_const_1 =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float16>(24, 1));  // stub values
+    auto add_conv = std::make_shared<opset10::Convert>(add_const_1, element::f32);
+    auto add_1 = std::make_shared<opset10::Multiply>(mul, add_conv);
+
+    auto res_conv = ref ? std::make_shared<opset10::Convert>(add_1, element::f16)->output(0) : add_1->output(0);
+
+    auto then_res = std::make_shared<opset10::Result>(res_conv);
+
+    return std::make_shared<ov::Model>(OutputVector{then_res}, ParameterVector{then_param});
+}
+
+static std::shared_ptr<ov::Model> make_else_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+    auto else_param = std::make_shared<opset10::Parameter>(el_type, ov::Shape{1, 112, 112, 24});
+    auto else_res = std::make_shared<opset10::Result>(else_param);
+
+    return std::make_shared<ov::Model>(OutputVector{else_res}, ParameterVector{else_param});
+}
+
+TEST(TransformationTests, Convert_Precision_If_Body) {
+    shared_ptr<Model> main_model, main_model_ref;
+    pass::Manager manager;
+    {
+        auto then_body = make_then_body(false);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(false);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<ov::op::v0::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<ov::opset8::If>(cond);
+        auto if_result = std::make_shared<ov::op::v0::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(main_model);
+    }
+
+    {
+        auto then_body = make_then_body(true);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(true);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<ov::opset8::Parameter>(ov::element::f16, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<ov::op::v0::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<ov::opset8::If>(cond);
+        auto if_result = std::make_shared<ov::op::v0::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model_ref = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(main_model_ref, main_model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
 TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
@@ -1994,8 +2109,9 @@ TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
         auto pow_exp_const = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
         auto pow = std::make_shared<opset10::Power>(add, pow_exp_const);
         auto mul = std::make_shared<opset10::Multiply>(input_1_decompressed, pow);
+        auto conv = std::make_shared<opset10::Convert>(mul, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();
