[TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32 transformations recursively for internal MultiSubGraph body (#28050)

[TRANSFORMATIONS] Call KeepPrecisionSensitiveInFP32 transformations
recursively for internal MultiSubGraph body

The KeepPrecisionSensitiveInFP32 set of transformations
(MarkSugraphsToKeepInMixedPrecision & AlignMixedFP32FP16Types) preserves
the output precisions of those parts of a model that need to remain in
high precision, even when the model as a whole is converted to a
different precision (as is done in the GPU fp32 -> fp16 pipeline).
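
The marking itself works through runtime info on the affected nodes. As a
hedged sketch (the helper lives in OpenVINO's transformations rt_info
headers; the exact include path should be double-checked):

    // Nodes/subgraphs marked this way are skipped by fp16 compression;
    // the "disable_fp16_compression" comment in the diff below refers
    // to this attribute.
    #include "transformations/rt_info/disable_fp16_compression.hpp"

    ov::disable_fp16_compression(node);  // `node` is any std::shared_ptr<ov::Node>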

The AlignMixedFP32FP16Types transformation makes sure the high-precision
parts of the graph are surrounded with the required Converts so that they
interoperate with the other parts of the model, which now have a
different precision. However, this does not happen if the model is an
internal body of a MultiSubGraph, because the transformation is never
called for it.

Fix this by calling the KeepPrecisionSensitiveInFP32 transformations
recursively, so that each internal body is converted correctly and gets
the Converts it requires.
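
From the caller's point of view, this path is exercised as in the
following sketch (mirroring the new test added below; the ConvertPrecision
overload and type aliases are taken from that test and are illustrative
rather than normative):

    // Convert a model fp32 -> fp16 while keeping precision-sensitive
    // subgraphs, now including If/Loop/TensorIterator bodies, in fp32.
    using namespace ov;
    pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    bool keep_precision_sensitive_in_fp32 = true;
    manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                  empty_type_to_fuse_map,
                                                  keep_precision_sensitive_in_fp32);
    manager.run_passes(model);  // `model` is any std::shared_ptr<ov::Model>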

- Ticket: CVS-158631


Signed-off-by: Andrii Staikov <[email protected]>
Co-authored-by: Denis Orlov <[email protected]>
CuriousPanCake and dorloff authored Dec 17, 2024
1 parent c1c66ce commit 6f3796b
Showing 3 changed files with 132 additions and 17 deletions.
@@ -198,7 +198,8 @@ bool convert_node_input_precision(const std::shared_ptr<ov::Node>& node,
     return false;
 }
 
-bool convert_function_precision(const std::shared_ptr<Model>& f,
+bool convert_function_precision(ov::pass::PassBase& pass,
+                                const std::shared_ptr<Model>& f,
                                 const type_to_fuse_map& type_to_fuse,
                                 const type_to_fuse_map& type_to_extend,
                                 const precisions_map& precisions,
@@ -212,6 +213,14 @@ bool convert_function_precision(const std::shared_ptr<Model>& f,
                                 bool names_compatibility_mode) {
     bool is_output_precision_changed = false;
 
+    if (skip_precision_sensitive && has_fp16_compression) {
+        pass::Manager manager(pass.get_pass_config(), "KeepPrecisionSensitiveInFP32");
+        // Mark subgraphs with disable_fp16_compression to keep them in FP32
+        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+        manager.register_pass<pass::AlignMixedFP32FP16Types>();
+        manager.run_passes(f);
+    }
+
     ov::element::TypeVector orig_result_types;
     if (!convert_input_output_precision) {
         const auto& results = f->get_results();
@@ -268,7 +277,8 @@ bool convert_function_precision(const std::shared_ptr<Model>& f,
         if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::MultiSubGraphOp>(node)) {
             size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
             for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
-                is_changed = convert_function_precision(sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
+                is_changed = convert_function_precision(pass,
+                                                        sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
                                                         type_to_fuse,
                                                         type_to_extend,
                                                         precisions,
@@ -366,7 +376,8 @@ bool convert_precision(ov::pass::PassBase& pass,
     std::unordered_map<const ov::Node*, std::vector<Input<Node>>> const_to_internal_output;
 
     const auto names_compatibility_mode = f->has_rt_info("version") && f->get_rt_info<int64_t>("version") < 11;
-    return convert_function_precision(f,
+    return convert_function_precision(pass,
+                                      f,
                                       type_to_fuse,
                                       type_to_extend,
                                       precisions,
@@ -418,14 +429,6 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
 
     bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16;
 
-    if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) {
-        pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32");
-        // Mark subgraphs with disable_fp16_compression to keep them in FP32
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        manager.run_passes(f);
-    }
-
     type_to_fuse_map type_to_fuse{
         {ov::op::v0::Convert::get_type_info_static(), fuse_type_to_convert},
         {ov::op::v3::ShapeOf::get_type_info_static(), fuse_type_to_shapeof},
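
For context, If, Loop and TensorIterator all derive from
ov::op::util::MultiSubGraphOp, so the recursion above reaches each of
their bodies through the same two calls used in the diff; a minimal
sketch of that surface:

    // Enumerate the internal bodies of any multi-subgraph op (If, Loop,
    // TensorIterator) and recurse into each one.
    if (auto multi = std::dynamic_pointer_cast<ov::op::util::MultiSubGraphOp>(node)) {
        for (size_t i = 0; i < multi->get_internal_subgraphs_size(); ++i) {
            std::shared_ptr<ov::Model> body = multi->get_function(static_cast<int>(i));
            // ... run the same conversion on `body` here ...
        }
    }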
@@ -65,10 +65,6 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
             if (!out_inputs.get_element_type().is_real())
                 continue;
 
-            // todo xxx-101766: if we don't skip Results there is an error on GPU
-            if (ov::as_type_ptr<ov::op::v0::Result>(out_node))
-                continue;
-
             // element_type of this convert will be changed automatically to f16 after
             // ConvertPrecision(f32 -> f16). It's kept here f32 to keep ov::Model validatable
             auto convert = std::make_shared<ov::op::v0::Convert>(output, out_inputs.get_element_type());
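
With the Result skip removed, AlignMixedFP32FP16Types can now place a
boundary Convert in front of a body Result as well. A hedged sketch of
the pattern this produces for a precision-sensitive If branch after the
full fp32 -> fp16 pipeline (shapes and node names are illustrative;
compare the reference then-body in the test below):

    // The body parameter arrives as f16, is converted up to f32 for the
    // precision-sensitive math, and is converted back to f16 right before
    // the Result so the parent graph sees a consistent element type.
    auto body_param = std::make_shared<ov::opset10::Parameter>(ov::element::f16, ov::Shape{1, 24});
    auto to_f32 = std::make_shared<ov::opset10::Convert>(body_param, ov::element::f32);
    auto sqrt = std::make_shared<ov::opset10::Sqrt>(to_f32);  // stays in f32
    auto to_f16 = std::make_shared<ov::opset10::Convert>(sqrt, ov::element::f16);
    auto body_res = std::make_shared<ov::opset10::Result>(to_f16);  // Convert precedes the Result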
src/common/transformations/tests/utils/convert_precision.cpp (118 additions & 2 deletions)
@@ -1951,15 +1951,130 @@ TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
         auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
         auto add = std::make_shared<opset10::Add>(input_2_decompressed, eps_const);
         auto divide = std::make_shared<opset10::Divide>(input_1_decompressed, add);
+        auto conv = std::make_shared<opset10::Convert>(divide, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
 
+static std::shared_ptr<ov::Model> make_then_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+
+    auto then_param = std::make_shared<opset10::Parameter>(el_type, PartialShape{1, 112, 112, 24});
+    auto opt_conv =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto red_mean_const = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto red_mean = std::make_shared<opset10::ReduceMean>(opt_conv, red_mean_const);
+
+    auto opt_conv_sub =
+        ref ? std::make_shared<opset10::Convert>(then_param, element::f32)->output(0) : then_param->output(0);
+    auto subtract = std::make_shared<opset10::Subtract>(opt_conv_sub, red_mean);
+
+    auto power_const = opset10::Constant::create(el_type, Shape{1}, {2});
+    auto opt_conv_1 =
+        ref ? std::make_shared<opset10::Convert>(power_const, element::f32)->output(0) : power_const->output(0);
+    auto power = std::make_shared<opset10::Power>(subtract, opt_conv_1);
+
+    auto red_mean_const_1 = opset10::Constant::create(element::i32, Shape{3}, {0, 1, 2});
+    auto reduce_mean_1 = std::make_shared<opset10::ReduceMean>(power, red_mean_const_1);
+
+    auto add_const = opset10::Constant::create(el_type, Shape{1}, {1.001e-05});
+    auto opt_conv_2 =
+        ref ? std::make_shared<opset10::Convert>(add_const, element::f32)->output(0) : add_const->output(0);
+    auto add = std::make_shared<opset10::Add>(reduce_mean_1, opt_conv_2);
+
+    auto sqrt = std::make_shared<opset10::Sqrt>(add);
+
+    auto divide = std::make_shared<opset10::Divide>(subtract, sqrt);
+
+    auto mul_const =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float16>(24, 1));  // stub values
+    auto mul_conv = std::make_shared<opset10::Convert>(mul_const, element::f32);
+    auto mul = std::make_shared<opset10::Multiply>(divide, mul_conv);
+
+    auto add_const_1 =
+        opset10::Constant::create(element::f16, Shape{1, 1, 1, 24}, std::vector<float16>(24, 1));  // stub values
+    auto add_conv = std::make_shared<opset10::Convert>(add_const_1, element::f32);
+    auto add_1 = std::make_shared<opset10::Multiply>(mul, add_conv);
+
+    auto res_conv = ref ? std::make_shared<opset10::Convert>(add_1, element::f16)->output(0) : add_1->output(0);
+
+    auto then_res = std::make_shared<opset10::Result>(res_conv);
+
+    return std::make_shared<ov::Model>(OutputVector{then_res}, ParameterVector{then_param});
+}
+
+static std::shared_ptr<ov::Model> make_else_body(bool ref) {
+    auto el_type = ref ? element::f16 : element::f32;
+    auto else_param = std::make_shared<opset10::Parameter>(el_type, ov::Shape{1, 112, 112, 24});
+    auto else_res = std::make_shared<opset10::Result>(else_param);
+
+    return std::make_shared<ov::Model>(OutputVector{else_res}, ParameterVector{else_param});
+}
+
+TEST(TransformationTests, Convert_Precision_If_Body) {
+    shared_ptr<Model> main_model, main_model_ref;
+    pass::Manager manager;
+    {
+        auto then_body = make_then_body(false);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(false);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<ov::op::v0::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<ov::opset8::If>(cond);
+        auto if_result = std::make_shared<ov::op::v0::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(main_model);
+    }
+
+    {
+        auto then_body = make_then_body(true);
+        auto then_param = then_body->get_parameters()[0];
+        auto then_res = then_body->get_results()[0];
+
+        auto else_body = make_else_body(true);
+        auto else_param = else_body->get_parameters()[0];
+        auto else_res = else_body->get_results()[0];
+
+        auto input = std::make_shared<ov::opset8::Parameter>(ov::element::f16, ov::Shape{1, 112, 112, 24});
+        auto cond = std::make_shared<ov::op::v0::Constant>(element::boolean, Shape{1}, true);
+        auto if_op = std::make_shared<ov::opset8::If>(cond);
+        auto if_result = std::make_shared<ov::op::v0::Result>(if_op);
+
+        if_op->set_then_body(then_body);
+        if_op->set_else_body(else_body);
+        if_op->set_input(input, then_param, else_param);
+        if_op->set_output(then_res, else_res);
+
+        main_model_ref = std::make_shared<Model>(NodeVector{if_result}, ParameterVector{input});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(main_model_ref, main_model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
 TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
@@ -1994,8 +2109,9 @@ TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
         auto pow_exp_const = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
         auto pow = std::make_shared<opset10::Power>(add, pow_exp_const);
         auto mul = std::make_shared<opset10::Multiply>(input_1_decompressed, pow);
+        auto conv = std::make_shared<opset10::Convert>(mul, element::f16);
 
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input_1, input_2});
     }
 
     const FunctionsComparator func_comparator = FunctionsComparator::with_default();
