Skip to content

Commit

Permalink
OpenVINO GPU plugin does not support int64 natively so i64 inputs are…
Browse files Browse the repository at this point in the history
… always converted to i32. To avoid runtime conversion, updated IO tensor precision to i32.
  • Loading branch information
yeonbok committed Jun 25, 2024
1 parent f935233 commit 1d4800f
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ def __init__(
self._first_iter_beam_search = False
self._second_iter_beam_search = False
self.update_pkv_precision()
if "GPU" in device:
self.update_int_precision()
if self.is_dynamic:
self.model = self._reshape(self.model, -1, -1)
is_stateful_supported = ensure_stateful_is_available(warn=False)
Expand Down Expand Up @@ -210,6 +212,13 @@ def update_pkv_precision(self, force_fp32=False):
self.model = self._reshape(self.model, -1, -1)
self.request = None

def update_int_precision(self):
    """
    Switch the i64 token-level inputs of the model to i32.

    Only inputs whose name contains `input_ids`, `position_ids` or `attention_mask` and whose
    element type is currently i64 are changed. The caller is expected to invoke this for GPU
    devices only, where i64 inputs would otherwise be converted to i32 at runtime.

    The model is rebuilt (and the compiled request dropped) only when at least one input
    was actually changed.
    """
    int_input_tags = ("input_ids", "position_ids", "attention_mask")
    ppp = PrePostProcessor(self.model)
    changed = False
    for key in self.model.inputs:
        in_name = key.get_any_name()
        if key.get_element_type() == Type.i64 and any(tag in in_name for tag in int_input_tags):
            ppp.input(in_name).tensor().set_element_type(Type.i32)
            changed = True

    if not changed:
        return

    # The PrePostProcessor only records the requested changes; they take effect
    # once build() is called and the resulting model replaces the current one.
    self.model = ppp.build()
    # Drop any previously compiled request so it is re-created from the updated
    # model (same convention as update_pkv_precision).
    self.request = None

def _save_pretrained(self, save_directory: Union[str, Path]):
"""
Saves the model to the OpenVINO IR format so that it can be re-loaded using the
Expand Down

0 comments on commit 1d4800f

Please sign in to comment.