[GPU][PoC] Common shape info buffer #28167

Draft
Wants to merge 2 commits into base: master
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
@@ -160,6 +160,7 @@ struct network {
std::map<primitive_id, network_output> execute(const std::vector<event::ptr>& dependencies = {});

void validate_primitives();
void preallocate_shape_info_buffers();
void set_arguments();
// Implementation specific calls
bool does_node_need_lockable_output(const primitive_id& id) const;
@@ -215,6 +216,7 @@ struct network {
bool _is_dynamic = false;
bool _enable_profiling = false;
bool _reset_arguments;
memory::ptr _ptr;  // common buffer backing the shape info of all primitives
Review comment (Contributor):
I think it's better to hold the pointer to the base buffer in the sub-buffer itself, so that their lifetimes are better aligned.
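
A minimal sketch of that suggestion; the type and member names below are hypothetical, not the actual cldnn classes. The point is that the sub-buffer view holds an owning pointer to its parent allocation, so the parent stays alive as long as any view exists and network would not need _ptr purely for lifetime management.

#include <cstddef>
#include <memory>

// Hypothetical sub-buffer view that owns a reference to its parent buffer.
struct subbuffer_view {
    std::shared_ptr<void> base;   // keeps the parent allocation alive
    size_t offset_bytes = 0;      // start of this view inside the parent
    size_t size_bytes = 0;        // size of the view

    void* data() const {
        return static_cast<char*>(base.get()) + offset_bytes;
    }
};
// The parent buffer is released automatically once the last view (and any
// other owner) drops its shared_ptr, so no separate member in network is
// needed just to keep it alive.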


std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> _primitives;
std::vector<shared_mem_type> _in_out_shared_mem_types;
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
@@ -57,6 +57,8 @@ class engine {
/// Creates memory object from memory @p params and reinterprets the data using specified @p layout
virtual memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) = 0;

/// Creates a memory object that aliases a sub-range of @p memory starting at byte @p offset, reinterpreted using @p new_layout
virtual memory_ptr reinterpret_subbuffer(const memory& memory, const layout& new_layout, size_t offset) = 0;

/// Creates memory object from the other @p memory and reinterprets the data using specified @p new_layout
virtual memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) = 0;

1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/include/primitive_inst.h
@@ -232,6 +232,7 @@ class primitive_inst {
}

memory::ptr shape_info_memory_ptr() const { return _shape_info_memory; }
void set_shape_info_memory_ptr(memory::ptr addr);

void add_dep_events(const std::vector<event::ptr>& events);
void add_dep_event(event::ptr ev);
30 changes: 30 additions & 0 deletions src/plugins/intel_gpu/src/graph/network.cpp
@@ -207,6 +207,7 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
build_insts_deps();
build_exec_order();
validate_primitives();
preallocate_shape_info_buffers();
add_default_output_chains();
}

@@ -275,6 +276,35 @@ void network::validate_primitives() {
}
}

void network::preallocate_shape_info_buffers() {
    GPU_DEBUG_DEFINE_MEM_LOGGER("preallocate_shape_info_buffers");
    int64_t sum = 0;

    // First pass: count the total number of shape info elements and drop any
    // previously assigned per-primitive shape info buffers.
    for (auto const& prim : _exec_order) {
        auto& node = prim->get_node();
        int64_t shape_elements = node.get_total_shape_info_size();
        sum += shape_elements;
        std::cout << "shape_elements " << shape_elements << std::endl;  // PoC debug output
        prim->set_shape_info_memory_ptr(nullptr);
    }

    std::cout << "Sum of shape elements " << sum << std::endl;  // PoC debug output

    if (sum > 0) {
        auto& eng = get_engine();
        // Single shared allocation that backs the shape info of all primitives.
        _ptr = eng.allocate_memory(layout{{sum}, data_types::i32, format::bfyx}, false);
        int64_t offset = 0;  // byte offset into the common buffer
        for (auto const& prim : _exec_order) {
            auto& node = prim->get_node();
            int64_t shape_elements = node.get_total_shape_info_size();
            if (shape_elements == 0)
                continue;
            auto new_mem = eng.reinterpret_subbuffer(*_ptr, layout{{shape_elements}, data_types::i32, format::bfyx}, offset);
            prim->set_shape_info_memory_ptr(new_mem);
            offset += shape_elements * 4;  // 4 bytes per i32 element
        }
    }
}

void network::set_arguments() {
if (!_reset_arguments)
return;
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -1125,6 +1125,10 @@ void primitive_inst::fill_shape_info_data(const layout& runtime_layout, const la
}
}

void primitive_inst::set_shape_info_memory_ptr(memory::ptr addr) {
_shape_info_memory = addr;
}

void primitive_inst::allocate_shape_info_memory() {
int64_t shape_elements = _node->get_total_shape_info_size();
_shape_info_memory = _network.get_engine().allocate_memory(layout{{shape_elements}, data_types::i32, format::bfyx}, false);
36 changes: 36 additions & 0 deletions src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
@@ -208,6 +208,42 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty
}
}

memory::ptr ocl_engine::reinterpret_subbuffer(const memory& memory, const layout& new_layout, size_t offset) {
    OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine");
    OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(),
                    "[GPU] trying to reinterpret between image and non-image layouts. Current: ",
                    memory.get_layout().format.to_string(), " Target: ", new_layout.format.to_string());

    try {
        if (new_layout.format.is_image_2d()) {
            // NOTE: offset is not applied to image allocations in this PoC.
            return std::make_shared<ocl::gpu_image2d>(this,
                                                      new_layout,
                                                      reinterpret_cast<const ocl::gpu_image2d&>(memory).get_buffer(),
                                                      memory.get_mem_tracker());
        } else if (memory_capabilities::is_usm_type(memory.get_allocation_type())) {
            auto& usm_mem = reinterpret_cast<const ocl::gpu_usm&>(memory);
            // Keep a reference (not a copy) to the helper: UsmMemory/UsmHolder store a reference to it.
            const auto& helper = usm_mem.get_buffer()._usmHelper;
            auto ptr = static_cast<char*>(usm_mem.get_buffer().get()) + offset;
            auto buffer = cl::UsmMemory(helper, ptr);

            return std::make_shared<ocl::gpu_usm>(this,
                                                  new_layout,
                                                  buffer,
                                                  memory.get_allocation_type(),
                                                  memory.get_mem_tracker());
        } else {
            // NOTE: offset is not applied to cl_mem buffers in this PoC; that would require a cl sub-buffer.
            return std::make_shared<ocl::gpu_buffer>(this,
                                                     new_layout,
                                                     reinterpret_cast<const ocl::gpu_buffer&>(memory).get_buffer(),
                                                     memory.get_mem_tracker());
        }
    } catch (cl::Error const& err) {
        OPENVINO_THROW(OCL_ERR_MSG_FMT(err));
    }
}


memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) {
OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine");
OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(),
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp
@@ -26,6 +26,7 @@ class ocl_engine : public engine {

memory_ptr allocate_memory(const layout& layout, allocation_type type, bool reset = true) override;
memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override;
memory_ptr reinterpret_subbuffer(const memory& memory, const layout& new_layout, size_t offset) override;
memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override;
bool is_the_same_buffer(const memory& mem1, const memory& mem2) override;
bool check_allocatable(const layout& layout, allocation_type type) override;
9 changes: 7 additions & 2 deletions src/plugins/intel_gpu/src/runtime/ocl/ocl_ext.hpp
@@ -903,9 +903,10 @@ class UsmHolder {
~UsmHolder() {
memFree();
}

void* _ptr;
private:
const cl::UsmHelper& _usmHelper;
void* _ptr;
bool _shared_memory = false;
};
/*
@@ -922,6 +923,10 @@ class UsmMemory {
}
}

void add_offset(size_t offsetPtr) const {
_usm_pointer->_ptr = (void*) ((unsigned char*) _usm_pointer->_ptr + offsetPtr);
}
Comment on lines +926 to +928
Review comment (Contributor):
Probably we shouldn't expose this function, because it allows shifting the pointer of any UsmMemory. The same pointer is also used for the memFree() call, and I'm not sure it will be handled properly if it is shifted accidentally for a non-shared memory buffer. Maybe it would be better to restrict shifting to a specialized constructor like UsmMemory(const cl::UsmHelper& usmHelper, void* usm_ptr, size_t offset = 0), which creates the UsmHolder with the shared_memory=true option.
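
A sketch of the constructor this comment proposes, as a UsmMemory member, modeled on the existing UsmMemory(usmHelper, usm_ptr) shared-memory constructor shown above; the UsmHolder constructor arguments (helper, pointer, shared_memory flag) are assumed from the surrounding class, so treat this as an illustration rather than the final API.

// Sketch only: apply the offset once at construction time and mark the holder
// as shared memory, so memFree() will not try to release a pointer that has
// been shifted away from the allocation base.
UsmMemory(const cl::UsmHelper& usmHelper, void* usm_ptr, size_t offset = 0)
    : _usmHelper(usmHelper),
      _usm_pointer(std::make_shared<UsmHolder>(
          _usmHelper,
          static_cast<char*>(usm_ptr) + offset,
          /* shared_memory = */ true)) {
    if (usm_ptr == nullptr) {
        throw std::runtime_error("[GPU] Can't share null usm pointer");
    }
}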


// Get methods return the original pointer allocated by OpenCL.
void* get() const { return _usm_pointer->ptr(); }

@@ -953,9 +958,9 @@
}

virtual ~UsmMemory() = default;
const UsmHelper& _usmHelper;

protected:
const UsmHelper& _usmHelper;
std::shared_ptr<UsmHolder> _usm_pointer = nullptr;

private: