diff --git a/ggml b/ggml
index 21d3a308..6fcbd60b 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 21d3a308fcb7f31cb9beceaeebad4fb622f3c337
+Subproject commit 6fcbd60bc72ac3f7ad43f78c87e535f2e6206f58
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index 75ad0414..fc679e70 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -22,6 +22,7 @@
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "ggml-cpu.h"
 #include "ggml.h"
 
 #ifdef SD_USE_CUBLAS
@@ -100,17 +101,11 @@ __STATIC_INLINE__ ggml_fp16_t ggml_tensor_get_f16(const ggml_tensor* tensor, int
 
 static struct ggml_tensor* get_tensor_from_graph(struct ggml_cgraph* gf, const char* name) {
     struct ggml_tensor* res = NULL;
-    for (int i = 0; i < gf->n_nodes; i++) {
-        // printf("%d, %s \n", i, gf->nodes[i]->name);
-        if (strcmp(ggml_get_name(gf->nodes[i]), name) == 0) {
-            res = gf->nodes[i];
-            break;
-        }
-    }
-    for (int i = 0; i < gf->n_leafs; i++) {
-        // printf("%d, %s \n", i, gf->leafs[i]->name);
-        if (strcmp(ggml_get_name(gf->leafs[i]), name) == 0) {
-            res = gf->leafs[i];
+    for (int i = 0; i < ggml_graph_n_nodes(gf); i++) {
+        struct ggml_tensor* node = ggml_graph_node(gf, i);
+        // printf("%d, %s \n", i, ggml_get_name(node));
+        if (strcmp(ggml_get_name(node), name) == 0) {
+            res = node;
             break;
         }
     }
@@ -1129,7 +1124,7 @@ struct GGMLRunner {
         ggml_graph_print(gf);
 #endif
         if (output != NULL) {
-            auto result = gf->nodes[gf->n_nodes - 1];
+            auto result = ggml_graph_node(gf, -1);
             if (*output == NULL && output_ctx != NULL) {
                 *output = ggml_dup_tensor(output_ctx, result);
             }
diff --git a/model.cpp b/model.cpp
index 2719f63c..dba8187d 100644
--- a/model.cpp
+++ b/model.cpp
@@ -13,6 +13,7 @@
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "ggml-cpu.h"
 #include "ggml.h"
 
 #include "stable-diffusion.h"
@@ -733,25 +734,25 @@ void convert_tensor(void* src,
         if (src_type == GGML_TYPE_F16) {
             ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
         } else {
-            auto qtype = ggml_internal_get_type_traits(src_type);
-            if (qtype.to_float == NULL) {
+            auto qtype = ggml_get_type_traits(src_type);
+            if (qtype->to_float == NULL) {
                 throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
                                                 ggml_type_name(src_type)));
             }
-            qtype.to_float(src, (float*)dst, n);
+            qtype->to_float(src, (float*)dst, n);
         }
     } else {
         // src_type == GGML_TYPE_F16 => dst_type is quantized
         // src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
-        auto qtype = ggml_internal_get_type_traits(src_type);
-        if (qtype.to_float == NULL) {
+        auto qtype = ggml_get_type_traits(src_type);
+        if (qtype->to_float == NULL) {
             throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
                                             ggml_type_name(src_type)));
         }
         std::vector<char> buf;
         buf.resize(sizeof(float) * n);
         char* src_data_f32 = buf.data();
-        qtype.to_float(src, (float*)src_data_f32, n);
+        qtype->to_float(src, (float*)src_data_f32, n);
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
         } else {
diff --git a/stable-diffusion.h b/stable-diffusion.h
index f3e71bef..3604e716 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -93,6 +93,8 @@ enum sd_type_t {
     SD_TYPE_Q4_0_4_4 = 31,
     SD_TYPE_Q4_0_4_8 = 32,
     SD_TYPE_Q4_0_8_8 = 33,
+    SD_TYPE_TQ1_0 = 34,
+    SD_TYPE_TQ2_0 = 35,
     SD_TYPE_COUNT,
 };
 
diff --git a/util.cpp b/util.cpp
index b8a65e7d..3bcee094 100644
--- a/util.cpp
+++ b/util.cpp
@@ -22,6 +22,7 @@
 #include <sys/types.h>
 #endif
 
+#include "ggml-cpu.h"
 #include "ggml.h"
 #include "stable-diffusion.h"
 
@@ -410,7 +411,6 @@ const char* sd_get_system_info() {
     static char buffer[1024];
     std::stringstream ss;
     ss << "System Info: \n";
-    ss << "    BLAS = " << ggml_cpu_has_blas() << std::endl;
     ss << "    SSE3 = " << ggml_cpu_has_sse3() << std::endl;
     ss << "    AVX = " << ggml_cpu_has_avx() << std::endl;
     ss << "    AVX2 = " << ggml_cpu_has_avx2() << std::endl;