Skip to content

Commit

Permalink
sync: update ggml to fix large image generation with SYCL backend (#380)
Browse files Browse the repository at this point in the history
* turn off fast-math on host in SYCL backend

Signed-off-by: zhentaoyu <[email protected]>

* update ggml for sync some sycl ops

Signed-off-by: zhentaoyu <[email protected]>

* update sycl readme and ggml

Signed-off-by: zhentaoyu <[email protected]>

---------

Signed-off-by: zhentaoyu <[email protected]>
  • Loading branch information
zhentaoyu authored Sep 2, 2024
1 parent 58d5473 commit e410aeb
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 18 deletions.
34 changes: 21 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,40 +35,34 @@ option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
#option(SD_BUILD_SERVER "sd: build server example" ON)

if(SD_CUBLAS)
message("Use CUBLAS as backend stable-diffusion")
message("-- Use CUBLAS as backend stable-diffusion")
set(GGML_CUDA ON)
add_definitions(-DSD_USE_CUBLAS)
endif()

if(SD_METAL)
message("Use Metal as backend stable-diffusion")
message("-- Use Metal as backend stable-diffusion")
set(GGML_METAL ON)
add_definitions(-DSD_USE_METAL)
endif()

if (SD_VULKAN)
message("Use Vulkan as backend stable-diffusion")
message("-- Use Vulkan as backend stable-diffusion")
set(GGML_VULKAN ON)
add_definitions(-DSD_USE_VULKAN)
endif ()

if (SD_HIPBLAS)
message("Use HIPBLAS as backend stable-diffusion")
message("-- Use HIPBLAS as backend stable-diffusion")
set(GGML_HIPBLAS ON)
add_definitions(-DSD_USE_CUBLAS)
if(SD_FAST_SOFTMAX)
set(GGML_CUDA_FAST_SOFTMAX ON)
endif()
endif ()

if(SD_SYCL)
message("Use SYCL as backend stable-diffusion")
set(GGML_SYCL ON)
add_definitions(-DSD_USE_SYCL)
endif()

if(SD_FLASH_ATTN)
message("Use Flash Attention for memory optimization")
message("-- Use Flash Attention for memory optimization")
add_definitions(-DSD_USE_FLASH_ATTENTION)
endif()

Expand All @@ -82,19 +76,33 @@ file(GLOB SD_LIB_SOURCES

# we can only get one shared lib
if(SD_BUILD_SHARED_LIBS)
message("Build shared library")
message("-- Build shared library")
message(${SD_LIB_SOURCES})
set(BUILD_SHARED_LIBS OFF)
add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES})
add_definitions(-DSD_BUILD_SHARED_LIB)
target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
else()
message("Build static library")
message("-- Build static library")
set(BUILD_SHARED_LIBS OFF)
add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
endif()

# SYCL backend: turn on GGML_SYCL, add the SD_USE_SYCL preprocessor definition,
# and compile the ${SD_LIB} target with precise floating-point semantics.
# This block calls target_compile_options(), which requires the ${SD_LIB}
# target to have been created by add_library() already.
if(SD_SYCL)
message("-- Use SYCL as backend stable-diffusion")
# NOTE(review): presumably read by the ggml subproject to enable its SYCL backend - confirm
set(GGML_SYCL ON)
# directory-scoped definition (not limited to ${SD_LIB})
add_definitions(-DSD_USE_SYCL)
# disable fast-math on host, see:
# https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html
if (WIN32)
# Windows command-line form of the same floating-point model option
set(SYCL_COMPILE_OPTIONS /fp:precise)
else()
set(SYCL_COMPILE_OPTIONS -fp-model=precise)
endif()
message("-- Turn off fast-math for host in SYCL backend")
# PRIVATE: the option is used only when compiling ${SD_LIB} itself and is not
# propagated to targets that link against it
target_compile_options(${SD_LIB} PRIVATE ${SYCL_COMPILE_OPTIONS})
endif()

set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

Expand Down
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,12 @@ Example of text2img by using SYCL backend:
- download the `stable-diffusion` model weights; refer to [download-weight](#download-weights).
- run `./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors --cfg-scale 5 --steps 30 --sampling-method euler -H 512 -W 512 --seed 42 -p "fantasy medieval village world inside a glass sphere , high detail, fantasy, realistic, light effect, hyper detail, volumetric lighting, cinematic, macro, depth of field, blur, red light and clouds from the back, highly detailed epic cinematic concept art cg render made in maya, blender and photoshop, octane render, excellent composition, dynamic dramatic cinematic lighting, aesthetic, very inspirational, world inside a glass sphere by james gurney by artgerm with james jean, joe fenton and tristan eaton by ross tran, fine details, 4k resolution"`
- run `./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors --cfg-scale 5 --steps 30 --sampling-method euler -H 1024 -W 1024 --seed 42 -p "fantasy medieval village world inside a glass sphere , high detail, fantasy, realistic, light effect, hyper detail, volumetric lighting, cinematic, macro, depth of field, blur, red light and clouds from the back, highly detailed epic cinematic concept art cg render made in maya, blender and photoshop, octane render, excellent composition, dynamic dramatic cinematic lighting, aesthetic, very inspirational, world inside a glass sphere by james gurney by artgerm with james jean, joe fenton and tristan eaton by ross tran, fine details, 4k resolution"`
<p align="center">
<img src="./assets/sycl_sd3_output.png" width="360x">
</p>
> [!NOTE]
> Try setting a smaller image height and width (for example, `-H 512 -W 512`) if you encounter the error `Provided range is out of integer limits. Pass '-fno-sycl-id-queries-fit-in-int' to disable range check.`
##### Using Flash Attention
Expand Down
Binary file modified assets/sycl_sd3_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion ggml
Submodule ggml updated 91 files
+1 −0 .gitignore
+1 −1 CMakeLists.txt
+38 −37 ci/run.sh
+37 −5 examples/common.h
+3 −0 examples/mnist/.gitignore
+11 −31 examples/mnist/CMakeLists.txt
+148 −80 examples/mnist/README.md
+0 −62 examples/mnist/convert-h5-to-ggml.py
+0 −169 examples/mnist/main-cnn.cpp
+0 −122 examples/mnist/main-cpu.cpp
+0 −125 examples/mnist/main-mtl.cpp
+0 −26 examples/mnist/main-mtl.h
+0 −499 examples/mnist/main-mtl.m
+0 −328 examples/mnist/main.cpp
+0 −116 examples/mnist/mnist-cnn.py
+591 −0 examples/mnist/mnist-common.cpp
+104 −0 examples/mnist/mnist-common.h
+78 −0 examples/mnist/mnist-eval.cpp
+93 −0 examples/mnist/mnist-train-cnn.py
+130 −0 examples/mnist/mnist-train-fc.py
+39 −0 examples/mnist/mnist-train.cpp
+0 −1 examples/mnist/models/mnist/.gitignore
+ examples/mnist/models/mnist/mnist_model.state_dict
+ examples/mnist/models/mnist/t10k-images.idx3-ubyte
+8 −1 examples/yolo/README.md
+144 −60 examples/yolo/yolov3-tiny.cpp
+1 −0 include/ggml-backend.h
+76 −47 include/ggml.h
+1 −0 scripts/sync-llama-am.sh
+1 −1 scripts/sync-llama.last
+1 −0 scripts/sync-whisper-am.sh
+1 −1 scripts/sync-whisper.last
+14 −0 src/CMakeLists.txt
+6 −21 src/ggml-aarch64.c
+5 −8 src/ggml-backend.c
+2 −2 src/ggml-cann/Doxyfile
+1 −1 src/ggml-cann/aclnn_ops.cpp
+14 −1 src/ggml-cuda.cu
+8 −0 src/ggml-cuda/binbcast.cu
+1 −0 src/ggml-cuda/binbcast.cuh
+106 −0 src/ggml-cuda/cross-entropy-loss.cu
+5 −0 src/ggml-cuda/cross-entropy-loss.cuh
+12 −5 src/ggml-cuda/fattn-common.cuh
+43 −9 src/ggml-cuda/fattn-tile-f16.cu
+40 −7 src/ggml-cuda/fattn-tile-f32.cu
+58 −13 src/ggml-cuda/fattn-vec-f16.cuh
+57 −11 src/ggml-cuda/fattn-vec-f32.cuh
+58 −5 src/ggml-cuda/fattn-wmma-f16.cuh
+2 −2 src/ggml-cuda/fattn.cu
+1 −1 src/ggml-cuda/rope.cu
+1 −2 src/ggml-cuda/sumrows.cu
+2 −0 src/ggml-cuda/sumrows.cuh
+180 −27 src/ggml-metal.m
+211 −19 src/ggml-metal.metal
+1 −1 src/ggml-quants.c
+49 −35 src/ggml-rpc.cpp
+17 −106 src/ggml-sycl.cpp
+1 −0 src/ggml-sycl/backend.hpp
+11 −0 src/ggml-sycl/common.cpp
+53 −0 src/ggml-sycl/common.hpp
+57 −57 src/ggml-sycl/convert.cpp
+1 −1 src/ggml-sycl/convert.hpp
+98 −98 src/ggml-sycl/dequantize.hpp
+2 −2 src/ggml-sycl/dmmv.cpp
+101 −0 src/ggml-sycl/gemm.hpp
+125 −0 src/ggml-sycl/im2col.cpp
+23 −0 src/ggml-sycl/im2col.hpp
+1 −1 src/ggml-sycl/rope.cpp
+716 −786 src/ggml-vulkan.cpp
+517 −253 src/ggml.c
+24 −0 src/vulkan-shaders/acc.comp
+5 −1 src/vulkan-shaders/concat.comp
+1 −2 src/vulkan-shaders/mul_mat_vec.comp
+1 −1 src/vulkan-shaders/mul_mat_vec_nc.comp
+1 −1 src/vulkan-shaders/mul_mat_vec_p021.comp
+18 −17 src/vulkan-shaders/mul_mat_vec_q2_k.comp
+10 −9 src/vulkan-shaders/mul_mat_vec_q3_k.comp
+24 −21 src/vulkan-shaders/mul_mat_vec_q4_k.comp
+27 −29 src/vulkan-shaders/mul_mat_vec_q5_k.comp
+13 −13 src/vulkan-shaders/mul_mat_vec_q6_k.comp
+8 −7 src/vulkan-shaders/mul_mm.comp
+24 −0 src/vulkan-shaders/repeat.comp
+8 −0 src/vulkan-shaders/vulkan-shaders-gen.cpp
+10 −0 tests/CMakeLists.txt
+116 −10 tests/test-backend-ops.cpp
+169 −0 tests/test-cont.c
+16 −16 tests/test-conv-transpose-1d.cpp
+9 −9 tests/test-conv1d.cpp
+9 −9 tests/test-conv2d.cpp
+179 −66 tests/test-grad0.cpp
+4 −4 tests/test-mul-mat.cpp
2 changes: 1 addition & 1 deletion ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]
LOG_DEBUG("k->ne[1] == %d", k->ne[1]);
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0);
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0, 0);
} else {
v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k]
v = ggml_reshape_3d(ctx, v, L_k, d_head, n_head * N); // [N * n_head, d_head, L_k]
Expand Down

0 comments on commit e410aeb

Please sign in to comment.