Skip to content

Commit

Permalink
fix: avoid stack overflow on MSVC
Browse files (browse the repository at this point in the history)
  • Loading branch information
leejet committed Oct 23, 2023
1 parent 09cab2a commit fbd18e1
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3155,6 +3155,8 @@ class StableDiffusionGGML {
struct ggml_tensor* c = ggml_new_tensor_4d(res_ctx, GGML_TYPE_F32, 1024, 2, 1, 1);
ggml_set_f32(c, 0.5);

+ struct ggml_cplan cplan;

size_t ctx_size = 10 * 1024 * 1024; // 10MB
// calculate the amount of memory required
{
Expand All @@ -3179,7 +3181,7 @@ class StableDiffusionGGML {
ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
- struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+ cplan = ggml_graph_plan(diffusion_graph, n_threads);

ctx_size += cplan.work_size;
LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
Expand Down Expand Up @@ -3212,7 +3214,7 @@ class StableDiffusionGGML {
ggml_hold_dynamic_tensor(out);

struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
- struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+ cplan = ggml_graph_plan(diffusion_graph, n_threads);

ggml_set_dynamic(ctx, false);
struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
Expand Down Expand Up @@ -3257,6 +3259,7 @@ class StableDiffusionGGML {
true);
std::vector<int>& tokens = tokens_and_weights.first;
std::vector<float>& weights = tokens_and_weights.second;
+ struct ggml_cplan cplan;
size_t ctx_size = 10 * 1024 * 1024; // 10MB
// calculate the amount of memory required
{
Expand All @@ -3278,8 +3281,8 @@ class StableDiffusionGGML {

struct ggml_tensor* hidden_states = cond_stage_model.text_model.forward(ctx, input_ids);

- struct ggml_cgraph cond_graph = ggml_build_forward(hidden_states);
- struct ggml_cplan cplan = ggml_graph_plan(&cond_graph, n_threads);
+ struct ggml_cgraph* cond_graph = ggml_build_forward_ctx(ctx, hidden_states);
+ cplan = ggml_graph_plan(cond_graph, n_threads);
ctx_size += cplan.work_size;

ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);
Expand Down Expand Up @@ -3390,6 +3393,7 @@ class StableDiffusionGGML {
// print_ggml_tensor(x_t);
struct ggml_tensor* x = ggml_dup_tensor(res_ctx, x_t);
copy_ggml_tensor(x, x_t);
+ struct ggml_cplan cplan;

size_t ctx_size = 10 * 1024 * 1024; // 10MB
// calculate the amount of memory required
Expand Down Expand Up @@ -3417,7 +3421,7 @@ class StableDiffusionGGML {
ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
- struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+ cplan = ggml_graph_plan(diffusion_graph, n_threads);

ctx_size += cplan.work_size;
LOG_DEBUG("diffusion context need %.2fMB static memory, with work_size needing %.2fMB",
Expand Down Expand Up @@ -3450,7 +3454,7 @@ class StableDiffusionGGML {
ggml_hold_dynamic_tensor(out);

struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
- struct ggml_cplan cplan = ggml_graph_plan(diffusion_graph, n_threads);
+ cplan = ggml_graph_plan(diffusion_graph, n_threads);

ggml_set_dynamic(ctx, false);
struct ggml_tensor* buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
Expand Down Expand Up @@ -3961,6 +3965,7 @@ class StableDiffusionGGML {
int64_t W = x->ne[0];
int64_t H = x->ne[1];
struct ggml_tensor* result = NULL;
+ struct ggml_cplan cplan;

// calculate the amount of memory required
size_t ctx_size = 10 * 1024 * 1024; // 10MB
Expand All @@ -3981,7 +3986,7 @@ class StableDiffusionGGML {
ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, moments);
- struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+ cplan = ggml_graph_plan(vae_graph, n_threads);

ctx_size += cplan.work_size;
LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",
Expand Down Expand Up @@ -4083,6 +4088,7 @@ class StableDiffusionGGML {
int64_t W = z->ne[0];
int64_t H = z->ne[1];
struct ggml_tensor* result_img = NULL;
+ struct ggml_cplan cplan;

{
float* vec = (float*)z->data;
Expand Down Expand Up @@ -4110,7 +4116,7 @@ class StableDiffusionGGML {
ctx_size += ggml_used_mem(ctx) + ggml_used_mem_of_data(ctx);

struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, img);
- struct ggml_cplan cplan = ggml_graph_plan(vae_graph, n_threads);
+ cplan = ggml_graph_plan(vae_graph, n_threads);

ctx_size += cplan.work_size;
LOG_DEBUG("vae context need %.2fMB static memory, with work_size needing %.2fMB",
Expand Down

0 comments on commit fbd18e1

Please sign in to comment.