From 6668e8749bde78f0d3211420e27d3b3aac26891d Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Sat, 12 Oct 2024 22:34:29 +0900 Subject: [PATCH 1/3] support cuda_malloc imported from comfy: https://github.com/comfyanonymous/ComfyUI/blob/f1d6cef71c70719cc3ed45a2455a4e5ac910cd5e/cuda_malloc.py original commits: - https://github.com/comfyanonymous/ComfyUI/commit/799c08a: Auto disable cuda malloc on some GPUs on windows. - https://github.com/comfyanonymous/ComfyUI/commit/d39c58b: Disable cuda malloc on GTX 750 Ti. - https://github.com/comfyanonymous/ComfyUI/commit/85a8900: Disable cuda malloc on regular GTX 960. - https://github.com/comfyanonymous/ComfyUI/commit/30de083: Disable cuda malloc on all the 9xx series. - https://github.com/comfyanonymous/ComfyUI/commit/7c0a5a3: Disable cuda malloc on a bunch of quadro cards. - https://github.com/comfyanonymous/ComfyUI/commit/5a90d3c: GeForce MX110 + MX130 are maxwell. - https://github.com/comfyanonymous/ComfyUI/commit/fc71cf6: Add some 800M gpus to cuda malloc blacklist. - https://github.com/comfyanonymous/ComfyUI/commit/861fd58: Add a warning if a card that doesn't support cuda malloc has it enabled. - https://github.com/comfyanonymous/ComfyUI/commit/192ca06: Add some more cards to the cuda malloc blacklist. - https://github.com/comfyanonymous/ComfyUI/commit/caddef8: Auto disable cuda malloc on unsupported GPUs on Linux. - https://github.com/comfyanonymous/ComfyUI/commit/2f93b91: Add Tesla GPUs to cuda malloc blacklist. 
--- cuda_malloc.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ webui.py | 3 ++ 2 files changed, 100 insertions(+) create mode 100644 cuda_malloc.py diff --git a/cuda_malloc.py b/cuda_malloc.py new file mode 100644 index 00000000000..41bd1368ee8 --- /dev/null +++ b/cuda_malloc.py @@ -0,0 +1,97 @@ +# from comfyui with minor modification +import os +import importlib.util +import subprocess + +from modules import cmd_args + +#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import. +def get_gpu_names(): + if os.name == 'nt': + import ctypes + + # Define necessary C structures and types + class DISPLAY_DEVICEA(ctypes.Structure): + _fields_ = [ + ('cb', ctypes.c_ulong), + ('DeviceName', ctypes.c_char * 32), + ('DeviceString', ctypes.c_char * 128), + ('StateFlags', ctypes.c_ulong), + ('DeviceID', ctypes.c_char * 128), + ('DeviceKey', ctypes.c_char * 128) + ] + + # Load user32.dll + user32 = ctypes.windll.user32 + + # Call EnumDisplayDevicesA + def enum_display_devices(): + device_info = DISPLAY_DEVICEA() + device_info.cb = ctypes.sizeof(device_info) + device_index = 0 + gpu_names = set() + + while user32.EnumDisplayDevicesA(None, device_index, ctypes.byref(device_info), 0): + device_index += 1 + gpu_names.add(device_info.DeviceString.decode('utf-8')) + return gpu_names + return enum_display_devices() + else: + gpu_names = set() + out = subprocess.check_output(['nvidia-smi', '-L']) + for l in out.split(b'\n'): + if len(l) > 0: + gpu_names.add(l.decode('utf-8').split(' (UUID')[0]) + return gpu_names + +blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M", + "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620", + "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000", + "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", 
"Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000", + "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M", + "GeForce GTX 1650", "GeForce GTX 1630", "Tesla M4", "Tesla M6", "Tesla M10", "Tesla M40", "Tesla M60" + } + +def cuda_malloc_supported(): + try: + names = get_gpu_names() + except: + names = set() + for x in names: + if "NVIDIA" in x: + for b in blacklist: + if b in x: + return False + return True + + +parser = cmd_args.parser +args, _ = parser.parse_known_args() + + +if not args.cuda_malloc: + try: + version = "" + torch_spec = importlib.util.find_spec("torch") + for folder in torch_spec.submodule_search_locations: + ver_file = os.path.join(folder, "version.py") + if os.path.isfile(ver_file): + spec = importlib.util.spec_from_file_location("torch_version_import", ver_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + version = module.__version__ + if int(version[0]) >= 2: #enable by default for torch version 2.0 and up + args.cuda_malloc = cuda_malloc_supported() + except: + pass + + +if args.cuda_malloc and not args.disable_cuda_malloc: + env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None) + if env_var is None: + env_var = "backend:cudaMallocAsync" + else: + env_var += ",backend:cudaMallocAsync" + + os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var + print(f"Setup environment PYTORCH_CUDA_ALLOC_CONF={env_var}") diff --git a/webui.py b/webui.py index 2c417168aa6..0567668d8b4 100644 --- a/webui.py +++ b/webui.py @@ -10,6 +10,9 @@ startup_timer = timer.startup_timer startup_timer.record("launcher") +import cuda_malloc +startup_timer.record("cuda_malloc") + initialize.imports() initialize.check_versions() From e78be27e7540ac33ed6b5668ed5e71f55a5f7667 Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Sat, 12 Oct 2024 22:49:14 +0900 Subject: [PATCH 2/3] add --cuda-malloc, --disable-cuda-malloc cmd args --- modules/cmd_args.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/modules/cmd_args.py b/modules/cmd_args.py index d71982b2c12..2947ec395e6 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -9,6 +9,9 @@ parser.add_argument("--update-all-extensions", action='store_true', help="launch.py argument: download updates for all extensions when starting the program") parser.add_argument("--skip-python-version-check", action='store_true', help="launch.py argument: do not check python version") parser.add_argument("--skip-torch-cuda-test", action='store_true', help="launch.py argument: do not check if CUDA is able to work properly") +cm_group = parser.add_mutually_exclusive_group() +cm_group.add_argument("--cuda-malloc", action='store_true', help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).") +cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.") parser.add_argument("--reinstall-xformers", action='store_true', help="launch.py argument: install the appropriate version of xformers even if you have some version already installed") parser.add_argument("--reinstall-torch", action='store_true', help="launch.py argument: install the appropriate version of torch even if you have some version already installed") parser.add_argument("--update-check", action='store_true', help="launch.py argument: check for updates at startup") From 0cc81464bb9482195941baa42b3c88963345bfe3 Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Sat, 12 Oct 2024 22:52:40 +0900 Subject: [PATCH 3/3] lint, add init_cuda_malloc() --- cuda_malloc.py | 28 ++++++++++++++-------------- webui.py | 3 ++- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cuda_malloc.py b/cuda_malloc.py index 41bd1368ee8..fae5d73cfdc 100644 --- a/cuda_malloc.py +++ b/cuda_malloc.py @@ -39,9 +39,9 @@ def enum_display_devices(): else: gpu_names = set() out = subprocess.check_output(['nvidia-smi', '-L']) - for l in out.split(b'\n'): - if len(l) > 0: - 
gpu_names.add(l.decode('utf-8').split(' (UUID')[0]) + for line in out.split(b'\n'): + if len(line) > 0: + gpu_names.add(line.decode('utf-8').split(' (UUID')[0]) return gpu_names blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M", @@ -55,7 +55,7 @@ def enum_display_devices(): def cuda_malloc_supported(): try: names = get_gpu_names() - except: + except Exception: names = set() for x in names: if "NVIDIA" in x: @@ -82,16 +82,16 @@ def cuda_malloc_supported(): version = module.__version__ if int(version[0]) >= 2: #enable by default for torch version 2.0 and up args.cuda_malloc = cuda_malloc_supported() - except: + except Exception: pass +def init_cuda_malloc(): + if args.cuda_malloc and not args.disable_cuda_malloc: + env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None) + if env_var is None: + env_var = "backend:cudaMallocAsync" + else: + env_var += ",backend:cudaMallocAsync" -if args.cuda_malloc and not args.disable_cuda_malloc: - env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None) - if env_var is None: - env_var = "backend:cudaMallocAsync" - else: - env_var += ",backend:cudaMallocAsync" - - os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var - print(f"Setup environment PYTORCH_CUDA_ALLOC_CONF={env_var}") + os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var + print(f"Setup environment PYTORCH_CUDA_ALLOC_CONF={env_var}") diff --git a/webui.py b/webui.py index 0567668d8b4..aef977f7c20 100644 --- a/webui.py +++ b/webui.py @@ -10,7 +10,8 @@ startup_timer = timer.startup_timer startup_timer.record("launcher") -import cuda_malloc +from cuda_malloc import init_cuda_malloc +init_cuda_malloc() startup_timer.record("cuda_malloc") initialize.imports()