Skip to content

Commit 607ebc1

Browse files
committed
4736558
1 parent edbeab4 commit 607ebc1

File tree

1 file changed

+6
-3
lines changed
  • assets/training/model_management/environments/foundation-model-serve/context/foundation/model/serve

1 file changed

+6
-3
lines changed

assets/training/model_management/environments/foundation-model-serve/context/foundation/model/serve/replica_manager.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -80,9 +80,12 @@ def _get_cuda_visible_devices():
8080
str: A comma-separated string of GPU IDs, e.g. "0,1,2,3".
8181
"""
8282
gpu_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None)
83-
if gpu_ids is None:
84-
gpu_ids = ",".join(
85-
map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else ""
83+
logger.info(f"CUDA_VISIBLE_DEVICES read from environment: {gpu_ids}")
84+
85+
if not os.environ.get("ENFORCE_CUDA_VISIBLE_DEVICES", "false").lower() == "true":
86+
gpu_ids = ",".join(map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else ""
87+
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids
88+
logger.info(f"Setting CUDA_VISIBLE_DEVICES to: {gpu_ids} via automatic detection.")
8689
return gpu_ids
8790

8891
def initialize(self):

0 commit comments

Comments
 (0)