@@ -198,10 +198,19 @@ def _create_mlflow_app(sagemaker_session) -> Optional[MlflowApp]:
198198 if new_app .status in ["Created" , "Updated" ]:
199199 return new_app
200200 elif new_app .status in ["Failed" , "Stopped" ]:
201- raise RuntimeError (f"MLflow app creation failed with status: { new_app .status } " )
201+ # Get detailed error from MLflow app
202+ error_msg = f"MLflow app creation failed with status: { new_app .status } "
203+ if hasattr (new_app , 'failure_reason' ) and new_app .failure_reason :
204+ error_msg += f". Reason: { new_app .failure_reason } "
205+ raise RuntimeError (error_msg )
202206 time .sleep (poll_interval )
203207
204- raise RuntimeError (f"MLflow app creation timed out after { max_wait_time } seconds" )
208+ # Timeout case - get current status and any error details
209+ new_app .refresh ()
210+ error_msg = f"MLflow app creation failed. Current status: { new_app .status } "
211+ if hasattr (new_app , 'failure_reason' ) and new_app .failure_reason :
212+ error_msg += f". Reason: { new_app .failure_reason } "
213+ raise RuntimeError (error_msg )
205214
206215 except Exception as e :
207216 logger .error ("Failed to create MLflow app: %s" , e )
@@ -693,7 +702,7 @@ def _validate_eula_for_gated_model(model, accept_eula, is_gated_model):
693702
694703
695704def _validate_s3_path_exists (s3_path : str , sagemaker_session ):
696- """Validate if S3 path exists and is accessible ."""
705+ """Validate S3 path and create bucket/prefix if they don't exist ."""
697706 if not s3_path .startswith ("s3://" ):
698707 raise ValueError (f"Invalid S3 path format: { s3_path } " )
699708
@@ -705,19 +714,34 @@ def _validate_s3_path_exists(s3_path: str, sagemaker_session):
705714 s3_client = sagemaker_session .boto_session .client ('s3' )
706715
707716 try :
708- # Check if bucket exists and is accessible
709- s3_client .head_bucket (Bucket = bucket_name )
717+ # Check if bucket exists, create if it doesn't
718+ try :
719+ s3_client .head_bucket (Bucket = bucket_name )
720+ except Exception as e :
721+ if "NoSuchBucket" in str (e ) or "Not Found" in str (e ):
722+ # Create bucket
723+ region = sagemaker_session .boto_region_name
724+ if region == 'us-east-1' :
725+ s3_client .create_bucket (Bucket = bucket_name )
726+ else :
727+ s3_client .create_bucket (
728+ Bucket = bucket_name ,
729+ CreateBucketConfiguration = {'LocationConstraint' : region }
730+ )
731+ else :
732+ raise
710733
711- # If prefix is provided, check if it exists
734+ # If prefix is provided, check if it exists, create if it doesn't
712735 if prefix :
713736 response = s3_client .list_objects_v2 (Bucket = bucket_name , Prefix = prefix , MaxKeys = 1 )
714737 if 'Contents' not in response :
715- raise ValueError (f"S3 prefix '{ prefix } ' does not exist in bucket '{ bucket_name } '" )
738+ # Create the prefix by putting an empty object
739+ if not prefix .endswith ('/' ):
740+ prefix += '/'
741+ s3_client .put_object (Bucket = bucket_name , Key = prefix , Body = b'' )
716742
717743 except Exception as e :
718- if "NoSuchBucket" in str (e ):
719- raise ValueError (f"S3 bucket '{ bucket_name } ' does not exist or is not accessible" )
720- raise ValueError (f"Failed to validate S3 path '{ s3_path } ': { str (e )} " )
744+ raise ValueError (f"Failed to validate/create S3 path '{ s3_path } ': { str (e )} " )
721745
722746
723747def _validate_hyperparameter_values (hyperparameters : dict ):
0 commit comments