I am trying to train a Linear Learner model in AWS SageMaker with predictor type "regressor".
I am stuck and have no clue what I need to do next.
I have uploaded my data to an S3 bucket.
Below is the code that uploads the data to the S3 bucket in the required format.
def export_data_lr(data, name, pre):
    # split data into X and y subsets
    X = data.drop(columns="price")
    y = data.price.copy()
    # apply the fitted preprocessing transformer
    X = pre.transform(X)
    file_name = get_file_name_lr(name)
    (
        y
        .to_frame()
        .join(X)
        .to_csv(file_name, index=False, header=False)
    )
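As a quick sanity check of the file layout (my understanding is that Linear Learner's CSV input wants the label in the first column and no header row, which is what y.to_frame().join(X) produces), I read a few rows back; this snippet is only an illustration and assumes pandas is available:

import pandas as pd

# read the exported file back: first column should be the price label, no header
sample = pd.read_csv(get_file_name_lr("train"), header=None, nrows=5)
print(sample.head())
print(sample.shape[1], "columns (1 label + features)")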
import os

import boto3

def upload_to_bucket_lr(name, BUCKET_NAME):
    file_name = get_file_name_lr(name)
    (
        boto3
        .Session()
        .resource("s3")
        .Bucket(BUCKET_NAME)
        .Object(os.path.join(DATA_PREFIX, f"{name}/{name}-lr.csv"))
        .upload_file(file_name)
    )
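For context, this is roughly how I call the two helpers for each split; train_df, val_df, test_df, and the fitted pre transformer are placeholders for my own objects:

# rough usage sketch -- the DataFrames and `pre` below are placeholders
for split, df in [("train", train_df), ("val", val_df), ("test", test_df)]:
    export_data_lr(df, split, pre)           # writes <split>-lr.csv locally
    upload_to_bucket_lr(split, BUCKET_NAME)  # uploads to s3://BUCKET_NAME/DATA_PREFIX/<split>/<split>-lr.csv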
Up to here everything works fine; the data is uploaded to the S3 bucket without any problem.
In my S3 bucket I can see my training, testing, and validation data.
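This is the quick listing I used to confirm the objects are there (a minimal sketch, assuming the same BUCKET_NAME and DATA_PREFIX as above):

import boto3

# list everything under the data prefix to confirm the train/val/test CSVs exist
s3 = boto3.Session().resource("s3")
for obj in s3.Bucket(BUCKET_NAME).objects.filter(Prefix=DATA_PREFIX):
    print(obj.key, obj.size)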
Below is the code for building the Linear Learner model with predictor_type "regressor".
I am not using many hyperparameters, for the sake of simplicity.
from sagemaker import image_uris
from sagemaker.estimator import Estimator

def return_lr_model(sagemaker, BUCKET_NAME, algorithm, version):
    OUTPUT_PATH = get_output_path(BUCKET_NAME)
    session = sagemaker.Session()
    region = session.boto_region_name
    role = sagemaker.get_execution_role()
    image_uri = image_uris.retrieve(region=region, framework="linear-learner")
    lr_model = Estimator(
        image_uri=image_uri,
        role=role,
        instance_count=1,
        instance_type="ml.m4.xlarge",
        output_path=OUTPUT_PATH,
        sagemaker_session=session,
        base_job_name="LR-v1",
    )
    return lr_model
def perform_hyperparameter(lr_model):
    lr_model.set_hyperparameters(
        feature_dim="auto",
        predictor_type="regressor",
        normalize_data=False,
        epochs=15,
    )
    return lr_model
def create_lr_model(sagemaker, BUCKET_NAME, algorithm, version):
    lr_model = return_lr_model(sagemaker, BUCKET_NAME, algorithm, version)
    lr_model = perform_hyperparameter(lr_model)
    return lr_model

lr_model = create_lr_model(sagemaker, BUCKET_NAME, algorithm, version)
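Before calling fit I also print what the estimator will submit, to make sure the container image and the hyperparameters look right (as far as I know, hyperparameters() is the standard accessor on the SDK Estimator):

# inspect the resolved container image and the hyperparameters that will be sent
print(lr_model.image_uri)
print(lr_model.hyperparameters())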
Code to create the data channels:
from sagemaker.inputs import TrainingInput

def get_data_channels(train, val):
    train = get_file_name_lr(train)
    bucket_path = f"s3://{BUCKET_NAME}/{DATA_PREFIX}/train/{train}"
    train_data_channel = TrainingInput(bucket_path, content_type="csv")
    val = get_file_name_lr(val)
    bucket_path = f"s3://{BUCKET_NAME}/{DATA_PREFIX}/val/{val}"
    val_data_channel = TrainingInput(bucket_path, content_type="text/csv")
    data_channels = {
        "train": train_data_channel,
        "validation": val_data_channel,
    }
    return data_channels
data_channel = get_data_channels("train", "val")
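For debugging I also dumped the channel definitions; if I read the SDK correctly, TrainingInput exposes the request dictionary through its config attribute:

# print the S3 URI and ContentType each channel passes to the training job
for name, channel in data_channel.items():
    print(name, channel.config)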
Train the model:
lr_model.fit(data_channel)
Here is where my actual problem starts; below is the error I encounter while training the model.
Error:
INFO:sagemaker:Creating training-job with name: LR-v1-2024-06-16-05-01-56-352
2024-06-16 05:01:56 Starting - Starting the training job...
2024-06-16 05:02:17 Starting - Preparing the instances for training...
2024-06-16 05:02:48 Downloading - Downloading input data...
2024-06-16 05:03:23 Downloading - Downloading the training image.........
2024-06-16 05:04:29 Training - Training image download completed. Training in progress..Docker entrypoint called with argument(s): train
Running default environment configuration script
[06/16/2024 05:04:54 INFO 140601654445888] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5', 'loss_insensitivity': '0.01', 'huber_delta': '1.0', 'num_classes': '1', 'accuracy_top_k': '3', 'wd': 'auto', 'l1': 'auto', 'momentum': 'auto', 'learning_rate': 'auto', 'beta_1': 'auto', 'beta_2': 'auto', 'bias_lr_mult': 'auto', 'bias_wd_mult': 'auto', 'use_lr_scheduler': 'true', 'lr_scheduler_step': 'auto', 'lr_scheduler_factor': 'auto', 'lr_scheduler_minimum_lr': 'auto', 'positive_example_weight_mult': '1.0', 'balance_multiclass_weights': 'false', 'normalize_data': 'true', 'normalize_label': 'auto', 'unbias_data': 'auto', 'unbias_label': 'auto', 'num_point_for_scaler': '10000', '_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_log_level': 'info', '_tuning_objective_metric': '', 'early_stopping_patience': '3', 'early_stopping_tolerance': '0.001', '_enable_profiler': 'false'}
[06/16/2024 05:04:54 INFO 140601654445888] Merging with provided configuration from /opt/ml/input/config/hyperparameters.json: {'epochs': '15', 'feature_dim': 'auto', 'normalize_data': 'False', 'predictor_type': 'regressor'}
[06/16/2024 05:04:54 INFO 140601654445888] Final configuration: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5', 'loss_insensitivity': '0.01', 'huber_delta': '1.0', 'num_classes': '1', 'accuracy_top_k': '3', 'wd': 'auto', 'l1': 'auto', 'momentum': 'auto', 'learning_rate': 'auto', 'beta_1': 'auto', 'beta_2': 'auto', 'bias_lr_mult': 'auto', 'bias_wd_mult': 'auto', 'use_lr_scheduler': 'true', 'lr_scheduler_step': 'auto', 'lr_scheduler_factor': 'auto', 'lr_scheduler_minimum_lr': 'auto', 'positive_example_weight_mult': '1.0', 'balance_multiclass_weights': 'false', 'normalize_data': 'False', 'normalize_label': 'auto', 'unbias_data': 'auto', 'unbias_label': 'auto', 'num_point_for_scaler': '10000', '_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_log_level': 'info', '_tuning_objective_metric': '', 'early_stopping_patience': '3', 'early_stopping_tolerance': '0.001', '_enable_profiler': 'false', 'predictor_type': 'regressor'}
/opt/amazon/lib/python3.8/site-packages/mxnet/model.py:97: SyntaxWarning: "is" with a literal. Did you mean "=="?
if num_device is 1 and 'dist' not in kvstore:
/opt/amazon/lib/python3.8/site-packages/scipy/optimize/_shgo.py:495: SyntaxWarning: "is" with a literal. Did you mean "=="?
if cons['type'] is 'ineq':
/opt/amazon/lib/python3.8/site-packages/scipy/optimize/_shgo.py:743: SyntaxWarning: "is not" with a literal. Did you mean "!="?
if len(self.X_min) is not 0:
[06/16/2024 05:04:57 WARNING 140601654445888] Loggers have already been setup.
[06/16/2024 05:04:57 INFO 140601654445888] Final configuration: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5', 'loss_insensitivity': '0.01', 'huber_delta': '1.0', 'num_classes': '1', 'accuracy_top_k': '3', 'wd': 'auto', 'l1': 'auto', 'momentum': 'auto', 'learning_rate': 'auto', 'beta_1': 'auto', 'beta_2': 'auto', 'bias_lr_mult': 'auto', 'bias_wd_mult': 'auto', 'use_lr_scheduler': 'true', 'lr_scheduler_step': 'auto', 'lr_scheduler_factor': 'auto', 'lr_scheduler_minimum_lr': 'auto', 'positive_example_weight_mult': '1.0', 'balance_multiclass_weights': 'false', 'normalize_data': 'False', 'normalize_label': 'auto', 'unbias_data': 'auto', 'unbias_label': 'auto', 'num_point_for_scaler': '10000', '_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_log_level': 'info', '_tuning_objective_metric': '', 'early_stopping_patience': '3', 'early_stopping_tolerance': '0.001', '_enable_profiler': 'false', 'predictor_type': 'regressor'}
[06/16/2024 05:04:57 WARNING 140601654445888] Loggers have already been setup.
Process 7 is a worker.
[06/16/2024 05:04:57 INFO 140601654445888] Using default worker.
[06/16/2024 05:04:57 INFO 140601654445888] Checkpoint loading and saving are disabled.
[06/16/2024 05:04:57 ERROR 140601654445888] Customer Error: No iterator has been registered for ContentType ('csv', '1.0')
2024-06-16 05:05:13 Uploading - Uploading generated training model
2024-06-16 05:05:13 Failed - Training job failed
---------------------------------------------------------------------------
UnexpectedStatusException Traceback (most recent call last)
Cell In[278], line 1
----> 1 lr_model.fit(data_channel)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/workflow/pipeline_context.py:346, in runnable_by_pipeline.<locals>.wrapper(*args, **kwargs)
342 return context
344 return _StepArguments(retrieve_caller_name(self_instance), run_func, *args, **kwargs)
--> 346 return run_func(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/estimator.py:1346, in EstimatorBase.fit(self, inputs, wait, logs, job_name, experiment_config)
1344 self.jobs.append(self.latest_training_job)
1345 if wait:
-> 1346 self.latest_training_job.wait(logs=logs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/estimator.py:2703, in _TrainingJob.wait(self, logs)
2701 # If logs are requested, call logs_for_jobs.
2702 if logs != "None":
-> 2703 self.sagemaker_session.logs_for_job(self.job_name, wait=True, log_type=logs)
2704 else:
2705 self.sagemaker_session.wait_for_job(self.job_name)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/session.py:5797, in Session.logs_for_job(self, job_name, wait, poll, log_type, timeout)
5776 def logs_for_job(self, job_name, wait=False, poll=10, log_type="All", timeout=None):
5777 """Display logs for a given training job, optionally tailing them until job is complete.
5778
5779 If the output is a tty or a Jupyter cell, it will be color-coded
(...)
5795 exceptions.UnexpectedStatusException: If waiting and the training job fails.
5796 """
-> 5797 _logs_for_job(self, job_name, wait, poll, log_type, timeout)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/session.py:8026, in _logs_for_job(sagemaker_session, job_name, wait, poll, log_type, timeout)
8023 last_profiler_rule_statuses = profiler_rule_statuses
8025 if wait:
-> 8026 _check_job_status(job_name, description, "TrainingJobStatus")
8027 if dot:
8028 print()
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/session.py:8079, in _check_job_status(job, desc, status_key_name)
8073 if "CapacityError" in str(reason):
8074 raise exceptions.CapacityError(
8075 message=message,
8076 allowed_statuses=["Completed", "Stopped"],
8077 actual_status=status,
8078 )
-> 8079 raise exceptions.UnexpectedStatusException(
8080 message=message,
8081 allowed_statuses=["Completed", "Stopped"],
8082 actual_status=status,
8083 )
UnexpectedStatusException: Error for Training job LR-v1-2024-06-16-05-01-56-352: Failed. Reason: ClientError: No iterator has been registered for ContentType ('csv', '1.0'), exit code: 2
I also tried creating a new bucket, but that did not help.
Thanks in advance; I hope to hear from you soon.