Thiết kế website giá rẻ

Question

I am trying to train a model using SageMaker's Estimator class. My directory structure is as follows:

- temp
   - train_step
      - train.py
      - requirements.txt
   - temp.py

train.py:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

######################################## VERSION TESTING ##############################################################
import logging
import sys
import pkg_resources
import json

def get_python_and_package_versions():
    """Log the Python version and all installed package versions, and return them.

    The package list is read with the standard-library ``importlib.metadata``
    instead of the deprecated ``pkg_resources`` module, so the function works
    even when setuptools is not installed in the environment.

    Returns:
        dict: ``{"python_version": <sys.version>, "installed_packages":
        {<normalized lowercase name>: <version>}}`` -- the same data that is
        logged as JSON.
    """
    # Local imports keep this function self-contained within the script.
    import re
    from importlib import metadata

    # Get Python version
    python_version = sys.version

    # Get installed package versions
    installed_packages = {}
    for dist in metadata.distributions():
        meta = dist.metadata
        name = meta.get("Name") if meta is not None else None
        if not name:
            # A damaged or partial install may carry no name; skip it rather than crash.
            continue
        # Mirror the key format pkg_resources produced: lowercase, with runs of
        # characters other than letters, digits and "." collapsed to "-".
        key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
        installed_packages[key] = meta.get("Version")

    # Combine Python version and installed package versions into a dictionary
    version_data = {
        "python_version": python_version,
        "installed_packages": installed_packages,
    }

    # Initialize logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # Only attach a stdout handler when neither this logger nor any ancestor
    # has one, so records are not emitted twice.
    if not logger.hasHandlers():
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # Log the version data
    logger.info("Python version and installed package versions:")
    logger.info(json.dumps(version_data, indent=4))

    return version_data

# Invoked at module level, so the version report is logged every time this
# script is run or imported, before any training code executes.
get_python_and_package_versions()


######################################## VERSION TESTING ##############################################################


# Pull the feature matrix and label vector out of the bundled Iris dataset.
dataset = load_iris()
features, labels = dataset.data, dataset.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
train_features, test_features, train_labels, test_labels = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest (seeded) and fit it on the training portion.
forest = RandomForestClassifier(n_estimators=100, random_state=42)
forest.fit(train_features, train_labels)

# Score the predictions for the held-out samples and report the result.
predicted_labels = forest.predict(test_features)
accuracy = accuracy_score(test_labels, predicted_labels)
print("Accuracy:", accuracy)

temp.py:

import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.image_uris import get_base_python_image_uri
from sagemaker.local import LocalSession
import boto3

# boto3 session created with no explicit arguments.
boto_session = boto3.Session()

# SageMaker local-mode session bound to that boto session and the pipeline bucket.
local_session = LocalSession(
    boto_session=boto_session,
    default_bucket=pipeline_bucket_name,  # bucket name
)

# Single "train" channel pointing at the S3 URI where the data exists.
train_channel = sagemaker.inputs.TrainingInput(s3_data=processor_output)
inputs = {"train": train_channel}

# Base Python 3.8 image for us-east-1.
base_image_uri = get_base_python_image_uri("us-east-1", py_version="38")

# `find` invocation used as the container entry point: it lists any file named
# train.py or train.csv below the container's working directory.
# NOTE(review): presumably a debugging aid to see which files reach the
# container -- confirm before relying on this for real training.
locate_files_command = [
    "find", ".", "-type", "f",
    "(", "-name", "train.py", "-o", "-name", "train.csv", ")",
]

estimator = Estimator(
    image_uri=base_image_uri,
    role=role,  # Add execution role
    instance_count=1,
    instance_type="local",
    output_path=artifact_location,  # s3-bucket-location
    base_job_name=training_base_job_name,  # name of the training job
    sagemaker_session=local_session,
    source_dir="./train_step",
    code_location=training_code_location,  # s3-bucket-location
    entry_point="train.py",
    container_entry_point=locate_files_command,
)

# Launch the job with the "train" channel defined above.
train_args = estimator.fit(inputs=inputs)

It seems that the directory specified in source_dir is not being added to the container. When I searched for train.py and train.csv, I found the path to train.csv but none of the files from the train_step directory.

train.csv - ./opt/ml/input/data/train/train.csv.

Note: train.py is dummy code, included just to make the problem reproducible without a dataset.
On checking the S3 location specified in code_location, I can see the source code uploaded there.

Thiết kế website giá rẻ

Danh mục

How to upload custom training code to Sagemaker Estimator Python