I am trying to use Kubeflow Pipelines v2 following the document below, but I am not able to run it successfully. Below is my code snippet — could you please let me know if something is wrong?
https://www.kubeflow.org/docs/components/pipelines/v2/components/containerized-python-components/#1-source-code-setup
kfp 2.7.0
kfp-kubernetes 1.2.0
kfp-pipeline-spec 0.3.0
kfp-server-api 2.0.5
kfp component build src/ --component-filepattern my_component.py
~/Doc/S/d/s/python-containerized ❯ tree
.
├── kpf-test.ipynb
├── kubeflow-demo.yaml
└── src
├── Dockerfile
├── __pycache__
│ └── my_component.cpython-312.pyc
├── component_metadata
│ └── dataset_download.yaml
├── kfp_config.ini
├── my_component.py
└── runtime-requirements.txt
from kfp import dsl


# NOTE(review): `pathlib` was removed from packages_to_install — it is part of
# the standard library; pip-installing it pulls the obsolete Python-2 backport,
# which can shadow the stdlib module on the python:3.10 base image.
@dsl.component(base_image='python:3.10',
               target_image='mohitverma1688/my-component:v0.4',
               packages_to_install=['boto3', 'requests', 'kfp-kubernetes'])
def dataset_download(url: str, base_path: str, input_bucket: str):
    """Download a zip dataset and stage it in MinIO and on the local volume.

    Steps:
      1. Ensure the MinIO bucket `input_bucket` exists (create it if not).
      2. Download the zip archive at `url` to `{base_path}/data.zip`.
      3. Upload every file found under `base_path` to the bucket under the
         key "data.zip".
      4. Unzip the archive in place under `base_path`.

    Args:
        url: HTTP(S) URL of the zip archive to download.
        base_path: Local directory (typically a mounted PVC) to stage data in.
        input_bucket: Name of the MinIO bucket to upload the archive to.
    """
    import os
    import zipfile
    from pathlib import Path

    import boto3
    import requests
    from botocore.client import Config

    # In-cluster MinIO endpoint; credentials are the Kubeflow demo defaults.
    s3 = boto3.client(
        "s3",
        endpoint_url="http://minio-service.kubeflow:9000",
        aws_access_key_id="minio",
        aws_secret_access_key="minio123",
        config=Config(signature_version="s3v4"),
    )

    # Create the bucket if it does not yet exist.
    response = s3.list_buckets()
    input_bucket_exists = any(
        bucket["Name"] == input_bucket for bucket in response["Buckets"]
    )
    if not input_bucket_exists:
        s3.create_bucket(ACL="public-read-write", Bucket=input_bucket)

    # Make sure the staging directory exists.
    data_path = Path(base_path)
    if data_path.is_dir():
        print(f"{data_path} directory exists.")
    else:
        print(f"Did not find {data_path} directory, creating one...")
        data_path.mkdir(parents=True, exist_ok=True)

    # Download pizza, steak and sushi data.
    with open(data_path / "data.zip", "wb") as f:
        request = requests.get(url)
        print(f"Downloading data from {url}...")
        f.write(request.content)

    # Upload everything under base_path to the bucket. At this point only
    # data.zip exists (the walk runs before extraction), and every file is
    # written to the same "data.zip" key — kept as in the original.
    for root, dirs, files in os.walk(data_path):
        for filename in files:
            local_path = os.path.join(root, filename)
            s3.upload_file(
                local_path,
                input_bucket,
                "data.zip",
                ExtraArgs={"ACL": "public-read"},
            )

    with zipfile.ZipFile(data_path / "data.zip", "r") as zip_ref:
        print("Unzipping data...")
        zip_ref.extractall(data_path)


if __name__ == "__main__":
    # The original called an undefined `download(url, ...)` with undefined
    # module-level variables, which crashed any direct execution of this
    # module. Run the component's underlying Python function with CLI args
    # instead (the decorated object itself is a KFP component, so call
    # `.python_func` outside a pipeline context).
    import argparse

    parser = argparse.ArgumentParser(description="Run dataset_download locally.")
    parser.add_argument("--url", required=True)
    parser.add_argument("--base-path", required=True)
    parser.add_argument("--input-bucket", required=True)
    args = parser.parse_args()
    dataset_download.python_func(args.url, args.base_path, args.input_bucket)
#%%writefile pipeline.py
# NOTE(review): the original used `dsl`, `kubernetes`, and `compiler` without
# importing them, and imported `src.my_component` as a module while calling
# `dataset_download` unqualified — both fail with NameError when run as a
# standalone pipeline.py. `kubernetes` comes from the kfp-kubernetes package
# and is exposed as `kfp.kubernetes`.
from kfp import compiler, dsl, kubernetes
from kfp.client import Client

from src.my_component import dataset_download

# Pipeline-level defaults.
BASE_PATH = "/data"
URL = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"
INPUT_BUCKET = "datanewbucket"


@dsl.pipeline(name='CNN-TinyVG-Demo',
              description='This pipeline is a demo for training,evaluating and deploying Convutional Neural network',
              display_name='Kubeflow-MlFLow-Demo')
def kubeflow_pipeline(base_path: str = BASE_PATH,
                      url: str = URL,
                      input_bucket: str = INPUT_BUCKET):
    """Create a PVC, then download the dataset onto it and into MinIO."""
    pvc1 = kubernetes.CreatePVC(
        # Can also use pvc_name instead of pvc_name_suffix to use a
        # pre-existing PVC.
        pvc_name='kubeflow-pvc3',
        access_modes=['ReadWriteOnce'],
        size='500Mi',
        storage_class_name='standard',
    )

    task1 = dataset_download(base_path=base_path,
                             url=url,
                             input_bucket=input_bucket)
    # Disable caching so the download runs on every pipeline execution.
    task1.set_caching_options(False)

    # Mount the PVC at the same path the component writes to (base_path).
    kubernetes.mount_pvc(
        task1,
        pvc_name=pvc1.outputs['name'],
        mount_path='/data',
    )


# Compile the pipeline to YAML, then submit it to the KFP API server
# (port-forwarded to localhost:8002).
compiler.Compiler().compile(kubeflow_pipeline, 'kubeflow-demo.yaml')

client = Client(host='http://localhost:8002')
run = client.create_run_from_pipeline_package(
    'kubeflow-demo.yaml',
)
0524 12:18:56.775872 30 cache.go:116] Connecting to cache endpoint 10.96.39.24:8887
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead:
https://pip.pypa.io/warnings/venv
[KFP Executor 2024-05-24 12:19:19,641 INFO]: --component_module_path is not specified. Looking for component `dataset_download` in config file `kfp_config.ini` instead
/usr/local/lib/python3.10/site-packages/kfp/dsl/kfp_config.py:69: UserWarning: No existing KFP Config file found
warnings.warn('No existing KFP Config file found')
Traceback (most recent call last):
File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.10/site-packages/kfp/dsl/executor_main.py", line 109, in <module>
executor_main()
File "/usr/local/lib/python3.10/site-packages/kfp/dsl/executor_main.py", line 75, in executor_main
raise RuntimeError('No components found in `kfp_config.ini`')
RuntimeError: No components found in `kfp_config.ini`
I0524 12:19:19.748954 30 launcher_v2.go:151] publish success.
F0524 12:19:19.749580 30 main.go:49] failed to execute component: exit status 1
time="2024-05-24T12:19:20.313Z" level=info msg="sub-process exited" argo=true error="<nil>"
Error: exit status 1
time="2024-05-24T12:19:20.602Z" level=info msg="sub-process exited" argo=true error="<nil>"
Error: exit status 1
I am wondering whether I have understood the document correctly. Is this the right way to run it? I am using a Jupyter notebook.