While trying to import documents from a Google Cloud Storage bucket using the function below, we are getting an HTTP 500 error. We are trying to push the documents into the train queue of the specified Document AI processor.
from google.cloud import documentai_v1beta3 as documentai
from google.cloud import storage
from google.cloud.documentai_v1beta3 import (
    BatchDocumentsInputConfig,
    GcsPrefix,
    ImportDocumentsRequest,
)

def import_docs_to_train(bucket_name, gcs_uri, processor_id, project_id, location):
    print("In importing-documents-to-train function...")
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)  # currently unused
    # BUCKET_NAME_OUTPUT is a module-level constant in our code.
    dataset_gcs_uri = f"gs://{BUCKET_NAME_OUTPUT}/"
    # json_file_gcs_uri is set earlier in our code.
    print(f"GCS URI of the JSON file: {json_file_gcs_uri}")
    print(f"GCS URI of the input documents: {gcs_uri}")

    client = documentai.DocumentServiceClient()

    # Import every document found under the given GCS prefix.
    batch_input_config = BatchDocumentsInputConfig(
        gcs_prefix=GcsPrefix(gcs_uri_prefix=gcs_uri)
    )
    import_config = ImportDocumentsRequest.BatchDocumentsImportConfig(
        batch_input_config=batch_input_config
    )
    # Leave the split unassigned so documents can be sorted later.
    import_config.dataset_split = "DATASET_SPLIT_UNASSIGNED"
    # update_dataset(dataset)

    doc_request = ImportDocumentsRequest(
        dataset=dataset_gcs_uri,
        batch_documents_import_configs=[import_config],
    )
    # ImportDocuments is a long-running operation; the error below is raised here.
    operation = client.import_documents(request=doc_request)
    operation.result()
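For context, this is roughly how the function is invoked; the values below are placeholders, not our real IDs:

# Hypothetical call with placeholder values.
import_docs_to_train(
    bucket_name="my-input-bucket",
    gcs_uri="gs://my-input-bucket/docs/",
    processor_id="0123456789abcdef",
    project_id="my-project-id",
    location="us",
)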
Running this, we get the following error:
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InvalidArgument: 400 Invalid resource field value in the request. [reason: "RESOURCE_PROJECT_INVALID"
domain: "googleapis.com"
metadata {
key: "service"
value: "documentai.googleapis.com"
}
metadata {
key: "method"
value: "google.cloud.documentai.v1beta3.DocumentService.ImportDocuments"
}
]
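From the v1beta3 API reference it looks like ImportDocumentsRequest.dataset expects the processor dataset's resource name rather than a gs:// URI, so we suspect our dataset value is wrong. A sketch of what we think it may need to be, built from the function's own project_id, location, and processor_id parameters:

# Resource name format per the Document AI v1beta3 reference:
#   projects/{project}/locations/{location}/processors/{processor}/dataset
dataset_name = (
    f"projects/{project_id}/locations/{location}"
    f"/processors/{processor_id}/dataset"
)
doc_request = ImportDocumentsRequest(
    dataset=dataset_name,
    batch_documents_import_configs=[import_config],
)

Is this the value the API expects here, and would it explain the RESOURCE_PROJECT_INVALID error?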