In the JSON, I have information within each main object, which includes sub-objects and lists of objects with more details.
"ns0:sfobject": {
"@xmlns:ns0": "urn:sfobject.sfapi.api.com",
"ns0:id": "xxxx",
"ns0:type": "table",
"ns0:person": {
"ns0:country_of_birth": "xxx",
"ns0:date_of_birth": "xxx",
"ns0:last_modified_by": "xxx",
"ns0:last_modified_on": "xxx",
"ns0:identity_information": {
"ns0:account_uuid": "xxx",
"ns0:created_on_timestamp": "xxx",
"ns0:last_modified_on": "xxx"
},
"ns0:personal_information": [
{
"ns0:created_by": "xxx",
"ns0:last_name": "xxx",
"ns0:nationality": "xxx",
},
{
"ns0:created_by": "yyy",
"ns0:last_name": "yyy",
"ns0:nationality": "yyy",
},
],
"ns0:address_information": [
{
"ns0:address1": "xxx",
"ns0:country": "xxx",
"ns0:zip_code": "xxx"
},
{
"ns0:address1": "xxx",
"ns0:country": "xxx",
"ns0:zip_code": "xxx"
},
{
"ns0:address1": "xxx",
"ns0:country": "xxx",
"ns0:zip_code": "xxx"
},
],
"ns0:employment_information": [
{
"ns0:assignment_class": "xxx",
"ns0:assignment_uuid": "xxx",
"ns0:user_id": "xxx",
"ns0:job_information": [
{
"ns0:business_unit": "xxx",
"ns0:calc_method_indicator": "xxx",
"ns0:company": "xxx"
},
{
"ns0:business_unit": "yyy",
"ns0:calc_method_indicator": "yyy",
"ns0:company": "yyy"
},
{
"ns0:business_unit": "zzz",
"ns0:calc_method_indicator": "zzz",
"ns0:company": "zzz"
},
{
"ns0:business_unit": "aaa",
"ns0:calc_method_indicator": "aaa",
"ns0:company": "aaa"
},
],
"ns0:compensation_information": [
{
"ns0:created_by": "xxx",
"ns0:created_on_timestamp": "xxx",
"ns0:paycompensation_recurring": [
{
"ns0:annualizationFactor": "xxx",
"ns0:calculated_amount": "xxx"
},
{
"ns0:annualizationFactor": "yyy",
"ns0:calculated_amount": "yyy"
},
{
"ns0:annualizationFactor": "zzz",
"ns0:calculated_amount": "zzz"
}
]
},
{
"ns0:created_by": "xxx",
"ns0:created_on_timestamp": "xxx",
"ns0:paycompensation_recurring": [
{
"ns0:annualizationFactor": "xxx",
"ns0:calculated_amount": "xxx"
},
{
"ns0:annualizationFactor": "yyy",
"ns0:calculated_amount": "yyy"
},
{
"ns0:annualizationFactor": "zzz",
"ns0:calculated_amount": "zzz"
}
]
},
],
"ns0:associated_employee_information": [
{
"ns0:country_of_birth": "xxx",
"ns0:created_by": "xxx",
"ns0:associated_employee_employment_information": {
"ns0:assignment_class": "xxx",
"ns0:user_id": "xxx"
}
},
{
"ns0:country_of_birth": "yyy",
"ns0:created_by": "yyy",
"ns0:associated_employee_employment_information": {
"ns0:assignment_class": "yyy",
"ns0:user_id": "yyy"
}
}
]
}
So what I want is a Python code that, in an automated way (i.e., by simply instructing it to search for objects and sub-objects), extracts multiple dataframes. This will allow me to later create a database. The expected result is:
Is it possible to reach it?
Thank you so much.
I have try this
import json
import pandas as pd
# Ruta al archivo JSON
json_file_path = r'json_rout'
# Cargar el JSON desde el archivo
with open(json_file_path, 'r') as file:
data = json.load(file)
# Extraer datos
person_data = data["ns0:sfobject"]["ns0:person"]
personal_information = person_data["ns0:personal_information"]
address_information = person_data["ns0:address_information"]
phone_information = person_data["ns0:phone_information"]
email_information = person_data["ns0:email_information"]
employment_information = person_data.get("ns0:employment_information", [])
#job_information = person_data["ns0:job_information"]
# Convertir a DataFrames
person_df = pd.json_normalize(person_data)
personal_information_df = pd.DataFrame(personal_information)
address_information_df = pd.DataFrame(address_information)
phone_information_df = pd.json_normalize(phone_information)
email_information_df = pd.json_normalize(email_information)
employment_information_df = pd.json_normalize(employment_information)
#job_information_df = pd.json_normalize(job_information)
# Guardar los DataFrames en archivos CSV
person_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedataperson_data.csv', index=False)
personal_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedatapersonal_information.csv', index=False)
address_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedataaddress_information.csv', index=False)
phone_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedataphone_information.csv', index=False)
email_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedataemail_information.csv', index=False)
employment_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedataemployment_information.csv', index=False)
#job_information_df.to_csv(r'C:UsersMariaGironaOrtsPycharmProjectsvy-data-corporate-people-employeedatajob_information.csv', index=False)
But it does not work. And I expect different dataframes