We have an Athena table in which there is a column that contains JSON values.
The datatype of the main column(which contains JSON values) in Athena is a string datatype.
DDL of Athena table is like below:
CREATE EXTERNAL TABLE 'table1'(
'id' bigint,
'requestbody' string //this column contains json values
)
also, this ‘requestbody’ column contains around 5000 JSON fields in it. And, it has also nested arrays in it.
Example,
{
"flow": {
"req": {
"application": {
"applicants": [
{
"id": 191915,
"applicationID": 189176,
"applicantIndex": null,
"sequenceNumber": 1,
"relationType": 1,
"relationCode": null,
"customerType": 1,
"prefix": "MR",
"incomeData": [
{
"name": "Applicant Income",
"description": "Executive Communication",
"submittedIncome": 4000,
"adjustedIncome": 4000,
"isAdjusted": false,
"adjustedBy": null
},
{
"name": "Applicant Other Income",
"description": "Car Allowance Car Allowance",
"submittedIncome": 100,
"adjustedIncome": 100,
"isAdjusted": false,
"adjustedBy": null,
"adjustedDate": null,
"comment": null,
"customerAppIncomeOrder": 1,
"combine": true
}
]
}
]
}
}
}
}
I tried below code to read this:
with A as (
SELECT
CAST(json_extract(requestbody, '$.flow.req.application.applicants')as ARRAY(MAP(VARCHAR,VARCHAR))) applicants
FROM
table1
)
, b AS (
SELECT
incomedata1['name'] name
FROM
A
CROSS JOIN UNNEST (applicants) t (applicant)
CROSS JOIN UNNEST (applicant.incomedata) t (incomedata1)
)
select name from b;
But I am getting an error:
line 14:23: Expression applicant is not of type ROW