This PySpark job takes a very long time to run. I need a better, optimized version of the code below that reduces its execution time.
# Create MRO CTE
# Keeps a single row per Recipient_ID: the row whose SRC_FILE_NAME contains the
# highest "I<digits>" token (exposed as the `fdate` column).
#
# The extraction is built once and reused for both the ordering and the output
# column. The previous first sort key used the pattern 'Id+' (a literal "I"
# followed by the letter "d"), which appears to be a typo for the same
# 'I\d+' token, so a single sort key is kept.
# NOTE(review): the token is compared as a string, so ordering is
# lexicographic; confirm the digit part has a fixed width.
# NOTE(review): rows tying on the token within a Recipient_ID are picked
# arbitrarily by row_number(); add a tie-breaker column if that matters.
fdate_expr = regexp_extract(col("SRC_FILE_NAME"), r"I\d+", 0)

window_spec = Window.partitionBy("Recipient_ID").orderBy(fdate_expr.desc())

# The chain is wrapped in parentheses so the leading-dot continuation lines
# are valid Python.
df_mro_cte = (
    df_mro_clm
    .withColumn("row_number", row_number().over(window_spec))
    .filter(col("row_number") == 1)
    .select(fdate_expr.alias("fdate"), col("*"))
)
# Create MBR CTE
# Member lookup: rows of df_pln_mbr with Grp_Sk == 15, reduced to the two join
# keys (Mcaid_HI_Card_Num, Pln_Mbr_Id) and the Crn_Ind flag.
_mbr_columns = ("Mcaid_HI_Card_Num", "Pln_Mbr_Id", "Crn_Ind")
df_mbr_cte = df_pln_mbr.where(col("Grp_Sk") == 15).select(*_mbr_columns)
# Create MROClaims
# Joins the de-duplicated MRO rows to members and member-market rows, keeps
# the qualifying claims, and projects them onto the output layout.
#
# Constants and NULL placeholders must be built with lit(): col("1"),
# col("NULL") or col("'MROBH'") try to resolve a *column* with that name and
# fail at analysis time.
from pyspark.sql.functions import lit


def _blank_to_null(column_name):
    """Return the trimmed column, with an empty trimmed value mapped to NULL."""
    trimmed = trim(col(column_name))
    return when(trimmed == '', None).otherwise(trimmed)


def _proc_code_if(first_chars, excluded_fifth_chars):
    """Return trimmed Proc_Code when its 1st character is in ``first_chars``
    and its 5th character is not in ``excluded_fifth_chars``; otherwise NULL.
    """
    code = trim(col("Proc_Code"))
    return when(
        substring(code, 1, 1).isin(*first_chars)
        & ~substring(code, 5, 1).isin(*excluded_fifth_chars),
        code,
    ).otherwise(None)


# Shared expressions, built once and reused in the projection.
null_col = lit(None)
service_date = to_date(col("Date_Begin_Service_Detail"), "yyyyMMdd")
# CPT4_CODE2 and HCPCS use the same rule.
proc_code_012 = _proc_code_if(('0', '1', '2'), ('0', '2', '5', '6'))

# Apply the predicates that are tied to one specific input before joining, so
# each join side is visibly reduced and Crn_Ind is unambiguously the member
# flag. (For inner joins the optimizer is expected to push these down anyway.)
active_members = df_mbr_cte.filter(df_mbr_cte["Crn_Ind"] == 1)
market_claims = df_member_market_claim.filter(
    (df_member_market_claim["ProductCategoryCode"] == 'M')
    & (df_member_market_claim["CurrentMarketCode"] == 15)
)

# NOTE(review): if active_members is small, a broadcast join hint may avoid a
# shuffle; confirm its size before adding one.
df_mroclaims = (
    df_mro_cte
    .join(
        active_members,
        df_mro_cte["Recipient_ID"] == active_members["Mcaid_HI_Card_Num"],
        "inner",
    )
    .join(
        market_claims,
        active_members["Pln_Mbr_Id"] == market_claims["Pln_Mbr_Id"],
        "inner",
    )
    # Rows whose service date does not parse as yyyyMMdd are dropped.
    .filter(service_date.isNotNull() & (col("Claim_Trans_Type") < 'V'))
    .select(
        trim(col("Claim_Numb")).alias("CLAIM_NUM"),
        lit(1).alias("LINE_NUM"),
        when(col("Paid_Denied_Code") == 'pr', 'PAID').otherwise('DENIED').alias("CLAIM_STATUS"),
        service_date.alias("FIRST_DATE_OF_SVC"),
        service_date.alias("LAST_DATE_OF_SVC"),
        null_col.alias("PRENATAL_DATE"),
        col("LoadMemberIDHix").alias("MEMBER_ID"),
        lit('MROBH').alias("PROV_ID"),
        lit('FACT_BHSO').alias("PROV_ID_SPECIAL"),
        null_col.alias("PROV_SPECIAL_NPI"),
        col("Amt_Paid_Total").alias("PAID_AMT"),
        col("Amt_Allowed_Total").alias("ALLOWED_AMT"),
        lit('53').alias("PLACE_OF_SVC"),
        null_col.alias("NBR_TIMES"),
        _blank_to_null("Diag_Code").alias("ICD_DIAG_1"),
        _blank_to_null("Diag_Code2").alias("ICD_DIAG_2"),
        _blank_to_null("Diag_Code3").alias("ICD_DIAG_3"),
        _blank_to_null("Diag_Code4").alias("ICD_DIAG_4"),
        _blank_to_null("Diag_Code5").alias("ICD_DIAG_5"),
        null_col.alias("ICD_DIAG_6"),
        null_col.alias("ICD_DIAG_7"),
        _proc_code_if(('0',), ('1', '2', '3', '4')).alias("CPT4_CODE"),
        null_col.alias("REV_CODE"),
        proc_code_012.alias("CPT4_CODE2"),
        _blank_to_null("Proc_Mod1").alias("CPT4_MODIFIER1"),
        _blank_to_null("Proc_Mod2").alias("CPT4_MODIFIER2"),
        _blank_to_null("Proc_Mod3").alias("CPT4_MODIFIER3"),
        _blank_to_null("Proc_Mod4").alias("CPT4_MODIFIER4"),
        proc_code_012.alias("HCPCS"),
        null_col.alias("LOINC"),
        null_col.alias("LAB_VALUE"),
        null_col.alias("FILLER_VALUE1"),
        null_col.alias("FILLER_VALUE2"),
        null_col.alias("FILLER_VALUE3"),
        lit('INMCD-MRO').alias("FILLER_VALUE4"),
        col("FillerValue").alias("FILLER_VALUES"),
        lit('N').alias("SUPPLEMENTAL_FLAG"),
        null_col.alias("SUPPLEMENTAL_SOURCE"),
        null_col.alias("BILL_PROV_ID"),
        null_col.alias("HIOS_PLAN_ID"),
        null_col.alias("POSTED_DATE"),
    )
    # NOTE(review): the NULL placeholder columns are untyped; cast them if the
    # eventual sink requires concrete column types.
    .distinct()
)

# Show the result
# show() is an action: it triggers execution of the whole plan above.
df_mroclaims.show()
user26580893 is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.