I have a CSV file containing 100 columns. Out of these, I want to check for blank values in the following columns:
bank and trade code
book value
business unit
COE value
corporate product id
counterparty legal entity
currency
cusip
face amount
legal entity
origination date
QRM book value
QRM face value
If any of these columns contain blank values, I want to highlight the particular column in the print statement. However, there’s a special condition for the origination date column: if it contains blank values but the corresponding source system column has values like “post-close adjustment” or “GL-SDI gap”, these blank values are acceptable and should not be flagged. i have tried the below code but this is not working as intended.
import pandas as pd
# Read the CSV file
df = pd.read_csv('c:/user/file.csv')
# Columns to check for blank values
columns_to_check = ['bank', 'trade code', 'book value', 'business unit', 'COE value', 'corporate product id',
'counterparty legal entity', 'currency', 'qsip', 'face amount', 'legal entity',
'origination date', 'qrm book value', 'qrm face value', 'source system']
# Function to check for blank values and print column names with blanks
def check_for_blank_values(df):
for col in columns_to_check:
blank_values = df[df[col].isna()]
if not blank_values.empty and not (col == 'origination date' and ~blank_values['source system'].isin(['post-close adjustment', 'GL-SDI gap']).all()):
print(f"Column '{col}' has blank values.")
# Check for blank values
check_for_blank_values(df)
Check for Scientific Notation:
I also want to check if any values in the following columns are in scientific notation:
book value
face amount
QRM book value
QRM face amount
If any of these columns contain values in scientific notation, I want to highlight this in the print statement by writing the below code but its printing all 4 coulmns names how i deliberately changed the first cell value to scientific under book_value column
import pandas as pd
`# Read the CSV file
df = pd.read_csv('c:/user/file.csv')
# Function to check if any value in the column is in scientific notation
def check_scientific_values(df, column_names):
for column_name in column_names:
df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
if df[column_name].dtype == 'float64':
print(f"The values in column '{column_name}' are in scientific notation.")
# Columns to check
columns_to_check = ['book value', 'face amount', 'QRM face amount', 'QRM book value']
check_scientific_values(df, columns_to_check)`
For 1st query, i tried this
import pandas as pd
# Read the CSV file
df = pd.read_csv('c:/user/file.csv')
# Columns to check for blank values
columns_to_check = ['bank', 'trade code', 'book value', 'business unit', 'COE value', 'corporate product id',
'counterparty legal entity', 'currency', 'qsip', 'face amount', 'legal entity',
'origination date', 'qrm book value', 'qrm face value', 'source system']
# Function to check for blank values and print column names with blanks
def check_for_blank_values(df):
for col in columns_to_check:
blank_values = df[df[col].isna()]
if not blank_values.empty and not (col == 'origination date' and ~blank_values['source system'].isin(['post-close adjustment', 'GL-SDI gap']).all()):
print(f"Column '{col}' has blank values.")
# Check for blank values
check_for_blank_values(df)
For second query, i tried this
import pandas as pd
`# Read the CSV file
df = pd.read_csv('c:/user/file.csv')
# Function to check if any value in the column is in scientific notation
def check_scientific_values(df, column_names):
for column_name in column_names:
df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
if df[column_name].dtype == 'float64':
print(f"The values in column '{column_name}' are in scientific notation.")
# Columns to check
columns_to_check = ['book value', 'face amount', 'QRM face amount', 'QRM book value']
check_scientific_values(df, columns_to_check)`