I have the excel sheet with 20 plus columns, If i want to select a row which doesn’t contain the text n/a, is it possible to pass n/a as text and find it?
The code which I tried is, ””import pandas as pd
import re
import os
def extract_data(input_file):
# Read the input Excel file
df = pd.read_excel(input_file)
# Check if 'agreed' is present in column 5
if not df.iloc[:, 4].astype(str).str.contains('Agreed', case=False, na=False).any():
print("No 'agreed' found in column 5")
return
# Filter rows where column 5 contains 'agreed'
filtered_df = df[df.iloc[:, 4].astype(str).str.contains('Agreed', case=False, na=False)]
# Initialize DataFrames for the four sheets
volumen_df = pd.DataFrame()
mrr_premium_df = pd.DataFrame()
lrr_df = pd.DataFrame()
meb_df = pd.DataFrame()
# Define a function to extract values from text
def extract_values(text, pattern):
match = re.search(pattern, text, re.IGNORECASE)
return match.group(1) if match else None
# Function to check if exclusion texts are in the specified column
def contains_exclusion_texts(value):
exclusion_texts = ["n/a", "entfallen", "tbd"]
return any(excluded_text in str(value).lower() for excluded_text in exclusion_texts)
# Process each row individually
for index, row in filtered_df.iterrows():
col13 = contains_exclusion_texts(row.iloc[13])
col14 = contains_exclusion_texts(row.iloc[14])
col15 = contains_exclusion_texts(row.iloc[15])
pdu_short_name = str(row.iloc[11]).replace('FAULT_', '')
cycle_time = extract_values(str(row.iloc[20]), r'(d+s*ms)')
n_value = extract_values(str(row.iloc[20]), r'ns*=s*(d+)')
q_value = extract_values(str(row.iloc[20]), r'qs*=s*(d+)')
max_delta_counter = extract_values(str(row.iloc[20]), r'MaxDeltaCounterInits*=s*(d+)')
no_new_or_repeated_data = extract_values(str(row.iloc[20]), r'NoNewOrRepeatedDatas*=s*(d+)')
data = {
'PDU short name': pdu_short_name,
'Cycle time': cycle_time,
'n': n_value,
'q': q_value,
'Max Delta Counter': max_delta_counter,
'No New Or Repeated Data': no_new_or_repeated_data
}
if not col13:
# Volumen
volumen_df = volumen_df.append(data, ignore_index=True)
if not col14 :
# Check column 12 for LRR or MRR_Premium
type_col_value = str(row.iloc[12])
if 'LRR' in type_col_value:
lrr_df = lrr_df.append(data, ignore_index=True)
if 'MRR_Premium' in type_col_value:
mrr_premium_df = mrr_premium_df.append(data, ignore_index=True)
if not col15 :
# Meb
meb_df = meb_df.append(data, ignore_index=True)
# Define output file path
output_file = os.path.join(os.path.dirname(input_file), 'extracted_data3.xlsx')
# Save the extracted data to a new Excel file with different sheets
with pd.ExcelWriter(output_file) as writer:
volumen_df.to_excel(writer, sheet_name='Volumen', index=False)
mrr_premium_df.to_excel(writer, sheet_name='MRR Premium', index=False)
lrr_df.to_excel(writer, sheet_name='LRR', index=False)
meb_df.to_excel(writer, sheet_name='Meb', index=False)
print(f"Data extracted and saved to {output_file}")
Get the input file path from the user
input_file_path = input(“Enter the path to your input Excel file: “)
Call the function with the user-provided file path
extract_data(input_file_path)
”” In this code, im trying to find if n/a or entfallen or tbd is present in the requireed columns, but im getting output for entfallen and tbd only not for n/a. Means the code is not detecting n/a in any of the columns. What maybe the reason? because of forward slash present in n/a, should I find alternative approach to read n/a instead of string.