My code is fetching the values from source csv, transforming some data and then creates a new csv in the same format. everything works fine, however email column label is getting replaced with ‘[email protected]’. For me is not clear which logic it follows to decide on the column labels, since I don’t explicitly outline the header names
#!/bin/bash
# Check if CSV file argument is provided
if [ $# -ne 1 ]; then
echo "Usage: $0 <csv_file>"
exit 1
fi
csv_file="$1"
# Check if the specified file exists
if [ ! -f "$csv_file" ]; then
echo "Error: File '$csv_file' not found."
exit 1
fi
# Extract fields from the CSV
awk_out1=$(awk -F, '{print $1}' "$csv_file")
awk_out2=$(awk -F, '{print $2}' "$csv_file")
awk_out3=$(awk -F, '{print $3}' "$csv_file")
awk_out4=$(awk -v FPAT='([^,]*)|("[^"]+")' '{print $4}' "$csv_file")
awk_out5=$(awk -F, '{print $5}' "$csv_file")
awk_out6=$(awk -F, '{print $6}' "$csv_file")
# Convert the outputs to arrays
IFS=$'n' read -d '' -r -a array1 <<< "$awk_out1"
IFS=$'n' read -d '' -r -a array2 <<< "$awk_out2"
IFS=$'n' read -d '' -r -a array3 <<< "$awk_out3"
IFS=$'n' read -d '' -r -a array4 <<< "$awk_out4"
IFS=$'n' read -d '' -r -a array5 <<< "$awk_out5"
IFS=$'n' read -d '' -r -a array6 <<< "$awk_out6"
# Declare an associative array to track emails
declare -A email_tracker
# Redirect output to accounts_new.csv
exec > accounts_new.csv
# Loop through the arrays and print corresponding lines
for ((i=0; i<${#array2[@]}; i++)); do
# Extract the current elements from all arrays
field1="${array1[i]}"
field2="${array2[i]}"
field3="${array3[i]}"
field4="${array4[i]}"
field5="${array5[i]}"
field6="${array6[i]}"
# Format field3 with capitalized words
formatted_field3=$(echo "$field3" | awk '{print toupper(substr($1, 1, 1)) tolower(substr($1, 2))" " toupper(substr($2, 1, 1)) tolower(substr($2, 2))}')
formatted_field5=$(echo "$field3" | awk '{print tolower(substr($1, 1, 1))""tolower(substr($2, 1, 1)) tolower(substr($2, 2))}')
# Check for duplicates and adjust the email format if necessary
email="${formatted_field5}@abc.com"
if [[ -n "${email_tracker[$email]}" ]]; then
email="${formatted_field5}${field2}@abc.com"
else
email_tracker[$email]=1
fi
# Print the fields on the same line
echo "$field1,$field2,$formatted_field3,$field4,$email,$field6"
done
I expect that csv colun labels shouls remain the same