For some reason df.replace() is not working for me. I want to fill nan values with a dummy value, pivot, then turn the dummy values back into nans using replace, but replace is not working. On further investigation it seems that the ‘yy’ value is not being recognised as the same as the fillna value so the function cant find anything to replace. e.g.
“Checking again for ‘yy’ values presence: False”
I don’t know what’s going on. Note this also still happens for me using inplace = True, regex=True and if I put the find and replace items in a dictionary e.g. {‘yy’:np.nan}. My real data is being read from an excel sheet using read_excel.
import pandas as pd
import numpy as np
# Load example data
data_as_dict ={'SiteID': {0: 'Somewhere Creek D/S', 1: 'Somewhere Creek D/S', 2: 'Somewhere Creek D/S', 3: 'Somewhere Creek D/S', 4: 'Somewhere Creek D/S', 5: 'Somewhere Creek D/S', 6: 'Somewhere Creek D/S', 7: 'Somewhere Creek D/S', 8: 'Somewhere Creek D/S'}, 'ParameterID': {0: 'EW_APHA1030E.IONBAL', 1: 'EW_APHA1030E.IONBAL', 2: 'EW_APHA1030E.SUM_OF_IONS', 3: 'EW_APHA1030E.SUM_OF_IONS', 4: 'EW_APHA1030E.TFSS', 5: 'EW_APHA2120C_UV.COLOUR_TRUE', 6: 'EW_APHA2130.TURB_BEFORE', 7: 'EW_APHA2320.ALK_BICAR', 8: 'EW_APHA2320.ALK_BICAR'}, 'SampleDate': {0: '2017-04-03 09:30:00', 1: '2019-04-17 13:30:00', 2: '2017-04-03 09:30:00', 3: '2019-04-17 13:30:00', 4: '2017-04-03 09:30:00', 5: '2017-04-03 09:30:00', 6: '2017-04-03 09:30:00', 7: '2017-04-03 09:30:00', 8: '2019-04-17 13:30:00'}, 'Reading': {0: 15.0, 1: -0.7, 2: 278.0, 3: 975.0, 4: 278.0, 5: 35.0, 6: 20.0, 7: 98.0, 8: 230.0}, 'SampledBy': {0: 'dafdsfd', 1: np.nan, 2: 'dafdsfd', 3: np.nan, 4: 'dafdsfd', 5: 'dafdsfd', 6: 'dafdsfd', 7: 'dafdsfd', 8: np.nan}, 'LabID': {0: 'dagfdfda', 1: np.nan, 2: 'dagfdfda', 3: np.nan, 4: 'dagfdfda', 5: 'dagfdfda', 6: 'dagfdfda', 7: 'dagfdfda', 8: np.nan}, 'Overflow': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Easting': {0: 189162, 1: 189162, 2: 189162, 3: 189162, 4: 189162, 5: 189162, 6: 189162, 7: 189162, 8: 189162}, 'Northing': {0: 269534, 1: 269534, 2: 269534, 3: 269534, 4: 269534, 5: 269534, 6: 269534, 7: 269534, 8: 269534}, 'RL': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Type': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'SiteDescription': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Status': {0: False, 1: False, 2: False, 3: False, 4: False, 5: False, 6: False, 7: False, 8: False}, 'Unit': {0: 'Percentage', 1: 'Percentage', 2: 'Milligrams per litre', 3: 'Milligrams per litre', 4: 'Milligrams per litre', 5: 'Platinum-Cobalt', 6: 'Nephelometric Turbidity Units', 7: 'Milligrams per litre', 8: 'Milligrams per litre'}, 'Symbol': {0: '%', 1: '%', 2: 'mg/L', 3: 'mg/L', 4: 'mg/L', 5: 'Hazen', 6: 'NTU', 7: 'mg/L', 8: 'mg/L'}, 'Format': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Description': {0: 'Anion-Cation Balance', 1: 'Anion-Cation Balance', 2: 'Sum of Ions', 3: 'Sum of Ions', 4: 'TFSS', 5: 'Colour (True)', 6: 'Turbidity', 7: 'Bicarbonate Alkalinity as CaCO3', 8: 'Bicarbonate Alkalinity as CaCO3'}, 'Overflow2': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Overflow3': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Overflow8': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Lat': {0: -18.16110761, 1: -18.16110761, 2: -18.16110761, 3: -18.16110761, 4: -18.16110761, 5: -18.16110761, 6: -18.16110761, 7: -18.16110761, 8: -18.16110761}, 'Lon': {0: 146.380626652441, 1: 146.380626652441, 2: 146.380626652441, 3: 146.380626652441, 4: 146.380626652441, 5: 146.380626652441, 6: 146.380626652441, 7: 146.380626652441, 8: 146.380626652441}, 'Zone': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'FlwTo': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Marker': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan, 5: np.nan, 6: np.nan, 7: np.nan, 8: np.nan}, 'Parameter': {0: 'Anion-Cation Balance %', 1: 'Anion-Cation Balance %', 2: 'Sum of Ions mg/L', 3: 'Sum of Ions mg/L', 4: 'TFSS mg/L', 5: 'Colour (True) Hazen', 6: 'Turbidity NTU', 7: 'Bicarbonate Alkalinity as CaCO3 mg/L', 8: 'Bicarbonate Alkalinity as CaCO3 mg/L'}}
df=pd.DataFrame.from_dict(data_as_dict)
# Print a sample of the dataframe to verify 'yy' values
print("DataFrame before fill na:")
print(df.head())
# Replace NaNs with "yy" to not lose anything
df = df.fillna("yy")
# Print a sample of the dataframe to verify 'yy' values
print("DataFrame before pivot, with nan filled with yy:")
print(df.head())
# Create pivot table
dsf = df.pivot(index=['SiteID', 'ParameterID', 'SampleDate', 'SampledBy', 'LabID', 'Overflow', 'Easting', 'Northing', 'RL', 'Type', 'SiteDescription', 'Status', 'Unit', 'Symbol', 'Format', 'Description', 'Overflow2', 'Overflow3', 'Overflow8', 'Lat', 'Lon', 'Zone', 'FlwTo', 'Marker'], columns='Parameter', values='Reading')
# Print data types to check if there are any issues
print("Data types in the DataFrame after pivot:")
print(dsf.dtypes)
# Print a sample of the dataframe to verify 'yy' values
print("DataFrame after pivot:")
print(dsf.head())
# Check for 'yy' values
print()
print("Checking for 'yy' values presence:")
print()
print((dsf == 'yy').any().any())
# replace values
dsf = dsf.replace('yy',np.nan)
# Verify replacement by printing a sample of the dataframe
print("DataFrame after replacement (should be no yy values):")
print()
print(dsf.head())
# Check again for 'yy' values
print("Checking again for 'yy' values presence:")
print((dsf == 'yy').any().any())