I have 2 DataFrames with object-dtype columns, and concatenating them works fine.
Code
<code>df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
print(">>>>>>>>>>>>>>> Original DFs")
print(df1)
print(df2)
print(">>>>>>>>>>>>>>> Original DTypes")
print(df1.dtypes)
print(df2.dtypes)
</code>
<code>df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
print(">>>>>>>>>>>>>>> Original DFs")
print(df1)
print(df2)
print(">>>>>>>>>>>>>>> Original DTypes")
print(df1.dtypes)
print(df2.dtypes)
</code>
# Two small frames; column B of df2 contains only missing values.
df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
print(">>>>>>>>>>>>>>> Original DFs")
print(df1)
print(df2)
print(">>>>>>>>>>>>>>> Original DTypes")
print(df1.dtypes)
print(df2.dtypes)
# Concatenate the frames as created; this produces the
# "Concatenation 1" section of the corresponding output.
print(">>>>>>>>>>>>>>> Concatenation 1 - No Warning")
print(pd.concat([df1, df2]))
Corresponding Output
<code>>>>>>>>>>>>>>>> Original DFs
A B
0 A0 B0
1 A1 None
A B
0 A4 None
1 A5 None
>>>>>>>>>>>>>>> Original DTypes
A object
B object
dtype: object
A object
B object
dtype: object
>>>>>>>>>>>>>>> Concatenation 1 - No Warning
A B
0 A0 B0
1 A1 None
0 A4 None
1 A5 None
</code>
<code>>>>>>>>>>>>>>>> Original DFs
A B
0 A0 B0
1 A1 None
A B
0 A4 None
1 A5 None
>>>>>>>>>>>>>>> Original DTypes
A object
B object
dtype: object
A object
B object
dtype: object
>>>>>>>>>>>>>>> Concatenation 1 - No Warning
A B
0 A0 B0
1 A1 None
0 A4 None
1 A5 None
</code>
>>>>>>>>>>>>>>> Original DFs
A B
0 A0 B0
1 A1 None
A B
0 A4 None
1 A5 None
>>>>>>>>>>>>>>> Original DTypes
A object
B object
dtype: object
A object
B object
dtype: object
>>>>>>>>>>>>>>> Concatenation 1 - No Warning
A B
0 A0 B0
1 A1 None
0 A4 None
1 A5 None
But if I do the same with categorical columns, I get a FutureWarning.
Code with Categorical data type
<code>print(">>>>>>>>>>>>>>> Categorical DTypes")
df1 = df1.astype('category')
df2 = df2.astype('category')
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
print(pd.concat([df1, df2]))
</code>
<code>print(">>>>>>>>>>>>>>> Categorical DTypes")
df1 = df1.astype('category')
df2 = df2.astype('category')
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
print(pd.concat([df1, df2]))
</code>
print(">>>>>>>>>>>>>>> Categorical DTypes")
# Re-cast every column of both frames to pandas' categorical dtype.
df1 = df1.astype('category')
df2 = df2.astype('category')
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
# This is the call the quoted FutureWarning is reported against.
print(pd.concat([df1, df2]))
Corresponding Output
<code>>>>>>>>>>>>>>>> Categorical DTypes
A category
B category
dtype: object
A category
B category
dtype: object
>>>>>>>>>>>>>>> Concatenation 2 - Gives warning
bla.py:37: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
print(pd.concat([df1, df2]))
A B
0 A0 B0
1 A1 NaN
0 A4 NaN
1 A5 NaN
</code>
<code>>>>>>>>>>>>>>>> Categorical DTypes
A category
B category
dtype: object
A category
B category
dtype: object
>>>>>>>>>>>>>>> Concatenation 2 - Gives warning
bla.py:37: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
print(pd.concat([df1, df2]))
A B
0 A0 B0
1 A1 NaN
0 A4 NaN
1 A5 NaN
</code>
>>>>>>>>>>>>>>> Categorical DTypes
A category
B category
dtype: object
A category
B category
dtype: object
>>>>>>>>>>>>>>> Concatenation 2 - Gives warning
bla.py:37: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
print(pd.concat([df1, df2]))
A B
0 A0 B0
1 A1 NaN
0 A4 NaN
1 A5 NaN
df2 had NaN values to start with and there was no problem with them, but when I try to concatenate with all-NaN columns, I get the warning. The suggestion is to remove such entries altogether. Why is this the case? Why does concatenation seem to have issues with NaNs?
Here is the full code
<code>import pandas as pd
def bla():
'''The main function, that can also be called fromother scripts as an API'''
df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
print(">>>>>>>>>>>>>>> Original DFs")
print(df1)
print(df2)
print(">>>>>>>>>>>>>>> Original DTypes")
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 1 - No Warning")
print(pd.concat([df1, df2]))
print(">>>>>>>>>>>>>>> Categorical DTypes")
df1 = df1.astype('category')
df2 = df2.astype('category')
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
print(pd.concat([df1, df2]))
if __name__ == '__main__':
bla()
</code>
<code>import pandas as pd
def bla():
'''The main function, that can also be called fromother scripts as an API'''
df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
print(">>>>>>>>>>>>>>> Original DFs")
print(df1)
print(df2)
print(">>>>>>>>>>>>>>> Original DTypes")
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 1 - No Warning")
print(pd.concat([df1, df2]))
print(">>>>>>>>>>>>>>> Categorical DTypes")
df1 = df1.astype('category')
df2 = df2.astype('category')
print(df1.dtypes)
print(df2.dtypes)
print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
print(pd.concat([df1, df2]))
if __name__ == '__main__':
bla()
</code>
import pandas as pd
def bla():
    """Print two concatenations: frames as created, then cast to category.

    The main function, that can also be called from other scripts as an API.

    Builds two two-row frames whose column ``B`` is partly (``df1``) or
    entirely (``df2``) missing, prints them and their dtypes, and prints
    the result of ``pd.concat``. Both frames are then cast to the
    ``category`` dtype and the same concatenation is printed again.

    Returns:
        None. All results are written to standard output.
    """
    df1 = pd.DataFrame({'A': ['A0', 'A1'], 'B': ['B0', None]})
    df2 = pd.DataFrame({'A': ['A4', 'A5'], 'B': [None, None]})
    print(">>>>>>>>>>>>>>> Original DFs")
    print(df1)
    print(df2)
    print(">>>>>>>>>>>>>>> Original DTypes")
    print(df1.dtypes)
    print(df2.dtypes)
    print(">>>>>>>>>>>>>>> Concatenation 1 - No Warning")
    print(pd.concat([df1, df2]))
    print(">>>>>>>>>>>>>>> Categorical DTypes")
    # Re-cast every column of both frames to pandas' categorical dtype.
    df1 = df1.astype('category')
    df2 = df2.astype('category')
    print(df1.dtypes)
    print(df2.dtypes)
    print(">>>>>>>>>>>>>>> Concatenation 2 - Gives warning")
    # Deliberately left as-is: this call is the one the post reports the
    # FutureWarning for.
    print(pd.concat([df1, df2]))


if __name__ == '__main__':
    bla()