I’m encountering a UnicodeDecodeError
in my FastAPI application while processing audio files. The error traceback is as follows:
<code>Traceback (most recent call last):
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 399, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  ...
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 303, in jsonable_encoder
    jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 289, in jsonable_encoder
    encoded_value = jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 318, in jsonable_encoder
    return ENCODERS_BY_TYPE[type(obj)](obj)
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 59, in <lambda>
    bytes: lambda o: o.decode(),
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9f in position 144: invalid start byte
</code>
<code>Traceback (most recent call last):
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 399, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  ...
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 303, in jsonable_encoder
    jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 289, in jsonable_encoder
    encoded_value = jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 318, in jsonable_encoder
    return ENCODERS_BY_TYPE[type(obj)](obj)
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 59, in <lambda>
    bytes: lambda o: o.decode(),
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9f in position 144: invalid start byte
</code>
Traceback (most recent call last):
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 399, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  ...
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 303, in jsonable_encoder
    jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 289, in jsonable_encoder
    encoded_value = jsonable_encoder(
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 318, in jsonable_encoder
    return ENCODERS_BY_TYPE[type(obj)](obj)
  File "C:\Users\sanja\AppData\Local\Programs\Python\Python310\lib\site-packages\fastapi\encoders.py", line 59, in <lambda>
    bytes: lambda o: o.decode(),
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9f in position 144: invalid start byte
Code:
<code>@app.post("/submit_response")
async def submit_response(session_id: str = Form(...), audio: UploadFile = File(...)):
    """Accept a candidate's audio answer and advance the interview.

    Saves the uploaded audio to disk, transcribes it, records the transcript
    in the current scenario conversation, then runs a satisfaction check and
    either scores the scenario (moving on) or asks a follow-up question.
    """
    session = get_session(session_id)
    audio_path = f"{session_id}_response.wav"
    # Save the uploaded audio file to disk so the recognizer can read it.
    # NOTE(review): blocking file I/O inside an async endpoint — consider
    # `await audio.read()` or a thread pool for large uploads.
    with open(audio_path, "wb") as f:
        shutil.copyfileobj(audio.file, f)
    # Convert speech to text (may return an error-message string on failure).
    response_text = transcribe_audio(audio_path)  # Replace with actual transcription function
    # Overwrite the response slot of the latest conversation entry with the
    # transcript, keeping the first two tuple fields unchanged.
    # NOTE(review): this rebuilds the entry as a 3-tuple, but follow-up
    # entries below are appended as 4-tuples — confirm the expected shape.
    session.current_scenario_conversation[-1] = (
        session.current_scenario_conversation[-1][0],
        session.current_scenario_conversation[-1][1],
        response_text
    )
    save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
    # Retrieve the current trait and run the satisfaction check on the answer.
    trait = TRAITS[session.current_trait_index]
    status, feedback = satisfaction_check(
        session.agents["satisfaction_check"],
        session.current_scenario_conversation[-1][1],
        response_text,
        trait['trait_name']
    )
    if status == "satisfied":
        # If satisfied, score the scenario and move to the next trait/scenario.
        score = score_scenario(session.agents["scoring"], session.current_scenario_conversation, trait)
        save_conversation_to_file(session.interview_filename, ("Score", score))
        move_to_next_scenario(session)
        # NOTE(review): everything returned here is JSON-encoded by FastAPI;
        # if `score` or the question value is ever `bytes`, jsonable_encoder
        # calls bytes.decode() and raises UnicodeDecodeError on non-UTF-8
        # data — verify the helpers return str/number values.
        return {
            "message": "Moving to next scenario",
            "question": session.current_scenario_conversation[-1][1],
            "score": score  # Return the score
        }
    elif status == "insufficient":
        # If the response is insufficient, generate a follow-up question —
        # unless a follow-up already happened, in which case move on.
        if len(session.current_scenario_conversation) >= 2:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario due to insufficient response",
                "question": session.current_scenario_conversation[-1][1]
            }
        else:
            follow_up_question = generate_follow_up(
                session.agents["follow_up"],
                session.candidate_name,
                session.current_scenario_conversation,
                len(session.current_scenario_conversation),
                insufficient=True
            )
            # NOTE(review): appended as a 4-tuple while other entries are
            # 3-tuples; [-1][1] is read elsewhere as the question — confirm.
            session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
            save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
            return {
                "message": "Follow-up question for insufficient response",
                "question": follow_up_question
            }
    else:  # unsatisfied
        # Generate a follow-up question for an unsatisfactory response.
        follow_up_question = generate_follow_up(
            session.agents["follow_up"],
            session.candidate_name,
            session.current_scenario_conversation,
            len(session.current_scenario_conversation),
            insufficient=False
        )
        session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
        save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
        # Cap follow-ups: once the conversation has three entries, move on.
        if len(session.current_scenario_conversation) >= 3:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario after follow-up",
                "question": session.current_scenario_conversation[-1][1]
            }
        return {
            "message": "Follow-up question for unsatisfactory response",
            "question": follow_up_question
        }
def transcribe_audio(audio_path: str) -> str:
"""
Transcribe audio file to text using speech_recognition.
"""
recognizer = sr.Recognizer()
try:
with sr.AudioFile(audio_path) as source:
audio_data = recognizer.record(source)
text = recognizer.recognize_google(audio_data)
return text
except sr.UnknownValueError:
return "Audio unintelligible"
except sr.RequestError as e:
return f"Could not request results; {e}"
</code>
<code>@app.post("/submit_response")
async def submit_response(session_id: str = Form(...), audio: UploadFile = File(...)):
    """Accept a candidate's audio answer and advance the interview.

    Saves the uploaded audio to disk, transcribes it, records the transcript
    in the current scenario conversation, then runs a satisfaction check and
    either scores the scenario (moving on) or asks a follow-up question.
    """
    session = get_session(session_id)
    audio_path = f"{session_id}_response.wav"
    # Save the uploaded audio file to disk so the recognizer can read it.
    # NOTE(review): blocking file I/O inside an async endpoint — consider
    # `await audio.read()` or a thread pool for large uploads.
    with open(audio_path, "wb") as f:
        shutil.copyfileobj(audio.file, f)
    # Convert speech to text (may return an error-message string on failure).
    response_text = transcribe_audio(audio_path)  # Replace with actual transcription function
    # Overwrite the response slot of the latest conversation entry with the
    # transcript, keeping the first two tuple fields unchanged.
    # NOTE(review): this rebuilds the entry as a 3-tuple, but follow-up
    # entries below are appended as 4-tuples — confirm the expected shape.
    session.current_scenario_conversation[-1] = (
        session.current_scenario_conversation[-1][0],
        session.current_scenario_conversation[-1][1],
        response_text
    )
    save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
    # Retrieve the current trait and run the satisfaction check on the answer.
    trait = TRAITS[session.current_trait_index]
    status, feedback = satisfaction_check(
        session.agents["satisfaction_check"],
        session.current_scenario_conversation[-1][1],
        response_text,
        trait['trait_name']
    )
    if status == "satisfied":
        # If satisfied, score the scenario and move to the next trait/scenario.
        score = score_scenario(session.agents["scoring"], session.current_scenario_conversation, trait)
        save_conversation_to_file(session.interview_filename, ("Score", score))
        move_to_next_scenario(session)
        # NOTE(review): everything returned here is JSON-encoded by FastAPI;
        # if `score` or the question value is ever `bytes`, jsonable_encoder
        # calls bytes.decode() and raises UnicodeDecodeError on non-UTF-8
        # data — verify the helpers return str/number values.
        return {
            "message": "Moving to next scenario",
            "question": session.current_scenario_conversation[-1][1],
            "score": score  # Return the score
        }
    elif status == "insufficient":
        # If the response is insufficient, generate a follow-up question —
        # unless a follow-up already happened, in which case move on.
        if len(session.current_scenario_conversation) >= 2:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario due to insufficient response",
                "question": session.current_scenario_conversation[-1][1]
            }
        else:
            follow_up_question = generate_follow_up(
                session.agents["follow_up"],
                session.candidate_name,
                session.current_scenario_conversation,
                len(session.current_scenario_conversation),
                insufficient=True
            )
            # NOTE(review): appended as a 4-tuple while other entries are
            # 3-tuples; [-1][1] is read elsewhere as the question — confirm.
            session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
            save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
            return {
                "message": "Follow-up question for insufficient response",
                "question": follow_up_question
            }
    else:  # unsatisfied
        # Generate a follow-up question for an unsatisfactory response.
        follow_up_question = generate_follow_up(
            session.agents["follow_up"],
            session.candidate_name,
            session.current_scenario_conversation,
            len(session.current_scenario_conversation),
            insufficient=False
        )
        session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
        save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
        # Cap follow-ups: once the conversation has three entries, move on.
        if len(session.current_scenario_conversation) >= 3:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario after follow-up",
                "question": session.current_scenario_conversation[-1][1]
            }
        return {
            "message": "Follow-up question for unsatisfactory response",
            "question": follow_up_question
        }
def transcribe_audio(audio_path: str) -> str:
"""
Transcribe audio file to text using speech_recognition.
"""
recognizer = sr.Recognizer()
try:
with sr.AudioFile(audio_path) as source:
audio_data = recognizer.record(source)
text = recognizer.recognize_google(audio_data)
return text
except sr.UnknownValueError:
return "Audio unintelligible"
except sr.RequestError as e:
return f"Could not request results; {e}"
</code>
@app.post("/submit_response")
async def submit_response(session_id: str = Form(...), audio: UploadFile = File(...)):
    """Accept a candidate's audio answer and advance the interview.

    Saves the uploaded audio to disk, transcribes it, records the transcript
    in the current scenario conversation, then runs a satisfaction check and
    either scores the scenario (moving on) or asks a follow-up question.
    """
    session = get_session(session_id)
    audio_path = f"{session_id}_response.wav"
    # Save the uploaded audio file to disk so the recognizer can read it.
    # NOTE(review): blocking file I/O inside an async endpoint — consider
    # `await audio.read()` or a thread pool for large uploads.
    with open(audio_path, "wb") as f:
        shutil.copyfileobj(audio.file, f)
    # Convert speech to text (may return an error-message string on failure).
    response_text = transcribe_audio(audio_path)  # Replace with actual transcription function
    # Overwrite the response slot of the latest conversation entry with the
    # transcript, keeping the first two tuple fields unchanged.
    # NOTE(review): this rebuilds the entry as a 3-tuple, but follow-up
    # entries below are appended as 4-tuples — confirm the expected shape.
    session.current_scenario_conversation[-1] = (
        session.current_scenario_conversation[-1][0],
        session.current_scenario_conversation[-1][1],
        response_text
    )
    save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
    # Retrieve the current trait and run the satisfaction check on the answer.
    trait = TRAITS[session.current_trait_index]
    status, feedback = satisfaction_check(
        session.agents["satisfaction_check"],
        session.current_scenario_conversation[-1][1],
        response_text,
        trait['trait_name']
    )
    if status == "satisfied":
        # If satisfied, score the scenario and move to the next trait/scenario.
        score = score_scenario(session.agents["scoring"], session.current_scenario_conversation, trait)
        save_conversation_to_file(session.interview_filename, ("Score", score))
        move_to_next_scenario(session)
        # NOTE(review): everything returned here is JSON-encoded by FastAPI;
        # if `score` or the question value is ever `bytes`, jsonable_encoder
        # calls bytes.decode() and raises UnicodeDecodeError on non-UTF-8
        # data — verify the helpers return str/number values.
        return {
            "message": "Moving to next scenario",
            "question": session.current_scenario_conversation[-1][1],
            "score": score  # Return the score
        }
    elif status == "insufficient":
        # If the response is insufficient, generate a follow-up question —
        # unless a follow-up already happened, in which case move on.
        if len(session.current_scenario_conversation) >= 2:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario due to insufficient response",
                "question": session.current_scenario_conversation[-1][1]
            }
        else:
            follow_up_question = generate_follow_up(
                session.agents["follow_up"],
                session.candidate_name,
                session.current_scenario_conversation,
                len(session.current_scenario_conversation),
                insufficient=True
            )
            # NOTE(review): appended as a 4-tuple while other entries are
            # 3-tuples; [-1][1] is read elsewhere as the question — confirm.
            session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
            save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
            return {
                "message": "Follow-up question for insufficient response",
                "question": follow_up_question
            }
    else:  # unsatisfied
        # Generate a follow-up question for an unsatisfactory response.
        follow_up_question = generate_follow_up(
            session.agents["follow_up"],
            session.candidate_name,
            session.current_scenario_conversation,
            len(session.current_scenario_conversation),
            insufficient=False
        )
        session.current_scenario_conversation.append(("Follow-Up", len(session.current_scenario_conversation), follow_up_question, ""))
        save_conversation_to_file(session.interview_filename, session.current_scenario_conversation[-1])
        # Cap follow-ups: once the conversation has three entries, move on.
        if len(session.current_scenario_conversation) >= 3:
            move_to_next_scenario(session)
            return {
                "message": "Moving to next scenario after follow-up",
                "question": session.current_scenario_conversation[-1][1]
            }
        return {
            "message": "Follow-up question for unsatisfactory response",
            "question": follow_up_question
        }
def transcribe_audio(audio_path: str) -> str:
    """Turn the WAV file at *audio_path* into text with Google's recognizer.

    Returns the recognized text; on failure returns a human-readable
    message instead of raising ("Audio unintelligible" when the speech
    cannot be understood, a request-error message when the recognition
    service is unreachable).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as wav_source:
            captured = recognizer.record(wav_source)
        return recognizer.recognize_google(captured)
    except sr.UnknownValueError:
        return "Audio unintelligible"
    except sr.RequestError as e:
        return f"Could not request results; {e}"
Context:
- I am using FastAPI to create an API that handles audio file uploads for speech-to-text processing.
- The error occurs when the application attempts to handle or encode data returned from processing these audio files.
- The audio files are being uploaded and processed in binary format.
Questions:
- What could be causing the UnicodeDecodeError in this scenario?
- How can I resolve this issue when dealing with binary data in FastAPI?
- Are there best practices for handling audio files and their encoding when integrating speech-to-text functionality in a FastAPI application?
4