Thiết kế website giá rẻ

Question

I don’t understand how I can read the bytestream for a TTS azure service in python.

From the docs: https://learn.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.audiodatastream?view=azure-python

bool = can_read_data(requested_bytes: int, pos: int)
and
int = read_data(audio_buffer: bytes, pos: int | None = None)

so

<code>import azure.cognitiveservices.speech as speechsdk

speech_config = speechsdk.SpeechConfig(subscription='key', region='uksouth')

speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm)

speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

text = "Hello, world!"

# Synthesize the speech

result = speech_synthesizer.speak_text_async(text).get()

# Create an AudioDataStream from the synthesized result

if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:

print("Speech synthesized for text [{}]".format(text))

audio_data_stream = speechsdk.AudioDataStream(result)

audio_data_stream.save_to_wav_file("output.wav")

# Reset the stream position to the beginning since saving to file puts the position to end.

audio_data_stream.position = 0

# Reads data from the stream

audio_buffer = bytes(16000)

total_size = 0

filled_size = audio_data_stream.read_data(audio_buffer)

while filled_size > 0:

print("{} bytes received.".format(filled_size))

total_size += filled_size

filled_size = audio_data_stream.read_data(audio_buffer)

print("Totally {} bytes received for text [{}].".format(total_size, text))

# Initialize playing

from pydub import AudioSegment

import io

audio_segment = AudioSegment(

data=audio_buffer, # The raw audio data you received

sample_width=2, # Bytes per sample

frame_rate=16000, # Sampling frequency

channels=1 # Mono

)

from pydub.playback import play

play(audio_segment)

elif result.reason == speechsdk.ResultReason.Canceled:

cancellation_details = result.cancellation_details

print("Speech synthesis canceled: {}".format(cancellation_details.reason))

if cancellation_details.reason == speechsdk.CancellationReason.Error:

print("Error details: {}".format(cancellation_details.error_details))

</code>

<code>import azure.cognitiveservices.speech as speechsdk speech_config = speechsdk.SpeechConfig(subscription='key', region='uksouth') speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm) speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None) text = "Hello, world!" # Synthesize the speech result = speech_synthesizer.speak_text_async(text).get() # Create an AudioDataStream from the synthesized result if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted: print("Speech synthesized for text [{}]".format(text)) audio_data_stream = speechsdk.AudioDataStream(result) audio_data_stream.save_to_wav_file("output.wav") # Reset the stream position to the beginning since saving to file puts the position to end. audio_data_stream.position = 0 # Reads data from the stream audio_buffer = bytes(16000) total_size = 0 filled_size = audio_data_stream.read_data(audio_buffer) while filled_size > 0: print("{} bytes received.".format(filled_size)) total_size += filled_size filled_size = audio_data_stream.read_data(audio_buffer) print("Totally {} bytes received for text [{}].".format(total_size, text)) # Initialize playing from pydub import AudioSegment import io audio_segment = AudioSegment( data=audio_buffer, # The raw audio data you received sample_width=2, # Bytes per sample frame_rate=16000, # Sampling frequency channels=1 # Mono ) from pydub.playback import play play(audio_segment) elif result.reason == speechsdk.ResultReason.Canceled: cancellation_details = result.cancellation_details print("Speech synthesis canceled: {}".format(cancellation_details.reason)) if cancellation_details.reason == speechsdk.CancellationReason.Error: print("Error details: {}".format(cancellation_details.error_details)) </code>

import azure.cognitiveservices.speech as speechsdk
speech_config = speechsdk.SpeechConfig(subscription='key', region='uksouth')
speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)


text = "Hello, world!"
# Synthesize the speech
result = speech_synthesizer.speak_text_async(text).get()

# Create an AudioDataStream from the synthesized result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized for text [{}]".format(text))
    audio_data_stream = speechsdk.AudioDataStream(result)
    audio_data_stream.save_to_wav_file("output.wav")
    # Reset the stream position to the beginning since saving to file puts the position to end.
    audio_data_stream.position = 0

    # Reads data from the stream
    audio_buffer = bytes(16000)
    total_size = 0
    filled_size = audio_data_stream.read_data(audio_buffer)
    while filled_size > 0:
        print("{} bytes received.".format(filled_size))
        total_size += filled_size
        filled_size = audio_data_stream.read_data(audio_buffer)
    print("Totally {} bytes received for text [{}].".format(total_size, text))
        # Initialize playing

    from pydub import AudioSegment
    import io

    audio_segment = AudioSegment(
        data=audio_buffer,  # The raw audio data you received
        sample_width=2,  # Bytes per sample
        frame_rate=16000,  # Sampling frequency
        channels=1  # Mono
    )
    
    from pydub.playback import play
    play(audio_segment)

elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))

Its streaming – and saving. But the stream doesnt sound right. What am I getting wrong?

Thiết kế website giá rẻ

Danh mục

Reading bytes AudioDataStream with Azure TTS and playing