Thiết kế website giá rẻ

Question

I’m currently facing an issue with UTF-8 encoding while using a text generation AI in my project. Here’s a summary of the situation:

Problem Description:
When attempting to load data from a JSON or .pth file for my AI model, I encounter a UTF-8 encoding error. I’ve tried multiple encodings (utf-8, utf-16, latin-1, cp1252) without success. The specific error message I’m encountering is: `UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 570: invalid continuation byte`
Steps Taken:

I’ve checked and adjusted the encodings in the Python code to try and resolve the issue.
I’ve used different methods to load JSON and .pth files while specifying encodings.
Expected Outcome:
I expect my program to load the data successfully without any encoding errors, allowing my AI model to function as intended.

Code or Screenshots:


# Project root: the directory two levels above this module's absolute path.
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)

# The intent definitions live under <root>/data.
intents_file = os.path.join(
    BASE_DIR, 'data', 'intent.json'
)

# Serialized checkpoints and the question log sit directly in the root.
data_file = os.path.join(
    BASE_DIR, 'data.pth'
)
unanswered_questions_file = os.path.join(
    BASE_DIR, 'unanswered_questions.json'
)
generate_data_file = os.path.join(
    BASE_DIR, 'generate_data.pth'
)
lstm_data_file = os.path.join(
    BASE_DIR, 'lstm_data.pth'
)

# Candidate encodings, tried in order.
# - 'utf-8-sig' decodes UTF-8 with or without a leading BOM.
# - 'cp1252' must precede 'latin-1': latin-1 maps every byte value and so
#   never fails to decode, which makes anything listed after it unreachable.
encodings_to_try = ['utf-8-sig', 'utf-16', 'cp1252', 'latin-1']


def load_json_file(file_path):
    """Load a JSON document whose text encoding is not known in advance.

    The file is read once as bytes and decoded in memory with each entry of
    ``encodings_to_try`` until one both decodes and parses as JSON.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or ``None`` when no candidate encoding yields
        valid JSON (a message is printed for every failed attempt).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'rb') as json_data:
        raw = json_data.read()

    for encoding in encodings_to_try:
        try:
            # Decoding bytes directly avoids the text-mode 'utf-16' reader,
            # which raises a plain UnicodeError when the file has no BOM.
            return json.loads(raw.decode(encoding))
        except (UnicodeError, json.JSONDecodeError) as e:
            # UnicodeError is the parent of UnicodeDecodeError, so every
            # decoding failure moves on to the next candidate.
            print(f"Error with encoding {encoding}: {e}")

    print(f"Failed to load {file_path}. Check file encoding or content.")
    return None

def load_pt_files(file_path):
    """Load a serialized PyTorch object from *file_path*.

    Tensors are mapped to the CPU while loading so that a checkpoint saved on
    a CUDA device can still be read on a machine without a GPU.

    Args:
        file_path: Path of the ``.pth`` file to load.

    Returns:
        The deserialized object, or ``None`` if loading fails for any reason
        (the error is printed).
    """
    # NOTE: .pth files are binary pickles, not text; text encodings do not
    # apply to them. torch.load unpickles data, so only load trusted files.
    try:
        return torch.load(file_path, map_location='cpu')
    except Exception as e:
        # Deliberately broad: a missing or corrupt checkpoint is reported and
        # signalled to the caller as None instead of aborting the import.
        print(f"Error loading {file_path}: {e}")
        return None

# Load the intent definitions and the serialized checkpoints. Each loader may
# return None when its file cannot be loaded, so every result is checked
# before it is indexed below.
intents = load_json_file(intents_file)
data = load_pt_files(data_file)
generate_data = load_pt_files(generate_data_file)
lstm_data = load_pt_files(lstm_data_file)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if data is not None:
    # Layer sizes, vocabulary, tags and weights stored in the checkpoint.
    input_size = data["input_size"]
    hidden_size = data["hidden_size"]
    output_size = data["output_size"]
    all_words = data['all_words']
    tags = data['tags']
    model_state = data["model_state"]

    model = NeuralNet(input_size, hidden_size, output_size).to(device)
    model.load_state_dict(model_state)
    model.eval()

if lstm_data is not None:
    texts = lstm_data["texts"]
    characters = lstm_data["characters"]

    # Input and output sizes both equal the number of entries in `characters`.
    lstm_model = LSTMModel(input_size=len(characters), hidden_size=128, output_size=len(characters)).to(device)
    lstm_model.load_state_dict(lstm_data['model_state'])
    lstm_model.eval()

    # The second model has the same dimensions but takes its weights from
    # generate_data. That file is loaded independently and may be None, so it
    # needs its own guard instead of relying on the lstm_data check.
    if generate_data is not None:
        lstm_model0 = LSTMModel(input_size=len(characters), hidden_size=128, output_size=len(characters)).to(device)
        lstm_model0.load_state_dict(generate_data['model_state'])
        lstm_model0.eval()

bot_name = "Sam"

Any assistance in resolving this UTF-8 encoding issue would be greatly appreciated! Thank you in advance for your suggestions and guidance.

def _record_exchange(msg, response):
    """Append the question/response pair to the stored log and save it."""
    unanswered_questions = load_unanswered_questions()
    unanswered_questions.append({"question": msg, "response": response})
    save_unanswered_questions(unanswered_questions)


def _media_response(msg, tag, category, generate, show, unknown_category_message):
    """Generate and display media for *category* and return the reply text.

    Args:
        msg: The original user message (logged with the reply on success).
        tag: The predicted intent tag, used to pick the reply templates.
        category: The requested category, looked up in ``DATASET_DIRS``.
        generate: Callable producing the media for a category, or ``None``.
        show: Callable that displays the generated media.
        unknown_category_message: Reply used when the category is not in
            ``DATASET_DIRS``.

    Returns:
        The reply string.
    """
    if category not in DATASET_DIRS:
        return unknown_category_message

    media = generate(category)
    if media is None:
        return f"Desole, je ne trouve pas de categorie pour {category}."

    show(media)
    # Pool the responses of every intent carrying this tag, then fill the
    # chosen template with the category.
    candidates = [
        resp
        for intent in intents['intents']
        if intent['tag'] == tag
        for resp in intent['responses']
    ]
    response = random.choice(candidates).format(category)
    _record_exchange(msg, response)
    return response


def get_response(msg):
    """Return the bot's reply to the user message *msg*.

    The message is tokenized, converted to a bag-of-words vector and
    classified by ``model``. When the top class probability exceeds 0.75 the
    reply is chosen from the predicted tag: a GDP forecast, a generated
    video/image, or a random response of the matching intent. Otherwise (or
    when the predicted tag matches no intent) the message is checked against
    ``keywords`` for generated text, and finally a fixed "not understood"
    reply is returned. Keyword, fallback and successful media replies are
    appended to the stored question log.

    Args:
        msg: The raw user message.

    Returns:
        The reply string.
    """
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).float().to(device)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        output = model(X)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        if tag == "economic_forecast":
            forecast = forecast_gdp(os.path.join(BASE_DIR, 'data', 'economic_data.csv'))
            return f"The GDP forecast for the next period is: {forecast}"

        # First intent whose tag equals the prediction, if any.
        matched = next(
            (intent for intent in intents['intents'] if tag == intent["tag"]),
            None,
        )
        if matched is not None:
            if tag == "generate_video":
                category = sentence[-1]  # Extract the last word as the category
                return _media_response(
                    msg, tag, category, generate_video, show_generated_video,
                    f"Desole, la categorie {category} n'existe pas dans les donnees de videos.",
                )
            if tag == "generate_image":
                category = sentence[-1]  # Extract the last word as the category
                return _media_response(
                    msg, tag, category, generate_image, show_generated_image,
                    f"Desole, la categorie {category} n'existe pas dans les donnees d'images.",
                )
            return random.choice(matched['responses'])
        # A confident prediction whose tag matches no intent falls through to
        # the keyword/fallback path below instead of returning None.

    for keyword in keywords.values():
        if keyword in msg:
            response = generate_text_based_on_keyword(keyword, length=100, temperature=1.2)
            _record_exchange(msg, response)
            return response

    # No response was generated for an unrecognized question.
    response = "Je suis desole, mais je ne comprends pas. Pouvez-vous reformuler votre question ?"
    _record_exchange(msg, response)
    return response

Danh mục