Thiết kế website giá rẻ

Question

The program works well overall; however, it encounters an issue when it processes accented words. When the program encounters words with accented characters, it stops functioning correctly. The exact reason for this interruption is unclear, and debugging is needed to identify the cause of this issue with accented words.

Through some debugging with various print statements added here and there, I was able to pinpoint that the problem lies within the readCharacters function when it reads character by character.

The program accepts only C standard libraries.

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <locale.h>

// Object structure containing a String, its frequency, and coordinates in the matrix
typedef struct {
    wchar_t string[31];
    float frequency;
    int occurrence;
    int x;
    int y;
} Object;

// Matrix structure containing a list of objects and its dimensions
typedef struct {
    Object** list;
    int* x;
    int y;
} Matrix;

// Alphabet structure containing a sub-alphabet and a word
struct Alphabet {
    struct Alphabet* subalphabet[46];
    Object* word;
};

typedef struct Alphabet Dictionary;

// Function prototypes
Dictionary* deallocate(Dictionary* dict);
void printMatrix(Matrix* matrix, FILE* file);
int asciiIndex(wchar_t character);
int endString(wchar_t string1[], Matrix* matrix, Dictionary* dict, int end);
void readCharacters(FILE* file, Matrix* matrix, Dictionary* dictionary);

// Main function
int main() {
    setlocale(LC_ALL, "en_US.UTF-8");  // Set locale to support Unicode

    FILE* inputFile = fopen("test.txt", "r");
    FILE* outputFile = fopen("output.csv", "w");
    if (inputFile == NULL || outputFile == NULL) {  // Check if files opened successfully
        exit(EXIT_FAILURE);  // Exit with error if not
    }

    Dictionary* dictionary = (Dictionary*)calloc(1, sizeof(Dictionary));  // Allocate memory for dictionary
    dictionary->word = (Object*)calloc(1, sizeof(Object));  // Allocate memory for a word in the dictionary

    Matrix matrix;
    matrix.y = 1;
    matrix.x = (int*)calloc(1, sizeof(int));
    matrix.x[0] = 1;
    matrix.list = (Object**)calloc(1, matrix.y * sizeof(Object*));  // Allocate memory for the matrix
    matrix.list[matrix.y - 1] = (Object*)calloc(1, 2 * sizeof(Object));  // Allocate memory for a row of the matrix

    readCharacters(inputFile, &matrix, dictionary);  // Call function to read characters from file
    printMatrix(&matrix, outputFile);  // Call function to print matrix to output file

    fclose(outputFile);  // Close output file
    fclose(inputFile);  // Close input file

    for (int i = 0; i < matrix.y - 1; i++) {  // Free allocated memory for matrix
        free(matrix.list[i]);
    }
    free(matrix.list);
    free(matrix.x);

    for (int i = 0; i < 46; i++) {  // Free allocated memory for dictionary
        dictionary->subalphabet[i] = deallocate(dictionary->subalphabet[i]);  // Deallocate sub-alphabet recursively
    }
    free(dictionary->word);  // Free memory for the word
    free(dictionary);  // Free memory for the dictionary

    return 0;  // Return 0
}

// Function to deallocate memory for the dictionary (summary)
Dictionary* deallocate(Dictionary* dict) {
    // Implementation omitted for brevity
    return NULL; 
}

// Function to convert character to an ASCII index
int asciiIndex(wchar_t character) {
    wchar_t c = towlower(character);  // Convert character to lowercase

    if (iswalpha(c)) {  // Handle non-accented letters
        return c - L'a';
    } else {  // Handle accented characters
        switch (c) {
            case L'à': return 26;
            case L'è': return 27;
            case L'é': return 28;
            case L'ì': return 29;
            case L'ò': return 30;
            case L'ù': return 31;
            case L'!': return 32;
            case L'?': return 33;
            case L'.': return 34;
            case L''': return 35;
        }
    }

    if (c >= L'0' && c <= L'9') {  // Handle numbers
        return 36 + (c - L'0');
    }

    return -1;  // Unrecognized character
}

// Function to handle the end of a string
int endString(wchar_t string1[], Matrix* matrix, Dictionary* dict, int end) {
    if (string1[0] == L'') {  // If string is empty
        return 0;  // Return 0
    }
    wcscpy(matrix->list[matrix->y - 1][1].string, string1);  // Copy string to matrix
    matrix->list[matrix->y - 1][1].x = 1;  // Assign 1 to x coordinate in position 1
    matrix->list[matrix->y - 1][0].x = 0;  // Assign 0 to x coordinate in position 0
    
    int check = operationalSearch(matrix->list[matrix->y - 1][0].string, 0, dict, matrix->y - 1, 1, matrix);  // Call operationalSearch function
    if (check == 0) {
        matrix->y++;
        matrix->list = realloc(matrix->list, matrix->y * sizeof(Object*));  // Reallocate memory for matrix
        matrix->x = realloc(matrix->x, matrix->y * sizeof(int));
        matrix->x[matrix->y - 1] = 1;
        matrix->list[matrix->y - 1] = (Object*)calloc(1, 2 * sizeof(Object));
    }

    wcscpy(matrix->list[matrix->y - 1][0].string, string1);  // Copy string to matrix
    if (end == 1) {  // If end is 1
        wcscpy(matrix->list[matrix->y - 1][1].string, matrix->list[0][0].string);  // Copy initial string to matrix
        int check = operationalSearch(matrix->list[matrix->y - 1][0].string, 0, dict, matrix->y - 1, 1, matrix);  // Call operationalSearch function
        return 0;  // Return 0
    }

    string1[0] = L'';  // Reset string
    return 0;  // Return 0
}

// Function to read characters from the file and handle the temporary string
void readCharacters(FILE* file, Matrix* matrix, Dictionary* dictionary) {
    wchar_t tempString[31];  // Declare a string of 30 characters
    int index = 0;  // Initialize the index to 0
    wchar_t character;  // Declare the character variable
    int firstWord = 0;
    const wchar_t* accentedChars = L"àèéìòóùÀÈÉÌÒÓÙ";

    while ((character = fgetwc(file))) {  // Read character from file
        wchar_t nextChar = fgetwc(file);  // Read next character
        fseek(file, -1, SEEK_CUR);  // Move file pointer back

        if ((character == L' ' || character == L'n') && nextChar == WEOF) {  // Handle end of file case
            wcscpy(matrix->list[matrix->y - 1][1].string, matrix->list[0][0].string);  // Copy initial string to matrix
            operationalSearch(matrix->list[matrix->y - 1][0].string, 0, dictionary, matrix->y - 1, 1, matrix);  // Check if key has been encountered in the text
            return;
        }

        // Read the first word
        if (firstWord == 0 && (character == L'n' || character == L' ')) {  // Skip initial n or space characters
            continue;
        } else if (firstWord == 0 && (iswalnum(character) || (wcschr(accentedChars, character) != NULL))) {  // Handle alphanumeric characters or accented characters
            matrix->list[0][0].string[index] = character;
            index++;
            if (nextChar == L' ' || nextChar == L'n' || nextChar == L'!' || nextChar == L'?' || nextChar == L'.') {  // Terminate string if followed by space, newline, or punctuation
                matrix->list[0][0].string[index] = L'';
                index = 0;
                firstWord = 1;
            } else if (nextChar == L''') {  // Append apostrophe and terminate string
                matrix->list[0][0].string[index] = L''';
                matrix->list[0][0].string[index + 1] = L'';
                index = 0;
                firstWord = 1;
                fseek(file, 1, SEEK_CUR);  // Increment cursor to avoid reading apostrophe twice
            }
            continue;
        } else if (firstWord == 0 && (character == L'!' || character == L'?' || character == L'.')) {  // Handle special characters
            matrix->list[0][0].string[index] = character;
            matrix->list[0][0].string[index + 1] = L'';
            firstWord = 1;
            index = 0;
            continue;
        }

        if (character == L'!' || character == L'?' || character == L'.') {
            if (index > 0) {
                tempString[index] = L'';  // Terminate string
                endString(tempString, matrix, dictionary, 0);  // Handle end of string
                index = 0;
            }
            tempString[0] = character;
            tempString[1] = L'';
            if (nextChar == WEOF) {
                endString(tempString, matrix, dictionary, 1);  // Handle end of file
                break;
            }
            endString(tempString, matrix, dictionary, 0);
            index = 0;
        } else if (iswalnum(character) || (wcschr(accentedChars, character) != NULL) || character == L''') {
            tempString[index++] = character;  // Append character to tempString
            wprintf(L"%lc", character);

            if (nextChar == L' ' || nextChar == L'n' || nextChar == WEOF ||
                nextChar == L'!' || nextChar == L'?' || nextChar == L'.' || character == L''') {
                tempString[index] = L'';  // Terminate string
                endString(tempString, matrix, dictionary, (nextChar == WEOF) ? 1 : 0);
                index = 0;
            }
        } else if (character == L' ' || character == L'n') {
            if (index > 0) {
                tempString[index] = L'';  // Terminate string
                endString(tempString, matrix, dictionary, 0);
                index = 0;
            }
        } else if (!(iswalnum(character) || (wcschr(accentedChars, character) != NULL) ||
                    character == L''' || character == L'!' ||
                    character == L'?' || character == L'.') &&
                    nextChar == L' ' || nextChar == L'n') {
            if (index > 0) {
                tempString[index] = L'';  // Terminate string
                endString(tempString, matrix, dictionary, 0);
                index = 0;
            }
            continue;
        }

        if (nextChar == WEOF) {  // Break if end of file
            break;
        }
    }
    return;
}

// Function to print the matrix to the file
void printMatrix(Matrix* matrix, FILE* file) {
    for (int i = 0; i <= matrix->y - 1; i++) {
        if (matrix->list[i][1].frequency == 0) {
            return;
        }
        fwprintf(file, L"%ls", matrix->list[i][0].string);
        for (int j = 1; j <= matrix->x[i]; j++) {
            fwprintf(file, L",%ls,%.4f", matrix->list[i][j].string, matrix->list[i][j].frequency);
        }
        if (i != matrix->y - 1) {
            fwprintf(file, L"n");
        }
    }
}

This program is designed to read an Italian text file that may contain special characters such as ., ?, !, and '. It scans the words in the text and adds them to a CSV file, maintaining the frequency of each word. The program uses a matrix to store the words and their respective frequencies and coordinates, while a dictionary structure is used to manage the alphabet and sub-alphabets.

operationalSearch: Searches for a string in the matrix and updates occurrences and frequencies.
compareStrings: Compares two wide character strings, considering case insensitivity.
deallocate: Recursively deallocates memory for the dictionary.

Thiết kế website giá rẻ

Danh mục

Italian Text Frequency Analyzer: Handling Special Characters and Accents