So I am working on a project where I have to make a code where it can read a txt file and then it will store the words of the file and then count the occurrence of words in the file and then print out the 10 most occurring, 10 least occurring and unique words(words that appear once, least and unique will overlap but that is fine) the main issue is the my wordCount function which is meant to go through the array I have and then record the occurrence of each word into a counter array ie if the word “The” is in the file and appeared 10 times “The” would have a corresponding counter of 10. Additionally is that I am not allowed to use string. Anyway here is the code that I am having trouble with
#include <iostream>
#include <fstream>
#include <algorithm>
using namespace std;
char word[500000];
char wordlist[50000][20];
int counter[50000];
// ensures all char are lower case
void wordLow(char word[]){
for(int j=0; j<50000;j++){
tolower(word[j]);
}
cout<<"Complete"<<endl;
}
//removes punctuation
void removPunc(char word[]){
for(int j=0; j<50000;j++){
if(ispunct(word[j])){
word[j]=word[j+1];
}
}
cout<<"Complete"<<endl;
}
// wordcompo is to compile words into a proper 2d array where to make a word this is done by making sure that the current n is a whitespace and the next position is a alphabetical
// character then int m will be decreased till the begining of the word is found and then in the next for loop s = m and then increases to make the word.
void wordComp(char word[]){
int e=0;
int m=0;
int s=0;
for(int n=0;n<50000;n++){
if(isspace(word[n])&&isalpha(word[n+1])){
m=n-1;
while(!isspace(word[m])){
m--;
if(m==0){
break;
}
}
for(s=m;s<n;s++){
wordlist[e][s] = word[s];
}
e++;
}
}
cout<<"Complete"<<endl;
}
// counts the occurence of words
void wordCount(char wordlist[][20],char word[]){
for(int v=0;v<50000;v++){
// if(isspace(wordlist[v][])==1){
counter[v]++;
//}
}
cout<<"Complete"<<endl;
}
//prints the 10 highest occuring words
void printHigh(int counter[],char wordlist[][20]){
int y=0;
cout << "Most occuring words in file: "<<endl;
for(int k=0;k<50000;k++){
if(counter[k]>50){
for(int l=0;l<20;l++){
if(isalpha(wordlist[k][l])){
cout << wordlist[k][l];
}
cout << "has occured "<< counter[k] <<" times"<<endl;
y++;
}
}
if(y==10){
break;
}
}
}
// pritns the ten lowest occuring words
void printLow(int counter[],char wordlist[][20]){
int z=0;
cout << "Least occuring words in file: "<<endl;
for(int c=0;c<50000;c++){
if(counter[c]==1){
for(int h=0;h<20;h++){
if(isalpha(wordlist[c][h])){
cout << wordlist[c][h];
}
}
cout << " has occured "<< counter[c] <<" times"<<endl;
z++;
}
if(z==10){
break;
}
}
}
// prints all words the occur once
void printUnq(int counter[],char wordlist[][20]){
int p;
cout << "Unique words in file: "<<endl;
for(int f=0;f<50000;f++){
if(counter[f]==1){
for(p=0;p<20;p++){
if(isalpha(wordlist[f][p])){
cout << wordlist[f][p];
}
if(isspace(wordlist[f][p])){
cout << " has occured "<< counter[f] <<" times"<<endl;
}
}
}else if(isspace(wordlist[f][p])){
break;
}
}
}
int main(){
char filename[30];
ifstream fin;
int i = 0;
int a = 0;
char text;
cout << "Please enter the file name: ";
cin >> filename;
fin.open(filename);
if(!fin){
cerr << "Error opening file";
return 0;
}
else{
for (i=0;i < 500000; ++i){
fin >> noskipws >>word[i];
}
fin.close();
removPunc(word);
wordLow(word);
wordComp(word);
wordCount(wordlist,word);
printHigh(counter,wordlist);
printLow(counter,wordlist);
printUnq(counter,wordlist);
}
return 0;
}
What I have at the moment is a function that just goes through my wordList array and just results in every part of counter having the element of 1 what I want it to do is that whenever it sees a word it will check if it is already been checked or if it is new, if new it will increase a new element by 1 if it is already done then it will increase the counter that was associated with that word
Example new word
“bob” goes is the next word, there is no “bob” prior to it so new word counter at a new position increases and is then the “bob” counter
Example recurring word
“and” is the next word, “and” has occurred before the counter for “and” would increase
If anyone can help this mess of a situation that would be appreciated