I have this dataset:
<code>mydata=structure(list(crop_name = c("Guar", "Guar", "Guar", "Guar",
"Guar", "Guar", "Guar", "Guar", "Guar", "Guar", "Bajra", "Bajra",
"Bajra", "Bajra", "Bajra", "Bajra", "Bajra", "Bajra", "Bajra",
"Bajra"), B08A = c(2781L, 2817L, 2700L, 1780L, 3702L, 4094L,
3921L, 3780L, 2855L, 3501L, 2963L, 2578L, 1996L, 4502L, 4217L,
3528L, 3689L, 3529L, 3050L, 2996L), nir = c(2576L, 2607L, 2328L,
1541L, 3329L, 3681L, 3636L, 3382L, 2481L, 3174L, 2820L, 2691L,
2318L, 4229L, 3868L, 3648L, 3271L, 3244L, 2497L, 2752L), swir = c(2866L,
3589L, 2782L, 1447L, 2784L, 3084L, 3019L, 2972L, 1779L, 2687L,
3889L, 2332L, 1570L, 2574L, 2910L, 2546L, 2953L, 3176L, 3198L,
3464L), gcvi = c(0.624211853, 1.065768621, 0.897310513, 0.861111111,
1.384670487, 2.349408553, 2.246428571, 1.756316218, 1.753607103,
2.933085501, 0.923601637, 0.996290801, 2.274011299, 2.311667971,
3.084477296, 2.86031746, 2.494658119, 2.164878048, 1.824660633,
1.596226415)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L))
</code>
I’m trying to build a classifier that predicts which crop class (Bajra or Guar) an observation belongs to, so the dependent variable is crop_name and the remaining variables are predictors.
But I’m not just trying to fit a single classifier: I want to automatically search over the hyperparameters of a random forest until it reaches the accuracy I need (at least 90% by F-measure). In other words, the procedure should keep iterating over hyperparameters until the desired accuracy is achieved, and if that accuracy is simply not reachable with these predictors, it should report the best accuracy it did achieve (say, 70%).
Here is my attempt:
<code>library(caret)
library(randomForest)
# Convert the factor variable to a numeric one
mydata$crop_name <- as.numeric(mydata$crop_name) - 1
# We split the data into training and test samples
set.seed(123)
trainIndex <- createDataPartition(mydata$crop_name, p = 0.8, list = FALSE)
trainData <- mydata[trainIndex, ]
testData <- mydata[-trainIndex, ]
# Define a function to evaluate the model
evaluate_model <- function(model, testData) {
predictions <- predict(model, testData)
confusionMatrix <- confusionMatrix(data = factor(predictions, levels = c(0, 1)),
reference = factor(testData$crop_name, levels = c(0, 1)),
positive = "1")
precision <- confusionMatrix$byClass['Pos Pred Value']
recall <- confusionMatrix$byClass['Sensitivity']
f1 <- 2 * precision * recall / (precision + recall)
return(list(precision = precision, recall = recall, f1 = f1))
}
# **Random Forest**
# Define a grid of parameters for searching
rfGrid <- expand.grid(mtry = c(2, 3, 4),
ntree = c(500, 1000, 1500))
best_rf_f1 <- 0
# Enumerate random forest parameters
for (i in 1:nrow(rfGrid)) {
# Model training
rfModel <- randomForest(crop_name ~ ., data = trainData,
mtry = rfGrid$mtry[i],
ntree = rfGrid$ntree[i])
# Model evaluation
rfMetrics <- evaluate_model(rfModel, testData)
# Save the model if the F1-measure is better than the previous ones
if (rfMetrics$f1 > best_rf_f1) {
best_rf_f1 <- rfMetrics$f1
best_rf_model <- rfModel
}
}
cat("Random Forest - F1-measure:", best_rf_f1, "n")
</code>
However, I get this error:
<code>Error in if (rfMetrics$f1 > best_rf_f1) { :
missing value where TRUE/FALSE needed
In addition: Warning message:
In randomForest.default(m, y, ...) :
The response has five or fewer unique values. Are you sure you want to do regression?
</code>
What am I doing wrong? And how do I correctly make the model iterate over its hyperparameters until the desired accuracy, or the maximum achievable accuracy, is reached?
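Is something like the sketch below closer to what I need? It keeps crop_name as a factor so that randomForest runs in classification mode rather than regression, skips NA F-values, and breaks out of the loop once F1 reaches 0.9; the positive = "Guar" choice and the grid values are just assumptions on my part.
<code>library(caret)
library(randomForest)

# Keep the response as a factor so randomForest does classification, not regression
mydata$crop_name <- factor(mydata$crop_name)

set.seed(123)
trainIndex <- createDataPartition(mydata$crop_name, p = 0.8, list = FALSE)
trainData  <- mydata[trainIndex, ]
testData   <- mydata[-trainIndex, ]

# Grid of hyperparameters to enumerate
rfGrid <- expand.grid(mtry = c(2, 3, 4), ntree = c(500, 1000, 1500))

best_f1    <- -Inf
best_model <- NULL

for (i in seq_len(nrow(rfGrid))) {
  rfModel <- randomForest(crop_name ~ ., data = trainData,
                          mtry  = rfGrid$mtry[i],
                          ntree = rfGrid$ntree[i])
  preds <- predict(rfModel, testData)          # class labels, not numbers
  cm    <- confusionMatrix(preds, testData$crop_name, positive = "Guar")
  prec  <- cm$byClass["Pos Pred Value"]
  rec   <- cm$byClass["Sensitivity"]
  f1    <- 2 * prec * rec / (prec + rec)
  # Keep the best model so far; guard against NA when a class is never predicted
  if (!is.na(f1) && f1 > best_f1) {
    best_f1    <- f1
    best_model <- rfModel
  }
  # Stop early once the target F-measure is reached
  if (!is.na(f1) && f1 >= 0.9) break
}

cat("Best F1-measure achieved:", best_f1, "\n")
</code>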
As usual, any help is greatly appreciated