I am working on a large dataset. Keeping in mind that the LLM will hallucinate, I have divided the data into chunks. Now I am able to get the answer from one of the chunks.
For example, in one chunk:
Q – Where is London?
A – London is in the United Kingdom
In another chunk:
A – There is no result for London (which is correct)
How should I go forward from here to get only the accurate answer from the chunks? I have tried a few approaches, but they didn’t work very well for me. I am using Semantic Kernel.
<code>var kernelBuilder = Kernel.CreateBuilder();
// Register an OpenAI-style chat-completion connector on the kernel builder and build the kernel.
// The connector is configured for model "llama3:8b" with an explicit endpoint; the API key and
// endpoint URI are blank placeholders in this listing.
// NOTE(review): SKEXP0010 is presumably the experimental-API diagnostic raised by this overload — confirm.
#pragma warning disable SKEXP0010
var kernel = kernelBuilder
.AddOpenAIChatCompletion(
modelId: "llama3:8b",
apiKey: "",
endpoint: new Uri("")
)
.Build();
// Resolve the chat-completion service that the connector above registered on the kernel.
var aiModel = kernel.GetRequiredService<IChatCompletionService>();
// MySQL connection configuration
var connectionString = ""; // Replace with actual values
// Using declaration: the connection is disposed when the enclosing scope ends.
using var connection = new MySql.Data.MySqlClient.MySqlConnection(connectionString);
// Open the connection up front; on any failure report the message and stop the program.
try
{
await connection.OpenAsync();
Console.WriteLine("Connection to the database was successful.");
}
catch (Exception ex)
{
Console.WriteLine($"Error connecting to the database: {ex.Message}");
return;
}
// Selects every column of every row in the country table; the whole result is used as model context.
string query = @"SELECT * FROM country";
// Converts every row of a DataTable into a dictionary keyed by column name.
// Parameter table: the table whose rows and columns are copied.
// Returns one dictionary per row, in row order.
// Database NULLs are stored in a DataRow as the DBNull.Value sentinel object rather than a
// null reference, so they would not be emitted as JSON null when the rows are serialized.
// They are mapped to null here so missing values reach the model as null.
List<Dictionary<string, object>> DataTableToList(DataTable table)
{
    var result = new List<Dictionary<string, object>>(table.Rows.Count);
    foreach (DataRow row in table.Rows)
    {
        var dict = new Dictionary<string, object>(table.Columns.Count);
        foreach (DataColumn col in table.Columns)
        {
            // IsNull is true exactly when the cell holds DBNull.Value.
            dict[col.ColumnName] = row.IsNull(col) ? null! : row[col];
        }
        result.Add(dict);
    }
    return result;
}
// Holds the complete query result serialized as a JSON array of row objects.
string jsonResult;
// Run the query, buffer the full result set into a DataTable, then serialize the rows.
// The reader and the command are released as soon as this block is left.
using (var cmd = new MySql.Data.MySqlClient.MySqlCommand(query, connection))
using (var rowReader = await cmd.ExecuteReaderAsync())
{
    var buffered = new DataTable();
    buffered.Load(rowReader);
    jsonResult = JsonSerializer.Serialize(DataTableToList(buffered));
}
// Splits a JSON array of row objects into JSON-array strings of at most chunkSize rows each
// (100 rows per chunk unless the caller passes another size).
// Parameter json: a JSON array of objects, or the JSON literal null.
// Parameter chunkSize: maximum number of rows per chunk; must be greater than zero.
// Returns the chunks in source order; empty when the input has no rows or is JSON null.
// Throws ArgumentOutOfRangeException when chunkSize is zero or negative, because such a step
// would keep the loop below from ever reaching the end of the list.
List<string> SplitJsonIntoChunks(string json, int chunkSize = 100)
{
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    var rows = JsonSerializer.Deserialize<List<Dictionary<string, object>>>(json);
    var chunks = new List<string>();
    if (rows is null)
    {
        // The JSON literal null deserializes to a null list; treat it as "no rows".
        return chunks;
    }

    for (int start = 0; start < rows.Count; start += chunkSize)
    {
        // The final chunk may hold fewer than chunkSize rows.
        int length = Math.Min(chunkSize, rows.Count - start);
        chunks.Add(JsonSerializer.Serialize(rows.GetRange(start, length)));
    }
    return chunks;
}
// Split the serialized rows once, using the default chunk size; every question reuses these chunks.
var jsonChunks = SplitJsonIntoChunks(jsonResult);
// NOTE(review): aiResponses and relevantDataList are not read or written in the visible code — confirm they are still needed.
var aiResponses = new List<Dictionary<string, object>>();
var relevantDataList = new List<Dictionary<string, object>>();
// 1-based label attached to each printed response. The visible code only increments it,
// so the numbering keeps growing across questions instead of restarting at 1.
int chunkCounter = 1;
while (true)
{
Console.Write("Your question: ");
var question = Console.ReadLine();
StringBuilder sb = new();
foreach (var chunk in jsonChunks)
{
//Include the JSON data in the question to provide context for the AI model
var prompt =
$"You are a highly helpful AI assistant with the current date and time. Below is a dataset in JSON format:{chunk}" +
$"Based on this dataset, please answer the following question:{question}"
var arguments = new OpenAIPromptExecutionSettings { MaxTokens = 200, Temperature = 0 };
//Stream the response back async
await foreach (var message in aiModel.GetStreamingChatMessageContentsAsync(prompt, arguments))
{
sb.Append(message.Content);
}
// Create a dictionary for the AI response
var aiResponseDict = new Dictionary<string, object>
{
{ "ChunkNumber", chunkCounter },
{ "Question", question },
{ "Response", sb.ToString()
}
};
// Convert the dictionary to a JSON string and display it
var responseJson = JsonSerializer.Serialize(aiResponseDict, new JsonSerializerOptions { WriteIndented = true });
Console.WriteLine(responseJson);
chunkCounter ++;
</code>
<code>var kernelBuilder = Kernel.CreateBuilder();
// Register an OpenAI-style chat-completion connector on the kernel builder and build the kernel.
// The connector is configured for model "llama3:8b" with an explicit endpoint; the API key and
// endpoint URI are blank placeholders in this listing.
// NOTE(review): SKEXP0010 is presumably the experimental-API diagnostic raised by this overload — confirm.
#pragma warning disable SKEXP0010
var kernel = kernelBuilder
.AddOpenAIChatCompletion(
modelId: "llama3:8b",
apiKey: "",
endpoint: new Uri("")
)
.Build();
// Resolve the chat-completion service that the connector above registered on the kernel.
var aiModel = kernel.GetRequiredService<IChatCompletionService>();
// MySQL connection configuration
var connectionString = ""; // Replace with actual values
// Using declaration: the connection is disposed when the enclosing scope ends.
using var connection = new MySql.Data.MySqlClient.MySqlConnection(connectionString);
// Open the connection up front; on any failure report the message and stop the program.
try
{
await connection.OpenAsync();
Console.WriteLine("Connection to the database was successful.");
}
catch (Exception ex)
{
Console.WriteLine($"Error connecting to the database: {ex.Message}");
return;
}
// Selects every column of every row in the country table; the whole result is used as model context.
string query = @"SELECT * FROM country";
// Converts every row of a DataTable into a dictionary keyed by column name.
// Parameter table: the table whose rows and columns are copied.
// Returns one dictionary per row, in row order.
// Database NULLs are stored in a DataRow as the DBNull.Value sentinel object rather than a
// null reference, so they would not be emitted as JSON null when the rows are serialized.
// They are mapped to null here so missing values reach the model as null.
List<Dictionary<string, object>> DataTableToList(DataTable table)
{
    var result = new List<Dictionary<string, object>>(table.Rows.Count);
    foreach (DataRow row in table.Rows)
    {
        var dict = new Dictionary<string, object>(table.Columns.Count);
        foreach (DataColumn col in table.Columns)
        {
            // IsNull is true exactly when the cell holds DBNull.Value.
            dict[col.ColumnName] = row.IsNull(col) ? null! : row[col];
        }
        result.Add(dict);
    }
    return result;
}
// Holds the complete query result serialized as a JSON array of row objects.
string jsonResult;
// Run the query, buffer the full result set into a DataTable, then serialize the rows.
// The reader and the command are released as soon as this block is left.
using (var cmd = new MySql.Data.MySqlClient.MySqlCommand(query, connection))
using (var rowReader = await cmd.ExecuteReaderAsync())
{
    var buffered = new DataTable();
    buffered.Load(rowReader);
    jsonResult = JsonSerializer.Serialize(DataTableToList(buffered));
}
// Splits a JSON array of row objects into JSON-array strings of at most chunkSize rows each
// (100 rows per chunk unless the caller passes another size).
// Parameter json: a JSON array of objects, or the JSON literal null.
// Parameter chunkSize: maximum number of rows per chunk; must be greater than zero.
// Returns the chunks in source order; empty when the input has no rows or is JSON null.
// Throws ArgumentOutOfRangeException when chunkSize is zero or negative, because such a step
// would keep the loop below from ever reaching the end of the list.
List<string> SplitJsonIntoChunks(string json, int chunkSize = 100)
{
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    var rows = JsonSerializer.Deserialize<List<Dictionary<string, object>>>(json);
    var chunks = new List<string>();
    if (rows is null)
    {
        // The JSON literal null deserializes to a null list; treat it as "no rows".
        return chunks;
    }

    for (int start = 0; start < rows.Count; start += chunkSize)
    {
        // The final chunk may hold fewer than chunkSize rows.
        int length = Math.Min(chunkSize, rows.Count - start);
        chunks.Add(JsonSerializer.Serialize(rows.GetRange(start, length)));
    }
    return chunks;
}
// Split the serialized rows once, using the default chunk size; every question reuses these chunks.
var jsonChunks = SplitJsonIntoChunks(jsonResult);
// NOTE(review): aiResponses and relevantDataList are not read or written in the visible code — confirm they are still needed.
var aiResponses = new List<Dictionary<string, object>>();
var relevantDataList = new List<Dictionary<string, object>>();
// 1-based label attached to each printed response. The visible code only increments it,
// so the numbering keeps growing across questions instead of restarting at 1.
int chunkCounter = 1;
while (true)
{
Console.Write("Your question: ");
var question = Console.ReadLine();
StringBuilder sb = new();
foreach (var chunk in jsonChunks)
{
//Include the JSON data in the question to provide context for the AI model
var prompt =
$"You are a highly helpful AI assistant with the current date and time. Below is a dataset in JSON format:{chunk}" +
$"Based on this dataset, please answer the following question:{question}"
var arguments = new OpenAIPromptExecutionSettings { MaxTokens = 200, Temperature = 0 };
//Stream the response back async
await foreach (var message in aiModel.GetStreamingChatMessageContentsAsync(prompt, arguments))
{
sb.Append(message.Content);
}
// Create a dictionary for the AI response
var aiResponseDict = new Dictionary<string, object>
{
{ "ChunkNumber", chunkCounter },
{ "Question", question },
{ "Response", sb.ToString()
}
};
// Convert the dictionary to a JSON string and display it
var responseJson = JsonSerializer.Serialize(aiResponseDict, new JsonSerializerOptions { WriteIndented = true });
Console.WriteLine(responseJson);
chunkCounter ++;
</code>
// Start from an empty Semantic Kernel builder; services are registered on it below.
var kernelBuilder = Kernel.CreateBuilder();
// Register an OpenAI-style chat-completion connector on the kernel builder and build the kernel.
// The connector is configured for model "llama3:8b" with an explicit endpoint; the API key and
// endpoint URI are blank placeholders in this listing.
// NOTE(review): SKEXP0010 is presumably the experimental-API diagnostic raised by this overload — confirm.
#pragma warning disable SKEXP0010
var kernel = kernelBuilder
.AddOpenAIChatCompletion(
modelId: "llama3:8b",
apiKey: "",
endpoint: new Uri("")
)
.Build();
// Resolve the chat-completion service that the connector above registered on the kernel.
var aiModel = kernel.GetRequiredService<IChatCompletionService>();
// MySQL connection configuration
var connectionString = ""; // Replace with actual values
// Using declaration: the connection is disposed when the enclosing scope ends.
using var connection = new MySql.Data.MySqlClient.MySqlConnection(connectionString);
// Open the connection up front; on any failure report the message and stop the program.
try
{
await connection.OpenAsync();
Console.WriteLine("Connection to the database was successful.");
}
catch (Exception ex)
{
Console.WriteLine($"Error connecting to the database: {ex.Message}");
return;
}
// Selects every column of every row in the country table; the whole result is used as model context.
string query = @"SELECT * FROM country";
// Converts every row of a DataTable into a dictionary keyed by column name.
// Parameter table: the table whose rows and columns are copied.
// Returns one dictionary per row, in row order.
// Database NULLs are stored in a DataRow as the DBNull.Value sentinel object rather than a
// null reference, so they would not be emitted as JSON null when the rows are serialized.
// They are mapped to null here so missing values reach the model as null.
List<Dictionary<string, object>> DataTableToList(DataTable table)
{
    var result = new List<Dictionary<string, object>>(table.Rows.Count);
    foreach (DataRow row in table.Rows)
    {
        var dict = new Dictionary<string, object>(table.Columns.Count);
        foreach (DataColumn col in table.Columns)
        {
            // IsNull is true exactly when the cell holds DBNull.Value.
            dict[col.ColumnName] = row.IsNull(col) ? null! : row[col];
        }
        result.Add(dict);
    }
    return result;
}
// Holds the complete query result serialized as a JSON array of row objects.
string jsonResult;
// Run the query, buffer the full result set into a DataTable, then serialize the rows.
// The reader and the command are released as soon as this block is left.
using (var cmd = new MySql.Data.MySqlClient.MySqlCommand(query, connection))
using (var rowReader = await cmd.ExecuteReaderAsync())
{
    var buffered = new DataTable();
    buffered.Load(rowReader);
    jsonResult = JsonSerializer.Serialize(DataTableToList(buffered));
}
// Splits a JSON array of row objects into JSON-array strings of at most chunkSize rows each
// (100 rows per chunk unless the caller passes another size).
// Parameter json: a JSON array of objects, or the JSON literal null.
// Parameter chunkSize: maximum number of rows per chunk; must be greater than zero.
// Returns the chunks in source order; empty when the input has no rows or is JSON null.
// Throws ArgumentOutOfRangeException when chunkSize is zero or negative, because such a step
// would keep the loop below from ever reaching the end of the list.
List<string> SplitJsonIntoChunks(string json, int chunkSize = 100)
{
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    var rows = JsonSerializer.Deserialize<List<Dictionary<string, object>>>(json);
    var chunks = new List<string>();
    if (rows is null)
    {
        // The JSON literal null deserializes to a null list; treat it as "no rows".
        return chunks;
    }

    for (int start = 0; start < rows.Count; start += chunkSize)
    {
        // The final chunk may hold fewer than chunkSize rows.
        int length = Math.Min(chunkSize, rows.Count - start);
        chunks.Add(JsonSerializer.Serialize(rows.GetRange(start, length)));
    }
    return chunks;
}
// Split the serialized rows once, using the default chunk size; every question reuses these chunks.
var jsonChunks = SplitJsonIntoChunks(jsonResult);
// NOTE(review): aiResponses and relevantDataList are not read or written in the visible code — confirm they are still needed.
var aiResponses = new List<Dictionary<string, object>>();
var relevantDataList = new List<Dictionary<string, object>>();
// 1-based label attached to each printed response. The visible code only increments it,
// so the numbering keeps growing across questions instead of restarting at 1.
int chunkCounter = 1;
while (true)
{
Console.Write("Your question: ");
var question = Console.ReadLine();
StringBuilder sb = new();
foreach (var chunk in jsonChunks)
{
//Include the JSON data in the question to provide context for the AI model
var prompt =
$"You are a highly helpful AI assistant with the current date and time. Below is a dataset in JSON format:{chunk}" +
$"Based on this dataset, please answer the following question:{question}"
var arguments = new OpenAIPromptExecutionSettings { MaxTokens = 200, Temperature = 0 };
//Stream the response back async
await foreach (var message in aiModel.GetStreamingChatMessageContentsAsync(prompt, arguments))
{
sb.Append(message.Content);
}
// Create a dictionary for the AI response
var aiResponseDict = new Dictionary<string, object>
{
{ "ChunkNumber", chunkCounter },
{ "Question", question },
{ "Response", sb.ToString()
}
};
// Convert the dictionary to a JSON string and display it
var responseJson = JsonSerializer.Serialize(aiResponseDict, new JsonSerializerOptions { WriteIndented = true });
Console.WriteLine(responseJson);
chunkCounter ++;
2