I am running the same instructions every time, only on different user input, so I am using the beta prompt caching feature to cut down on input tokens. No matter how I structure my prompts, nothing actually seems to be cached: the best I have managed is to go from about 1700 input tokens down to 1600. My goal is to keep the instructions and more_info in the cache. Parts of the code are replaced with placeholders for privacy.
from config import ANTHROPIC_API_KEY
import json
import time
import anthropic
import pandas as pd
client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
# Load the full dataset
input_file = "data.csv"
output_file = "data.json"
df = pd.read_csv(input_file, header=None)
df.columns = ["Task ID", "Task Description"]  # must match the column lookups in main() below
# Define the system prompt to set Claude's role
system_prompt = """
insert here
"""
# Define the constant parts of the user prompt
instructions = """
long instructions
"""
more_info = """
a lot of info
"""
# Define the output format and prefill Claude's response
PREFILL = """
{
prefill
}
"""
def get_completion(task: str, system_prompt="", prefill=None):
    # Build the user turn: the static block (instructions + more_info) is
    # marked with cache_control; the per-task blocks come after it.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"{instructions}\n\n{more_info}",
                    "cache_control": {"type": "ephemeral"},
                },
                {
                    "type": "text",
                    "text": f"<task>{task}</task>",
                },
                {
                    "type": "text",
                    "text": "short sentence",
                },
            ],
        }
    ]
    # Prefill Claude's answer by appending a partial assistant turn
    if prefill:
        messages.append({"role": "assistant", "content": prefill.strip()})
    response = client.beta.prompt_caching.messages.create(
        model="claude-3-5-sonnet-20240620",
        system=system_prompt.strip(),
        max_tokens=1000,
        messages=messages,
        temperature=0.0,
    )
    reply = response.content[0].text
    # The API returns only the continuation after a prefill, so re-attach
    # it before main() tries to parse the JSON
    return (prefill.strip() + reply).strip() if prefill else reply.strip()
def main():
    try:
        start_time_total = time.time()
        tasks_output = []
        for idx, row in df.iterrows():
            task_id = row["Task ID"]
            task_description = row["Task Description"]
            if task_description:
                start_time = time.time()
                assistant_reply = get_completion(
                    task=task_description,
                    system_prompt=system_prompt,
                    prefill=PREFILL,
                )
                end_time = time.time()
                # Try to parse the assistant's reply as JSON
                try:
                    response_json = json.loads(assistant_reply)
                    # Add the task ID to the JSON output
                    task_json = {
                        "Task Number": task_id,
                        **response_json,
                    }
                    tasks_output.append(task_json)
                    print(
                        f"Processed task {task_id}: {task_description} (Time taken: {end_time - start_time:.2f} seconds)"
                    )
                except json.JSONDecodeError:
                    print(
                        f"Error: Invalid JSON response for task {task_id}. Moving on..."
                    )
                    print(f"Raw Model Output: {assistant_reply}")
                    task_json = {
                        "Task Number": task_id,
                        "Invalid JSON Response": assistant_reply,
                    }
                    tasks_output.append(task_json)
        # Save the results to the output JSON file
        with open(output_file, "w") as outfile:
            json.dump(tasks_output, outfile, indent=4)
        end_time_total = time.time()
        time_taken_total = end_time_total - start_time_total
        minutes, seconds = divmod(time_taken_total, 60)
        print(
            f"All tasks processed. Results saved to {output_file}\nTime taken: {int(minutes)} minutes and {seconds:.2f} seconds"
        )
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()
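To see whether the cache is ever written or read, I log the cache counters from the beta response. As far as I can tell from the docs, the prompt caching beta adds cache_creation_input_tokens and cache_read_input_tokens to usage; if those field names differ in your SDK version, adjust accordingly:

def print_cache_usage(response):
    # Rough diagnostic: on the second and later calls, a working cache
    # should report most of the prompt under cache_read_input_tokens
    usage = response.usage
    print(
        f"input_tokens={usage.input_tokens} "
        f"cache_creation_input_tokens={getattr(usage, 'cache_creation_input_tokens', 0)} "
        f"cache_read_input_tokens={getattr(usage, 'cache_read_input_tokens', 0)}"
    )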
I have tried structuring the prompts in various ways, including combining everything into one static prompt and caching it. No luck.
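For example, one variant moved everything static into the system parameter and cached it as a single block, roughly like this (a sketch of what I tried; my understanding from the docs is that system can also be given as a list of text blocks):

static_system = [
    {
        "type": "text",
        "text": f"{system_prompt}\n\n{instructions}\n\n{more_info}",
        "cache_control": {"type": "ephemeral"},
    }
]

response = client.beta.prompt_caching.messages.create(
    model="claude-3-5-sonnet-20240620",
    system=static_system,
    max_tokens=1000,
    # task comes from the loop in main()
    messages=[{"role": "user", "content": f"<task>{task}</task>"}],
    temperature=0.0,
)

This made no difference either.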