I am running the same instructions every time, only on different user input, so I am using the beta prompt caching feature to cut down on input tokens. No matter how I structure my prompts, nothing actually seems to be cached: the best I have managed is to go from about 1700 input tokens down to 1600. My goal is to keep the instructions and more_info in the cache. Parts of the code are replaced with placeholders for privacy.
from config import ANTHROPIC_API_KEY
import json
import time
import anthropic
import pandas as pd
client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
# Load the full dataset
input_file = "data.csv"
output_file = "data.json"
df = pd.read_csv(input_file, header=None)
df.columns = ["Task ID", "Task Description"]  # must match the column lookups in main() below
# Define the system prompt to set Claude's role
system_prompt = """
insert here
"""
# Define the constant parts of the user prompt
instructions = """
long instructions
"""
more_info = """
a lot of info
"""
# Define the output format and prefill Claude's response
PREFILL = """
{
prefill
}
"""
def get_completion(task: str, system_prompt="", prefill=None):
    # Build the user turn: the static block (instructions + more_info) is
    # marked with cache_control; the per-task blocks come after it.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"{instructions}\n\n{more_info}",
                    "cache_control": {"type": "ephemeral"},
                },
                {
                    "type": "text",
                    "text": f"<task>{task}</task>",
                },
                {
                    "type": "text",
                    "text": "short sentence",
                },
            ],
        }
    ]
    # Prefill Claude's answer by appending a partial assistant turn
    if prefill:
        messages.append({"role": "assistant", "content": prefill.strip()})
    response = client.beta.prompt_caching.messages.create(
        model="claude-3-5-sonnet-20240620",
        system=system_prompt.strip(),
        max_tokens=1000,
        messages=messages,
        temperature=0.0,
    )
    reply = response.content[0].text
    # The API returns only the continuation after a prefill, so re-attach
    # it before main() tries to parse the JSON
    return (prefill.strip() + reply).strip() if prefill else reply.strip()
def main():
    try:
        start_time_total = time.time()
        tasks_output = []
        for idx, row in df.iterrows():
            task_id = row["Task ID"]
            task_description = row["Task Description"]
            if task_description:
                start_time = time.time()
                assistant_reply = get_completion(
                    task=task_description,
                    system_prompt=system_prompt,
                    prefill=PREFILL,
                )
                end_time = time.time()
                # Try to parse the assistant's reply as JSON
                try:
                    response_json = json.loads(assistant_reply)
                    # Add the task ID to the JSON output
                    task_json = {
                        "Task Number": task_id,
                        **response_json,
                    }
                    tasks_output.append(task_json)
                    print(
                        f"Processed task {task_id}: {task_description} (Time taken: {end_time - start_time:.2f} seconds)"
                    )
                except json.JSONDecodeError:
                    print(
                        f"Error: Invalid JSON response for task {task_id}. Moving on..."
                    )
                    print(f"Raw Model Output: {assistant_reply}")
                    task_json = {
                        "Task Number": task_id,
                        "Invalid JSON Response": assistant_reply,
                    }
                    tasks_output.append(task_json)
        # Save the results to the output JSON file
        with open(output_file, "w") as outfile:
            json.dump(tasks_output, outfile, indent=4)
        end_time_total = time.time()
        time_taken_total = end_time_total - start_time_total
        minutes, seconds = divmod(time_taken_total, 60)
        print(
            f"All tasks processed. Results saved to {output_file}\nTime taken: {int(minutes)} minutes and {seconds:.2f} seconds"
        )
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()
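To see whether the cache is ever written or read, I log the cache counters from the beta response. As far as I can tell from the docs, the prompt caching beta adds cache_creation_input_tokens and cache_read_input_tokens to usage; if those field names differ in your SDK version, adjust accordingly:

def print_cache_usage(response):
    # Rough diagnostic: on the second and later calls, a working cache
    # should report most of the prompt under cache_read_input_tokens
    usage = response.usage
    print(
        f"input_tokens={usage.input_tokens} "
        f"cache_creation_input_tokens={getattr(usage, 'cache_creation_input_tokens', 0)} "
        f"cache_read_input_tokens={getattr(usage, 'cache_read_input_tokens', 0)}"
    )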
I have tried structuring the prompts in various ways, including combining everything into one static prompt and caching it. No luck.
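For example, one variant moved everything static into the system parameter and cached it as a single block, roughly like this (a sketch of what I tried; my understanding from the docs is that system can also be given as a list of text blocks):

static_system = [
    {
        "type": "text",
        "text": f"{system_prompt}\n\n{instructions}\n\n{more_info}",
        "cache_control": {"type": "ephemeral"},
    }
]

response = client.beta.prompt_caching.messages.create(
    model="claude-3-5-sonnet-20240620",
    system=static_system,
    max_tokens=1000,
    # task comes from the loop in main()
    messages=[{"role": "user", "content": f"<task>{task}</task>"}],
    temperature=0.0,
)

This made no difference either.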