I’m working on a Python project where I use OpenAI’s GPT-4 API to analyze data from screenshots and PDFs.
My goal is to send both the screenshot and the PDF to the GPT-4 API in a single request.
Sending the screenshot works, and I get no errors when I also attach the PDF, but the response does not take into account anything written in that PDF.
Here are the relevant parts of the code:
<code>from PyPDF2 import PdfReader
</code>
<code>from PyPDF2 import PdfReader
</code>
from PyPDF2 import PdfReader
<code>def encode_pdf_to_base64(pdf_path):
"""
Encodes PDF to base64
"""
try:
with open(pdf_path, "rb") as pdf_file:
return base64.b64encode(pdf_file.read()).decode('utf-8')
except FileNotFoundError:
print(f"Error: The file '{pdf_path}' was not found.")
return None
</code>
<code>def encode_pdf_to_base64(pdf_path):
"""
Encodes PDF to base64
"""
try:
with open(pdf_path, "rb") as pdf_file:
return base64.b64encode(pdf_file.read()).decode('utf-8')
except FileNotFoundError:
print(f"Error: The file '{pdf_path}' was not found.")
return None
</code>
def encode_pdf_to_base64(pdf_path):
    """Return the contents of the file at ``pdf_path`` as a base64 string.

    Args:
        pdf_path: Path of the PDF file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``, or
        ``None`` when the file does not exist (an error line is printed in
        that case).
    """
    # Only the file read is guarded: encoding bytes that are already in
    # memory cannot raise FileNotFoundError.
    try:
        with open(pdf_path, "rb") as pdf_file:
            pdf_bytes = pdf_file.read()
    except FileNotFoundError:
        print(f"Error: The file '{pdf_path}' was not found.")
        return None
    return base64.b64encode(pdf_bytes).decode('utf-8')
<code>def analyze_with_pdf_and_screenshot(screenshot_path, pdf_path=None):
"""
Analyzes the Screenshot with the help of the PDF
"""
base64_image = encode_image(screenshot_path)
base64_pdf = encode_pdf_to_base64(pdf_path) if pdf_path else None
payload = {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Analyze the given Screenshot with the help of the attached PDF and answer with its help"
},
# Also tried this one and it didnt work
#{
# "type": "file",
# "file": {
# "name": os.path.basename(pdf_path),
# "type": "application/pdf",
# "content": base64_pdf
# }
#},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
},
],
"files": [
{
"name": os.path.basename(pdf_path),
"type": "application/pdf",
"content": base64_pdf
}
]
}
],
"max_tokens": 2500
}
try:
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
response.raise_for_status()
result = response.json()
print("API-Answer:", json.dumps(result, indent=2))
return ""
except requests.RequestException as e:
print(f"Error analyzing file: {e}")
return ""
</code>
<code>def analyze_with_pdf_and_screenshot(screenshot_path, pdf_path=None):
"""
Analyzes the Screenshot with the help of the PDF
"""
base64_image = encode_image(screenshot_path)
base64_pdf = encode_pdf_to_base64(pdf_path) if pdf_path else None
payload = {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Analyze the given Screenshot with the help of the attached PDF and answer with its help"
},
# Also tried this one and it didnt work
#{
# "type": "file",
# "file": {
# "name": os.path.basename(pdf_path),
# "type": "application/pdf",
# "content": base64_pdf
# }
#},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
},
],
"files": [
{
"name": os.path.basename(pdf_path),
"type": "application/pdf",
"content": base64_pdf
}
]
}
],
"max_tokens": 2500
}
try:
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
response.raise_for_status()
result = response.json()
print("API-Answer:", json.dumps(result, indent=2))
return ""
except requests.RequestException as e:
print(f"Error analyzing file: {e}")
return ""
</code>
def analyze_with_pdf_and_screenshot(screenshot_path, pdf_path=None):
    """Send a screenshot, and optionally a PDF, to the Chat Completions API.

    Both attachments travel in the ``content`` list of a single user message:
    the screenshot as an ``image_url`` part and the PDF as a ``file`` part.
    Keys placed next to ``content`` (such as a ``"files"`` list) are not part
    of the message schema, so a PDF put there never reaches the model.

    Args:
        screenshot_path: Path handed to ``encode_image`` to obtain the
            base64-encoded screenshot.
        pdf_path: Optional path to a PDF. When it is omitted, or when
            ``encode_pdf_to_base64`` yields ``None`` for it, the request is
            sent with the screenshot only.

    Returns:
        Always an empty string; the JSON response is only printed.
    """
    base64_image = encode_image(screenshot_path)
    base64_pdf = encode_pdf_to_base64(pdf_path) if pdf_path else None

    content = [
        {
            "type": "text",
            "text": "Analyze the given Screenshot with the help of the attached PDF and answer with its help"
        },
    ]

    # Attach the PDF only when there is one; this also avoids calling
    # os.path.basename(None) when pdf_path keeps its default value.
    if base64_pdf is not None:
        content.append({
            "type": "file",
            "file": {
                "filename": os.path.basename(pdf_path),
                # The file bytes are passed as a data URL, like the image.
                "file_data": f"data:application/pdf;base64,{base64_pdf}",
            },
        })

    content.append({
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}"
        },
    })

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": content,
            }
        ],
        "max_tokens": 2500,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        print("API-Answer:", json.dumps(result, indent=2))
        # NOTE(review): the answer text is printed but not returned; kept
        # as-is so existing callers see the same return value.
        return ""
    except requests.RequestException as e:
        print(f"Error analyzing file: {e}")
        return ""
New contributor
Nico Morningstar is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.