I’ve created a Python script that downloads images from Google Photos via the API, including their metadata (I’m mainly interested in the GPS location data).
I tried different options, but unfortunately the GPS data is not retrieved. Any help is appreciated.
Here is the code of the script:
import os
import json
import requests
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient import discovery
from PIL import Image, ExifTags
import exifread
import piexif
import time
import io
import exif
from PIL.ExifTags import TAGS, GPSTAGS
# OAuth scopes requested: read-only access to the Photos library.
SCOPES = ['https://www.googleapis.com/auth/photoslibrary.readonly']
# Truthy value enables the diagnostic print() calls guarded by `if (DEBUG)`.
DEBUG = 1
# Truthy value stops album listing after the first page and makes
# extract_photo_from_album query a fixed placeholder album id.
SHORT_EXECUTION = 1
# Folder prefix that downloaded photos are written to and read back from.
PATH_TEMP_FOLDER= '/tmp/scripts/my-venv/photos/'
def authenticate_google_photos():
    """Return Google credentials, refreshing or re-authorising as needed.

    ``token.json`` stores the user's access and refresh tokens. When it holds
    valid credentials they are returned as-is; otherwise the credentials are
    refreshed (if expired and a refresh token is available) or obtained through
    the local-server OAuth flow driven by ``client_secret.json``, and the
    result is written back to ``token.json``.
    """
    cached = None
    if os.path.exists('token.json'):
        cached = Credentials.from_authorized_user_file('token.json', SCOPES)

    # Fast path: nothing to refresh, nothing to persist.
    if cached and cached.valid:
        return cached

    refreshable = cached and cached.expired and cached.refresh_token
    if refreshable:
        cached.refresh(Request())
        fresh = cached
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
        fresh = oauth_flow.run_local_server(port=0)

    with open('token.json', 'w') as token_file:
        token_file.write(fresh.to_json())
    return fresh
def get_photos_list_from_album(creds, albumID, page_size=10):
    """Return every media item of an album, following pagination.

    Args:
        creds: Credentials handed to ``discovery.build``.
        albumID: ID of the album to search.
        page_size: Number of items requested per page.

    Returns:
        A list with the ``mediaItems`` entries of all result pages (empty when
        the album contains none).
    """
    service = discovery.build('photoslibrary', 'v1', credentials=creds, static_discovery=False)
    photos = []
    next_page_token = None
    while True:
        body = {"albumId": albumID, "pageSize": page_size}
        # Only send a page token once the API has actually returned one.
        if next_page_token:
            body["pageToken"] = next_page_token
        results = service.mediaItems().search(body=body).execute()
        photos.extend(results.get('mediaItems', []))
        # NOTE: the former per-page probe `mediaItems().get(mediaItemId="test")`
        # was removed: it issued an extra request for a placeholder id on every
        # page and looked for a 'location' key, which the Library API's
        # mediaMetadata does not provide.
        next_page_token = results.get('nextPageToken')
        if not next_page_token:
            break
    return photos
def get_albums_list(creds, page_size=10):
    """Return the user's albums (``id`` and ``title`` fields only).

    Args:
        creds: Credentials handed to ``discovery.build``.
        page_size: Number of albums requested per page.

    Returns:
        A list of album dicts. While ``SHORT_EXECUTION`` is truthy only the
        first page is fetched.
    """
    service = discovery.build('photoslibrary', 'v1', credentials=creds, static_discovery=False)
    albums = []
    next_page_token = None
    while True:
        request_args = {
            "pageSize": page_size,
            "fields": "nextPageToken,albums(id,title)",
        }
        # Only send a page token once the API has actually returned one.
        if next_page_token:
            request_args["pageToken"] = next_page_token
        results = service.albums().list(**request_args).execute()
        albums.extend(results.get('albums', []))
        if DEBUG:
            print(f"Reading albums: {len(albums)} identified")
        next_page_token = results.get('nextPageToken')
        # todo remove it: SHORT_EXECUTION stops after the first page
        if SHORT_EXECUTION or not next_page_token:
            break

    if not albums:
        print('No albums found.')
    elif DEBUG:
        print('Albums:')
        for item in albums:
            # Untitled albums come back without a 'title' key, so do not
            # index it directly.
            print(f"{item.get('title', '').encode('utf8')} ({item['id']})")
    return albums
def download_photo(url, filename):
    """Download ``url`` into ``PATH_TEMP_FOLDER + filename`` unless it exists.

    Args:
        url: Address to fetch.
        filename: Name of the target file inside ``PATH_TEMP_FOLDER``.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            file is then not written, so a failed download is never mistaken
            for a cached photo on a later run.
    """
    print("in download " + filename)
    target_path = PATH_TEMP_FOLDER + filename
    if os.path.isfile(target_path):
        # Already downloaded on a previous run.
        return
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    os.makedirs(PATH_TEMP_FOLDER, exist_ok=True)
    with open(target_path, 'wb') as file:
        file.write(response.content)
def get_exif_data(photo_data):
    """Collect the EXIF fields of an image file via the ``exif`` package.

    Args:
        photo_data: Path of the image file.

    Returns:
        A dict mapping each field listed by ``exif.Image.list_all()`` to its
        value (fields that cannot be read are left out), or ``None`` when
        Pillow reports no EXIF data for the file.
    """
    result = {}
    # `with` guarantees the handle is closed; it was previously left open.
    with open(photo_data, "rb") as fp:
        exif_image = exif.Image(fp)
        for field in exif_image.list_all():
            try:
                result[field] = exif_image[field]
            except Exception:
                # Best effort: a tag the library cannot decode is skipped.
                # `Exception` (not a bare except) lets Ctrl-C through.
                continue

    # Pillow decides whether the file carries EXIF at all. The earlier loop
    # that copied its tags into local dicts was dropped because those dicts
    # were never used.
    with Image.open(photo_data) as image:
        image_exif = image._getexif()
    if not image_exif:
        return None
    return result
def extract_gps_from_image(image_path):
    """Return the GPS position stored in an image's EXIF data.

    Args:
        image_path: Path of the image file.

    Returns:
        The dict produced by ``get_gps_location`` for the tags that
        ``exifread`` finds in the file.

    When ``DEBUG`` is truthy, GPS-related tags (from ``exifread``) and fields
    (from ``get_exif_data``) are printed as well.
    """
    with open(image_path, 'rb') as image_file:
        tags = exifread.process_file(image_file, details=False)
    if DEBUG:
        for tag_name in tags:
            if "GPS" in tag_name:
                print(f"tag: {tag_name}={tags[tag_name]}")
    gps_info = get_gps_location(tags)

    # get_exif_data returns None for files without EXIF; fall back to {} so
    # the debug loop below does not fail on it.
    fields = get_exif_data(image_path) or {}
    if DEBUG:
        for field_name in fields:
            # Case-insensitive match. The former `("gps" or "GPS") in f`
            # only ever tested the lower-case spelling.
            if "gps" in field_name.lower():
                print(f"Field: {field_name}={fields[field_name]}")
    return gps_info
def get_gps_location(exif_data):
    """Convert EXIF GPS tags into signed decimal coordinates.

    Args:
        exif_data: Mapping of tag names (``'GPS GPSLatitude'``,
            ``'GPS GPSLatitudeRef'``, ``'GPS GPSLongitude'``,
            ``'GPS GPSLongitudeRef'``) to tag objects.

    Returns:
        ``{'Latitude': ..., 'Longitude': ...}``, or an empty dict when any of
        the four tags is missing. A coordinate without its hemisphere
        reference is ambiguous, so it is reported as "no position" instead of
        raising ``KeyError`` or guessing a sign.
    """
    gps_info = {}
    required_tags = (
        'GPS GPSLatitude',
        'GPS GPSLatitudeRef',
        'GPS GPSLongitude',
        'GPS GPSLongitudeRef',
    )
    if not all(tag in exif_data for tag in required_tags):
        return gps_info

    lat = convert_to_degrees(exif_data['GPS GPSLatitude'])
    lon = convert_to_degrees(exif_data['GPS GPSLongitude'])
    # Anything other than N / E is treated as the negative hemisphere.
    if exif_data['GPS GPSLatitudeRef'].values[0] != 'N':
        lat = -lat
    if exif_data['GPS GPSLongitudeRef'].values[0] != 'E':
        lon = -lon
    gps_info['Latitude'] = lat
    gps_info['Longitude'] = lon
    return gps_info
def convert_to_degrees(value):
    """Convert a degrees/minutes/seconds EXIF tag into decimal degrees.

    Args:
        value: Object whose ``values`` sequence holds up to three rationals
            (degrees, minutes, seconds), each exposing ``num`` and ``den``.
            Trailing components may be absent and then count as zero.

    Returns:
        The angle in decimal degrees as a float.

    Raises:
        ValueError: If ``value.values`` is empty.
    """
    components = value.values[:3]
    if not components:
        raise ValueError("GPS coordinate tag holds no values")
    degrees = 0.0
    # Each successive component is worth 1/60 of the previous one.
    for position, ratio in enumerate(components):
        degrees += (float(ratio.num) / float(ratio.den)) / (60.0 ** position)
    return degrees
def extract_photo_from_album(creds,album):
    """List the photos of one album and run metadata extraction on them.

    Args:
        creds: Credentials forwarded to ``get_photos_list_from_album``.
        album: Album dict; its ``"id"`` is used unless ``SHORT_EXECUTION`` is
            truthy, in which case a fixed placeholder id is queried instead.
    """
    if DEBUG:
        print(f"Album: {album}")
    #todo remove it
    target_album_id = "test_Album_code" if SHORT_EXECUTION else album["id"]
    album_photos = get_photos_list_from_album(creds, target_album_id)
    if DEBUG:
        print(f"Photos identified: {len(album_photos)}")
    extract_photo_metadata(album_photos)
def extract_photo_metadata(photos):
    """Download each still image and print the GPS positions found in them.

    Args:
        photos: Iterable of media-item dicts providing ``'baseUrl'`` and
            ``'filename'`` (and optionally ``'mimeType'``).

    Videos are skipped. Every other item is downloaded into
    ``PATH_TEMP_FOLDER`` and scanned with ``extract_gps_from_image``; when
    ``DEBUG`` is truthy the collected positions are printed at the end.
    """
    video_suffixes = ('.mov', '.mp4')
    gps_data = {}
    for photo in photos:
        print(f"Photo: {photo}")
        photo_filename = photo['filename']
        # Prefer the reported MIME type; fall back to a case-insensitive
        # extension check so '.Mov' and other video containers are caught too.
        is_video = (
            photo.get('mimeType', '').startswith('video/')
            or photo_filename.lower().endswith(video_suffixes)
        )
        if is_video:
            continue

        # '=d' requests the original bytes. The bare baseUrl serves a
        # re-encoded image with no EXIF at all. Per Google's documentation the
        # '=d' download keeps all EXIF except the location, so GPS tags are not
        # expected to be present in files fetched this way.
        photo_url = photo['baseUrl'] + "=d"
        download_photo(photo_url, photo_filename)
        if DEBUG:
            print(f"Photo name: {photo['filename']}")
        gps_info = extract_gps_from_image(PATH_TEMP_FOLDER + photo_filename)
        if gps_info:
            gps_data[photo_filename] = gps_info

    # Print the GPS locations
    for photo, location in gps_data.items():
        if DEBUG:
            print(f"Photo: {photo}, Location: {location}")
def main():
    """Authenticate, list the albums and process the photos of each one.

    Raises:
        SystemExit: Always, with status 0, once every album was processed.
    """
    creds = authenticate_google_photos()
    albums = get_albums_list(creds)
    for album in albums:
        extract_photo_from_album(creds, album)
    # `exit()` is a helper injected by the `site` module for interactive use;
    # raising SystemExit gives the same exit status without relying on it.
    raise SystemExit(0)


# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()
I noticed that if I use
photo_url = photo['baseUrl'] + "=d"
with the `=d`
parameter, it downloads all the metadata except the geolocation,
whereas without the `=d` parameter no metadata is exported. Below is the Google documentation, which does not describe any parameter for exporting all of the metadata:
https://developers.google.com/photos/library/guides/access-media-items
Many thanks in advance.
download gps meta info from photo