I’m trying to use websocket in python to get all comments from a video but for some reason some times the response comes broken or my code breaks it as shown here at “updatedAt”: “20
90
24-05-23T04:32:43Z”. this should come in one single line but its breaking line and my code :
{
"kind": "youtube#commentThread",
"etag": "tDBWc_n2heSBY8O1O5pmnreVtLs",
"id": "Ugx44hu-DqxbmnInPQp4AaABAg",
"snippet": {
"channelId": "UCBJycsmduvYEL83R_U4JriQ",
"videoId": "1KEtxTQUzxY",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "uTgV5p5VmHXEqonQjGj4nXMh2m8",
"id": "Ugx44hu-DqxbmnInPQp4AaABAg",
"snippet": {
"channelId": "UCBJycsmduvYEL83R_U4JriQ",
"videoId": "1KEtxTQUzxY",
"textDisplay": "I wonder if this would be a good product for people who want to practice rollerskating but don't have lots of space ????",
"textOriginal": "I wonder if this would be a good product for people who want to practice rollerskating but don't have lots of space ????",
"authorDisplayName": "@denden8372",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AIdro_k1FwzFYDNQwpt1EpUQ_ghYZQrg5uxdzopNsKu3i9MGFkU=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/@denden8372",
"authorChannelId": {
"value": "UCRuGL6tDdqJZXekXlali8hQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2024-05-23T04:31:57Z",
"updatedAt": "20
90
24-05-23T04:32:43Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
}
]
}
I’m connecting to the port 443 of youtube api, receiving the response in bytes, decoding in utf-8 and trying to parse it to json but, with the example above, json.load can’t do it. Here’s my code:
import socket
import ssl
import json
import re
# Your YouTube API key
API_KEY = 'AIzaSyBYEN3s2iRoDREu4qK3ieS1fTACdbsjXkc'
VIDEO_ID = '1KEtxTQUzxY'
HOST = 'www.googleapis.com'
PORT = 443
def create_request(video_id, page_token=None):
base_url = f'/youtube/v3/commentThreads?part=snippet&videoId={video_id}&key={API_KEY}&maxResults=20'
if page_token:
base_url += f'&pageToken={page_token}'
request = f"GET {base_url} HTTP/1.1rnHost: {HOST}rnConnection: closernrn"
return request
def fetch_comments(video_id):
comments = []
page_token = None
while True:
request = create_request(video_id, page_token)
# Create a socket and wrap it with SSL for HTTPS
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
context = ssl.create_default_context()
with context.wrap_socket(sock, server_hostname=HOST) as ssock:
ssock.connect((HOST, PORT))
ssock.sendall(request.encode('utf-8'))
response = b''
while True:
data = ssock.recv(4096)
if not data:
break
response += data
response = response.decode('utf-8')
headers, body =re.split('rnrn....rn',response)
body = body.rstrip('0rn')
json_body = json.loads(body)
for item in json_body.get('items', []):
comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
comments.append(comment)
page_token = json_body.get('nextPageToken')
if not page_token:
break
return comments
if __name__ == "__main__":
all_comments = fetch_comments(VIDEO_ID)
for comment in all_comments:
print(comment)
I’ve tried rewriting the code 20 times but i cant solve this.
Bruno Augusto is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.