I am using the following to download images from URLs.
import asyncio
import random
import string
import time

import aiofiles
import aiohttp

text_dict = {}
url_dict = {}

async def get(url, session, text):
    try:
        async with session.get(url=url) as response:
            if response.status == 200:
                resp = await response.read()
                # Random 8-character name, used both as the file name and as
                # the key linking the download to its caption and source URL.
                res = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
                text_dict[res] = text
                url_dict[res] = url
                # print("Successfully got url {} with resp of length {}.".format(url, len(resp)))
                f = await aiofiles.open(res + '.jpg', mode='wb')
                await f.write(resp)
                await f.close()
    except Exception as e:
        print("Unable to get url {} due to {}.".format(url, e.__class__))
async def main(urls, text):
    connector = aiohttp.TCPConnector(limit=500)
    async with aiohttp.ClientSession(connector=connector) as session:
        ret = await asyncio.gather(*(get(url, session, text[ind]) for ind, url in enumerate(urls)))
        print("Finalized all. Return is a list of len {} outputs.".format(len(ret)))
start = time.time()
await main(smaller_url, smaller_text)  # top-level await: run in a notebook/REPL, or use asyncio.run(main(...)) in a script
end = time.time()
print("Took {} seconds to pull {} websites.".format(end - start, len(smaller_url)))
I have a list of 100,000 URLs (variable smaller_url). The code seems to complete with only about 7,000 downloads, and I can't tell whether the issue is due to the TCPConnector limit or something else.
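To narrow this down, I put together a diagnostic sketch of the same structure (get_checked, main_checked, and the semaphore cap are my own names and assumptions, not part of the code above): each task returns an outcome instead of just printing, the full exception is kept with repr(e) rather than only e.__class__, and the outcomes are tallied at the end.

import asyncio
from collections import Counter

import aiohttp

async def get_checked(url, session, sem):
    try:
        async with sem:  # cap in-flight requests below the connector limit
            async with session.get(url=url, timeout=aiohttp.ClientTimeout(total=60)) as response:
                if response.status == 200:
                    await response.read()
                    return "ok"
                return "http {}".format(response.status)
    except Exception as e:
        return "error: {!r}".format(e)  # repr keeps the message, not just the class

async def main_checked(urls):
    sem = asyncio.Semaphore(100)  # assumed cap; tune for your machine/network
    connector = aiohttp.TCPConnector(limit=500)
    async with aiohttp.ClientSession(connector=connector) as session:
        results = await asyncio.gather(*(get_checked(u, session, sem) for u in urls))
    print(Counter(results).most_common(10))

In particular, I'd like to rule out aiohttp's default per-request timeout (ClientTimeout(total=300), which as far as I know also counts time spent waiting for a connection from the pool): with 100,000 tasks queued behind 500 connections, many requests could time out before they even start, and the bare e.__class__ print would only show TimeoutError without explaining why.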