I am new to Google Colab. I have written the following simple Python program, which counts from 0 to 100000000 with the help of multiple threads (because GPUs and TPUs can handle thousands of threads).
<code>import threading;
import tensorflow as tf;
# Number of worker threads to start; each worker also advances its
# counter by this amount on every step.
threadCount = 5000
def count(threadId):
    """Step a counter from ``threadId`` up to 100000000 in ``threadCount`` strides.

    Args:
        threadId: Starting value of this worker's counter.

    Returns:
        None. The counter is discarded once it reaches the limit.
    """
    # NOTE(review): the loop works on plain Python integers rather than
    # TensorFlow ops, so the device scope presumably does not change where
    # it executes -- confirm against the tf.device documentation.
    with tf.device('/device:GPU:0'):
        upper_bound = 100000000
        position = threadId
        while position < upper_bound:
            position += threadCount
def main():
    """Run ``count`` on ``threadCount`` threads and wait for all of them.

    Each thread is started as soon as it is created. Once every thread
    has been joined, "Complete" is printed.
    """
    with tf.device('/device:GPU:0'):
        workers = []
        for worker_id in range(threadCount):
            worker = threading.Thread(target=count, args=(worker_id,))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        print("Complete")


main()
</code>
<code>import threading;
import tensorflow as tf;
# Number of worker threads to start; each worker also advances its
# counter by this amount on every step.
threadCount = 5000
def count(threadId):
    """Step a counter from ``threadId`` up to 100000000 in ``threadCount`` strides.

    Args:
        threadId: Starting value of this worker's counter.

    Returns:
        None. The counter is discarded once it reaches the limit.
    """
    # NOTE(review): the loop works on plain Python integers rather than
    # TensorFlow ops, so the device scope presumably does not change where
    # it executes -- confirm against the tf.device documentation.
    with tf.device('/device:GPU:0'):
        upper_bound = 100000000
        position = threadId
        while position < upper_bound:
            position += threadCount
def main():
    """Run ``count`` on ``threadCount`` threads and wait for all of them.

    Each thread is started as soon as it is created. Once every thread
    has been joined, "Complete" is printed.
    """
    with tf.device('/device:GPU:0'):
        workers = []
        for worker_id in range(threadCount):
            worker = threading.Thread(target=count, args=(worker_id,))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        print("Complete")


main()
</code>
import threading;
import tensorflow as tf;
# Number of worker threads to start; each worker also advances its
# counter by this amount on every step.
threadCount = 5000
def count(threadId):
    """Step a counter from ``threadId`` up to 100000000 in ``threadCount`` strides.

    Args:
        threadId: Starting value of this worker's counter.

    Returns:
        None. The counter is discarded once it reaches the limit.
    """
    # NOTE(review): the loop works on plain Python integers rather than
    # TensorFlow ops, so the device scope presumably does not change where
    # it executes -- confirm against the tf.device documentation.
    with tf.device('/device:GPU:0'):
        upper_bound = 100000000
        position = threadId
        while position < upper_bound:
            position += threadCount
def main():
    """Run ``count`` on ``threadCount`` threads and wait for all of them.

    Each thread is started as soon as it is created. Once every thread
    has been joined, "Complete" is printed.
    """
    with tf.device('/device:GPU:0'):
        workers = []
        for worker_id in range(threadCount):
            worker = threading.Thread(target=count, args=(worker_id,))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        print("Complete")


main()
Then I changed the Google Colab runtime to TPU v2 and ran the code, hoping that it would run super fast on TPUs.
But there was no performance increase, and I realized that this code did not run on TPUs.
What should I do to run this code on GPU threads or TPU threads so that it completes super fast?