I just installed Spark on my local machine and tried running some simple code to check that everything works, but it keeps failing with: Python worker exited unexpectedly (crashed). This is the code I'm running:
from pyspark.rdd import RDD

def print_partitions(data):
    if isinstance(data, RDD):
        numPartitions = data.getNumPartitions()
        partitions = data.glom().collect()
    else:
        numPartitions = data.rdd.getNumPartitions()
        partitions = data.rdd.glom().collect()

    print(f"####### NUMBER OF PARTITIONS: {numPartitions}")
    for index, partition in enumerate(partitions):
        # show partition if it is not empty
        if len(partition) > 0:
            print(f"Partition {index}: {len(partition)} records")
            print(partition)

list_tutors = [(1, 'Aaditya'), (2, 'Chinnavit'), (3, 'Neha'), (4, 'Huashun'), (5, 'Mohammad'),
               (10, 'Peter'), (11, 'Paras'), (12, 'Tooba'), (3, 'David'), (18, 'Cheng'), (9, 'Haqqani')]

no_of_partitions = 4
rdd = sc.parallelize(list_tutors, no_of_partitions)
print("Number of partitions: {}".format(rdd.getNumPartitions()))
print("Partitioner: {}".format(rdd.partitioner))
print_partitions(rdd)
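
For context, `sc` above is the SparkContext that the pyspark shell predefines. When I run the script on its own I create it roughly like this (a minimal sketch; the app name is just a placeholder, and pinning PYSPARK_PYTHON / PYSPARK_DRIVER_PYTHON to the same interpreter is something I tried because a driver/worker Python version mismatch is one suspected cause of this crash):

    import os
    from pyspark import SparkConf, SparkContext

    # Point the driver and the workers at the same Python interpreter
    # (placeholder path; adjust to the local installation).
    os.environ["PYSPARK_PYTHON"] = "python3"
    os.environ["PYSPARK_DRIVER_PYTHON"] = "python3"

    # Run Spark locally, using all available cores.
    conf = SparkConf().setMaster("local[*]").setAppName("partition-test")
    sc = SparkContext(conf=conf)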