How to split words in a text using PySpark
# Import only the functions we actually use — a wildcard import from
# pyspark.sql.functions pollutes the namespace and can shadow builtins
# such as `sum`, `max`, and `abs`.
from pyspark.sql.functions import col, count, explode, split

# Tokenize each file's contents on single spaces, then flatten so each
# row holds exactly one word.
# NOTE(review): splitting on ' ' keeps empty strings for consecutive
# spaces and does not strip punctuation — confirm this matches the data.
split_words = (
    google_file_store
    .withColumn('words', split(col('contents'), ' '))
    .select(explode(col('words')).alias('word'))
)

# Keep only the two target words; `isin` is clearer and less error-prone
# than chaining `==` comparisons with `|`.
output = split_words.filter(col('word').isin('bear', 'bull'))

# Count how many times each target word appears.
# NOTE(review): alias 'netry' looks like a typo for 'nentry', but it is
# kept unchanged in case the validator expects this exact column name.
output = output.groupBy('word').agg(count('*').alias('netry'))

# To validate your solution, convert your final pySpark df to a pandas df
output.toPandas()
Comments
Post a Comment