New Project Code
New Spark Project Code
Section titled “New Spark Project Code”
All the imports and UDFs together:
Section titled “All the imports and UDFs together:”
from pyspark.sql.functions import sum, max, min, first, when, col, lag, lead, count, lit, month, year, to_date, concat_ws, last_day, row_number, desc_nulls_last, substring_index
from pyspark.sql.window import Window
from pyspark.sql.types import StringType, DateType, TimestampType
import pandas as pd
import re
def renameAllColumns(df):
    """Return ``df`` with every column renamed to a cleaned-up name.

    For each column name the first character is case-folded and, in the
    remainder of the name, every character other than an ASCII letter, a
    digit, a dot or a newline is removed. Columns are renamed one at a
    time through ``withColumnRenamed``.

    Args:
        df: Object exposing ``columns`` (the current column names) and
            ``withColumnRenamed(old, new)`` -- presumably a PySpark
            DataFrame; confirm against callers.

    Returns:
        The result of chaining ``withColumnRenamed`` over all columns.
    """
    # Raw string: the earlier non-raw '\.' was an invalid string escape.
    # Inside a character class a bare '.' is already a literal dot.
    disallowed = re.compile(r"[^a-zA-Z0-9\n.]")

    renamed = df
    for old_name in df.columns:
        # Slice with [:1] rather than index with [0] so that an empty
        # column name does not raise IndexError.
        new_name = old_name[:1].casefold() + disallowed.sub("", old_name[1:])
        renamed = renamed.withColumnRenamed(old_name, new_name)
    return renamed
Just imports
Section titled “Just imports”
from pyspark.sql.functions import sum, max, min, first, when, col, lag, lead, count, lit, month, year, to_date, concat_ws, last_day, row_number, desc_nulls_last, substring_index
from pyspark.sql.window import Window
from pyspark.sql.types import StringType, DateType, TimestampType
import pandas as pd
import re
Rename all columns with no special characters
Section titled “Rename all columns with no special characters”
import re
def renameAllColumns(df):
    """Return ``df`` with every column renamed to a cleaned-up name.

    For each column name the first character is case-folded and, in the
    remainder of the name, every character other than an ASCII letter, a
    digit, a dot or a newline is removed. Columns are renamed one at a
    time through ``withColumnRenamed``.

    Args:
        df: Object exposing ``columns`` (the current column names) and
            ``withColumnRenamed(old, new)`` -- presumably a PySpark
            DataFrame; confirm against callers.

    Returns:
        The result of chaining ``withColumnRenamed`` over all columns.
    """
    # Raw string: the earlier non-raw '\.' was an invalid string escape.
    # Inside a character class a bare '.' is already a literal dot.
    disallowed = re.compile(r"[^a-zA-Z0-9\n.]")

    renamed = df
    for old_name in df.columns:
        # Slice with [:1] rather than index with [0] so that an empty
        # column name does not raise IndexError.
        new_name = old_name[:1].casefold() + disallowed.sub("", old_name[1:])
        renamed = renamed.withColumnRenamed(old_name, new_name)
    return renamed