AWS Glue lets users run Spark jobs in an AWS environment. To start an AWS Glue session against an UltiHash cluster deployed on AWS, import the AWS Glue modules and configure the S3A connector as shown below:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
## @params: [JOB_NAME]
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
# Define the UltiHash endpoint URL
s3_endpoint = "<https://ultihash.cluster.io>"
sc = SparkContext()
# UltiHash does not yet support authentication, so any non-empty access and secret keys are accepted
sc._jsc.hadoopConfiguration().set("fs.s3a.access.key", "mocked")  # Replace with real UltiHash credentials once authentication is supported
sc._jsc.hadoopConfiguration().set("fs.s3a.secret.key", "mocked")  # Replace with real UltiHash credentials once authentication is supported
# The S3 endpoint is a URL pointing to the deployed UltiHash cluster
sc._jsc.hadoopConfiguration().set("fs.s3a.endpoint", s3_endpoint)
# S3 path style access has to be enabled
sc._jsc.hadoopConfiguration().set("fs.s3a.path.style.access", "true")
# Use the S3A filesystem implementation for s3a:// paths
sc._jsc.hadoopConfiguration().set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
# Disable SSL only if the UltiHash endpoint is served over plain HTTP; keep it enabled for HTTPS endpoints
sc._jsc.hadoopConfiguration().set("fs.s3a.connection.ssl.enabled", "false")
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)
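With the session initialized, objects in the UltiHash cluster are addressed through standard s3a:// paths. The following is a minimal sketch of a read/write round trip; the bucket name example-bucket and the object paths are hypothetical placeholders, not part of the UltiHash setup above:
# Minimal usage sketch: "example-bucket" and the object paths are hypothetical placeholders
# Read a CSV object from the UltiHash cluster into a Spark DataFrame
df = spark.read.csv("s3a://example-bucket/input/data.csv", header=True, inferSchema=True)
df.printSchema()
# Write the result back to the cluster in Parquet format
df.write.mode("overwrite").parquet("s3a://example-bucket/output/data-parquet/")
# Glue jobs should end with a commit so that job state (e.g. bookmarks) is persisted
job.commit()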