Create Spark cluster and run custom jar on it.
# Launch an EMR 4.5.0 cluster with Spark, apply custom Spark/Hadoop config,
# run one spark-submit step from a jar in S3, and terminate when the step finishes.
# NOTE: the --steps shorthand (and the Args=[...] list) must contain NO spaces,
# otherwise the shell splits it into separate arguments and the CLI rejects it.
# Tags use the documented key=value form.
aws emr create-cluster \
  --release-label emr-4.5.0 \
  --name mySparkCluster \
  --instance-type r3.2xlarge \
  --instance-count 3 \
  --tags MyTag1=MyValue1 MyTag2=MyValue2 \
  --enable-debugging \
  --ec2-attributes KeyName=MyKeyPair \
  --use-default-roles \
  --log-uri s3://my-bucket/logs \
  --applications Name=Spark \
  --configurations file:///home/dmitry/projects/mySparkApp/spark-config.json \
  --steps Type=CUSTOM_JAR,Name=MySparkApp,Jar=command-runner.jar,Args=[spark-submit,--class,myApps.MySparkApp,s3://myBucket/jars/mySparkApp-0.0.1-SNAPSHOT.jar,--commandLineParam1=value1,--commandLineParam2=value2] \
  --auto-terminate
spark-config.json file:
[
  {
    "Classification": "spark-defaults",
    "Properties": {
      "spark.dynamicAllocation.enabled": "false",
      "spark.executor.instances": "240",
      "spark.master": "yarn-cluster",
      "spark.executor.memory": "8g",
      "spark.driver.maxResultSize": "0"
    }
  },
  {
    "Classification": "hadoop-env",
    "Configurations": [
      {
        "Classification": "export",
        "Configurations": [],
        "Properties": {
          "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
        }
      }
    ],
    "Properties": {}
  },
  {
    "Classification": "spark-env",
    "Configurations": [
      {
        "Classification": "export",
        "Configurations": [],
        "Properties": {
          "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
        }
      }
    ],
    "Properties": {}
  }
]