We have a Spark application running on a cluster. After adding a new Spark worker, it started throwing this error:
Job aborted due to stage failure: Task 0 in stage 9903.0 failed 4 times, most recent failure: Lost task 0.3 in stage 9903.0 (TID 32740, 156.140.6.71, executor 5): java.lang.NoClassDefFoundError: Could not initialize class org.xerial.snappy.Snappy
at org.apache.parquet.hadoop.codec.SnappyDecompressor.decompress(SnappyDecompressor.java:62)
at org.apache.parquet.hadoop.codec.NonBlockedDecompressorStream.read(NonBlockedDecompressorStream.java:51)
at java.io.DataInputStream.readFully(DataInputStream.java:195)
at java.io.DataInputStream.readFully(DataInputStream.java:169)
at org.apache.parquet.bytes.BytesInput$StreamBytesInput.toByteArray(BytesInput.java:263)
at org.apache.parquet.hadoop.DictionaryPageReader.reusableCopy(DictionaryPageReader.java:117)
at org.apache.parquet.hadoop.DictionaryPageReader.readDictionaryPage(DictionaryPageReader.java:100)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.expandDictionary(DictionaryFilter.java:80)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:180)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:50)
at org.apache.parquet.filter2.predicate.Operators$NotEq.accept(Operators.java:195)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:360)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:50)
at org.apache.parquet.filter2.predicate.Operators$And.accept(Operators.java:309)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:360)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.visit(DictionaryFilter.java:50)
at org.apache.parquet.filter2.predicate.Operators$And.accept(Operators.java:309)
at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter.canDrop(DictionaryFilter.java:59)
at org.apache.parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:104)
at org.apache.parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:43)
at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
at org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups(RowGroupFilter.java:69)
at org.apache.parquet.hadoop.ParquetFileReader.filterRowGroups(ParquetFileReader.java:751)
at org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:644)
at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:148)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:418)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage16.scan_nextBatch_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage16.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:255)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:836)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
Can anyone help me with this issue?
Check that your Spark application has access to the temp directory on the new worker. When the Snappy class initializes, snappy-java extracts its native library (libsnappyjava.so) into java.io.tmpdir (/tmp by default); if the Spark user on the new worker cannot write to that directory, or it is mounted noexec, the class fails to initialize and you get exactly this NoClassDefFoundError on that executor.
Step 1:
Make sure the user running the Spark executors has write access to the temp directory (java.io.tmpdir, typically /tmp) on the new worker, and that the directory is not mounted noexec. A quick check is shown below.
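A minimal sketch of the check, run on the new worker as the user that launches the executors (assuming the default /tmp; your java.io.tmpdir may differ):

    ls -ld /tmp                  # should show write permission, e.g. drwxrwxrwt
    mount | grep ' /tmp '        # a 'noexec' flag here will break native-library loading
    touch /tmp/snappy-test && rm /tmp/snappy-test   # confirm the user can actually write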
Step 2:
Try adding these lines to your spark-defaults.conf file (see the sketch below).
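A minimal sketch, assuming the cause is an inaccessible default temp directory; /data/tmp is a placeholder for any directory that is writable (and executable) by the Spark user on every worker:

    # point the executor and driver JVMs at a usable temp directory
    spark.executor.extraJavaOptions  -Djava.io.tmpdir=/data/tmp
    spark.driver.extraJavaOptions    -Djava.io.tmpdir=/data/tmp

Alternatively, snappy-java honours its own property, so adding -Dorg.xerial.snappy.tempdir=/data/tmp to the same options redirects only the Snappy native-library extraction. Restart the workers/application after changing the config so the executors pick up the new JVM options.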