I’m using Scala 2.12, Spark 3.1.2, and JDK 1.8, and I’m facing what looks to be a dependency issue:

    A needed class was not found. This could be due to an error in your runpath. Missing class: org/json4s/JsonAST$JValue

The $scalaMajorVersion and $sparkVersion placeholders mentioned below map to these version numbers.
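For reference, these placeholders are defined in versions.gradle (applied from the root build script shown further down); the relevant entries look roughly like this (excerpt only, structure approximated, other version properties omitted):

// versions.gradle (excerpt) — the placeholders used throughout the build scripts below
ext {
    scalaMajorVersion = '2.12'
    sparkVersion      = '3.1.2'
}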
I have created a reusable testing fixture that I would like to use across various submodules in my Scala project to unit-test Spark-specific logic.
My project is broken down as follows:
├───module_a
│       build.gradle
│       src/
│           main/
│               scala/
│                   SparkTransformationLogic.scala
│           tests/
│               TestSparkTransformationLogic.scala
├───common-test-utils
│       build.gradle
│       src/
│           main/
│               scala/
│                   SparkTestingFixture.scala
├───module_c
│       build.gradle
│       src/
│           main/
│               scala/
│                   FooBarLogic.scala
│           tests/
│               TestFooBarLogic.scala
build.gradle
I want to reuse the SparkTestingFixture that I’ve created in the common-test-utils module. Ideally the Spark tests would use their own isolated set of dependencies, but I’m stuck both on how to achieve that isolation and on debugging the json4s error mentioned above.
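To illustrate the kind of isolation I’m after: module_a should see the fixture on its test classpath only. A minimal sketch of what I mean, using Gradle’s built-in java-test-fixtures plugin (which I have not actually wired up, so treat it as hypothetical):

// module_a/build.gradle — sketch of the intended consumption, not my current setup
dependencies {
    // test-only dependency on the fixture; nothing leaks into the main classpath
    testImplementation testFixtures(project(":common-test-utils"))
}

My current setup instead pulls the fixture in via a plain implementation dependency, as shown below.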
Module A build.gradle:

apply plugin: 'com.company.gradle.foobar.library'

dependencies {
    implementation project(":common-test-utils")
    implementation("org.apache.spark:spark-core_$scalaMajorVersion:$sparkVersion") {
        exclude group: "org.apache.logging.log4j"
    }
    implementation "org.apache.spark:spark-mllib_$scalaMajorVersion:$sparkVersion"
    implementation("org.apache.spark:spark-sql_$scalaMajorVersion:$sparkVersion") {
        exclude group: "org.apache.logging.log4j"
    }
    implementation "org.apache.spark:spark-streaming_$scalaMajorVersion:$sparkVersion"
    implementation "org.apache.spark:spark-hive_$scalaMajorVersion:$sparkVersion"
    implementation "com.company.yahoobug:jvm-config-api:$jvmConfigVersion"
    implementation "com.company.yahoobug:sparkutils-environment:$sparkUtilsVersion"
    implementation "com.company.yahoobug:sparkutils-job:$sparkUtilsVersion"
    implementation "com.company.yahoobug:sparkutils-logging:$sparkUtilsVersion"
    implementation "com.company.yahoobug:sparkutils-spark:$sparkUtilsVersion"
    testImplementation "org.scalatest:scalatest_$scalaMajorVersion:$scalaTestVersion"
    testImplementation "org.slf4j:slf4j-api:1.7.36"
    testImplementation "org.json4s:json4s-jackson_2.12:3.6.7"
}
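My main suspect is the json4s-jackson 3.6.7 pin above: as far as I can tell from the Spark poms, Spark 3.1.2 pulls in json4s 3.7.0-M5, and json4s 3.6.x and 3.7.x are not binary compatible, which would explain the missing org/json4s/JsonAST$JValue. One workaround I’ve considered (not yet verified) is forcing a single json4s version across all configurations instead of pinning it per module:

// sketch: align every configuration on the json4s version that Spark 3.1.2
// itself depends on (3.7.0-M5, if I'm reading the poms correctly)
configurations.all {
    resolutionStrategy {
        force "org.json4s:json4s-ast_$scalaMajorVersion:3.7.0-M5",
              "org.json4s:json4s-core_$scalaMajorVersion:3.7.0-M5",
              "org.json4s:json4s-jackson_$scalaMajorVersion:3.7.0-M5"
    }
}

In the meantime I’ve been inspecting what actually resolves with ./gradlew :module_a:dependencyInsight --configuration testRuntimeClasspath --dependency json4s.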
Module common-test-utils build.gradle:

plugins {
    id 'scala'
}

apply plugin: "java"
apply plugin: "scala"

dependencies {
    implementation("org.apache.spark:spark-core_$scalaMajorVersion:$sparkVersion")
    implementation "org.apache.spark:spark-mllib_$scalaMajorVersion:$sparkVersion"
    implementation("org.apache.spark:spark-sql_$scalaMajorVersion:$sparkVersion")
    implementation "org.apache.spark:spark-streaming_$scalaMajorVersion:$sparkVersion"
    implementation "org.apache.spark:spark-hive_$scalaMajorVersion:$sparkVersion"
    implementation "org.scalatest:scalatest_$scalaMajorVersion:$scalaTestVersion"
    implementation "org.json4s:json4s-core_2.12:3.6.7"
    implementation "org.json4s:json4s-native_2.12:3.6.7"
    implementation "org.json4s:json4s-jackson_2.12:3.6.7"
}
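The direction I’ve been leaning for the fixture itself is the same java-test-fixtures plugin, so that common-test-utils exposes the fixture as a test-fixtures component rather than as plain implementation dependencies. A rough sketch of how this module’s build could look (hypothetical: it assumes the fixture source moves to src/testFixtures/scala, and I haven’t confirmed how it interacts with the plugins above):

// common-test-utils/build.gradle — hypothetical test-fixtures variant
plugins {
    id 'scala'
    id 'java-test-fixtures'
}

dependencies {
    // the fixture compiles against Spark and ScalaTest, but only test
    // consumers of testFixtures(...) ever see these on their classpath
    testFixturesImplementation "org.apache.spark:spark-sql_$scalaMajorVersion:$sparkVersion"
    testFixturesImplementation "org.scalatest:scalatest_$scalaMajorVersion:$scalaTestVersion"
}

Consumers would then declare testImplementation testFixtures(project(":common-test-utils")), as in the earlier sketch.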
Root project build.gradle:
plugins {
    id 'com.foobar.gradle.repos' version '0.3.10'
    id 'com.foobar.gradle.foozy.library' version "1.0.23"
    id 'com.github.maiflai.scalatest' version '0.35'
    id 'com.foobar.gradle.checks' version '1.2.3'
}

apply from: 'versions.gradle'

allprojects {
    apply plugin: "com.github.maiflai.scalatest"
    group 'com.foobar.foozy'
}

subprojects {
    apply plugin: 'java-library'
    apply plugin: 'scala'
    apply plugin: "com.foobar.gradle.repos"
    apply plugin: "com.github.maiflai.scalatest"

    fooBarRepositoryPlugin {
        forceGuavaVersion = false
    }

    tasks.withType(JavaCompile) {
        sourceCompatibility = "1.8"
        targetCompatibility = "1.8"
    }

    configurations {
        sparkTestImplementation
        sparkTestRuntimeOnly
    }
    dependencies {
        implementation "org.scala-lang:scala-library:$scalaVersion"
        implementation "org.scala-lang.modules:scala-java8-compat_$scalaMajorVersion:0.9.1"
        implementation "org.openjdk.jol:jol-core:0.9" // for local object size diagnostics
        implementation "commons-io:commons-io:2.4"
        implementation "org.apache.hadoop:hadoop-common:$hadoopVersion"
        implementation "org.apache.hadoop:hadoop-mapreduce-client-core:$hadoopVersion"
        implementation("org.apache.spark:spark-core_$scalaMajorVersion:$sparkVersion") {
            exclude group: "org.apache.logging.log4j"
        }
        implementation "org.apache.spark:spark-mllib_$scalaMajorVersion:$sparkVersion"
        implementation "org.apache.spark:spark-sql_$scalaMajorVersion:$sparkVersion"
        implementation "org.apache.spark:spark-streaming_$scalaMajorVersion:$sparkVersion"
        implementation "org.apache.spark:spark-hive_$scalaMajorVersion:$sparkVersion"
        implementation "org.xerial.snappy:snappy-java:1.1.2.1"
        implementation "com.amazonaws:aws-java-sdk-s3:1.11.333"
        implementation "com.amazonaws:aws-java-sdk-ec2:1.11.333"
        implementation "com.amazonaws:aws-java-sdk-kinesis:1.11.333"
        implementation "joda-time:joda-time:$jodaVersion"
        implementation "com.github.nscala-time:nscala-time_$scalaMajorVersion:2.14.0"
        compile "com.datadoghq:java-dogstatsd-client:$dogstatsdVersion"
        testImplementation "junit:junit:4.12"
        testImplementation "org.scalatestplus:scalatestplus-junit_$scalaMajorVersion:1.0.0-M2"
        testImplementation "org.scalatest:scalatest_$scalaMajorVersion:$scalaTestVersion"
        testImplementation "org.mockito:mockito-all:$mockitoVersion"
        implementation "com.vladsch.flexmark:flexmark-all:0.62.2"
        testRuntimeOnly "org.slf4j:slf4j-jdk14:1.7.30"
        testImplementation 'org.apache.logging.log4j:log4j-1.2-api:2.17.2'
    }
    buildscript {
        repositories {
            jcenter()
        }
        dependencies {
            classpath 'com.github.jengelman.gradle.plugins:shadow:5.0.0'
        }
    }
}
checks {
    checkScalaStyle = false
}

// Task to copy audit test data
task zipTipZap {
    description "Publish task"
    doLast {
        println("gathering audit data for $project.name")
        copy { // test results
            from('.') {
                include '**/test-results/**/TEST-*.xml'
                exclude '**/build/audit_data' // exclude itself from scanning
            }
            into 'build/audit_data/test_results'
            includeEmptyDirs false
        }
    }
}
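The sparkTestImplementation and sparkTestRuntimeOnly configurations declared in subprojects above are the stub of the isolation idea. A rough sketch of how they could be wired to a dedicated source set per subproject (hypothetical, not currently in the build):

// sketch: a dedicated sparkTest source set with its own classpath, per subproject
sourceSets {
    sparkTest {
        scala.srcDir 'src/sparkTest/scala'
        compileClasspath += sourceSets.main.output
        runtimeClasspath += sourceSets.main.output
    }
}

// defining the source set creates sparkTestImplementation / sparkTestRuntimeOnly;
// let them inherit the production dependencies
configurations {
    sparkTestImplementation.extendsFrom implementation
    sparkTestRuntimeOnly.extendsFrom runtimeOnly
}

// run the isolated Spark tests with: ./gradlew sparkTest
task sparkTest(type: Test) {
    testClassesDirs = sourceSets.sparkTest.output.classesDirs
    classpath = sourceSets.sparkTest.runtimeClasspath
}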
If anyone has suggestions as to what is causing the dependency collision with json4s, or how to keep the Spark tests isolated with their own set of dependencies so as to avoid the json4s error, I would appreciate the guidance!