Spark submit (local mode) running a jar that reads and writes PostgreSQL data fails with PSQLException: ERROR: relation "dolphins1_copy2" already exists #16713
gwb1987719 asked this question in Q&A
The code in the jar is as follows:
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession
    .builder()
    .appName("Java Spark SQL data sources example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate();

// Read the source table from PostgreSQL over JDBC.
Dataset<Row> jdbcDF = spark.read()
    .format("jdbc")
    .option("url", "jdbc:postgresql://ip:5432/db1")
    .option("dbtable", "public.dolphins1")
    .option("user", "cas")
    .option("password", "cas")
    .load();
jdbcDF.createOrReplaceTempView("dolphins1");
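For context, the write side of the job is not shown, but the stack trace below ends in DataFrameWriter.save() at SQLAPP.java:37, and the failure happens inside Spark's JdbcUtils.createTable: Spark concluded the target table did not exist and issued a CREATE TABLE, while PostgreSQL reports that relation "dolphins1_copy2" is already there (typically a leftover from a previous run). A minimal sketch of what the write presumably looks like, with the save mode made explicit so the CREATE no longer collides; the table name is taken from the error message, the other options mirror the read side and are assumptions:

import org.apache.spark.sql.SaveMode;

// Hypothetical write side (the actual code at SQLAPP.java:37 is not shown).
// The default mode, SaveMode.ErrorIfExists, makes Spark create the target
// table and fail when the relation already exists in PostgreSQL.
jdbcDF.write()
    .format("jdbc")
    .option("url", "jdbc:postgresql://ip:5432/db1")
    .option("dbtable", "public.dolphins1_copy2")
    .option("user", "cas")
    .option("password", "cas")
    .mode(SaveMode.Append)   // or SaveMode.Overwrite to drop and recreate
    .save();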
The full error log is as follows:
[LOG-PATH]: /opt/dolphinscheduler/logs/20241020/15356191508960/12/412/791.log, [HOST]: Host(ip=172.18.0.5, port=1234)
[INFO] 2024-10-20 17:28:12.995 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:12.997 +0800 - ********************************* Initialize task context ***********************************
[INFO] 2024-10-20 17:28:12.997 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:12.997 +0800 - Begin to initialize task
[INFO] 2024-10-20 17:28:12.997 +0800 - Set task startTime: 1729416492997
[INFO] 2024-10-20 17:28:12.998 +0800 - Set task appId: 412_791
[INFO] 2024-10-20 17:28:12.998 +0800 - End initialize task {
"taskInstanceId" : 791,
"taskName" : "spark-submit-sql-api",
"firstSubmitTime" : 1729416492987,
"startTime" : 1729416492997,
"taskType" : "SPARK",
"workflowInstanceHost" : "172.18.0.7:5678",
"host" : "172.18.0.5:1234",
"logPath" : "/opt/dolphinscheduler/logs/20241020/15356191508960/12/412/791.log",
"processId" : 0,
"processDefineCode" : 15356191508960,
"processDefineVersion" : 12,
"processInstanceId" : 412,
"scheduleTime" : 0,
"executorId" : 1,
"cmdTypeIfComplement" : 0,
"tenantCode" : "default",
"processDefineId" : 0,
"projectId" : 0,
"projectCode" : 15355154630752,
"taskParams" : "{"localParams":[],"rawScript":"","resourceList":[{"id":-1,"resourceName":"file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar","res":null}],"programType":"SCALA","mainClass":"com.jdyx.deploy.demo.SQLAPP","mainJar":{"id":-1,"resourceName":"file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar","res":null},"deployMode":"local","mainArgs":"","others":"--driver-class-path ../jars/postgresql-42.2.18.jar --jars ../jars/postgresql-42.2.18.jar","yarnQueue":"","driverCores":1,"driverMemory":"512M","numExecutors":2,"executorMemory":"2G","executorCores":2,"sqlExecutionType":"SCRIPT"}",
"environmentConfig" : "export SPARK_HOME=/opt/soft/spark/spark-local\nexport SPARK_SQL_LAUNCHER=/opt/soft/spark/spark-local/bin/spark-sql\nexport JAVA_HOME=/opt/java/openjdk\nexport SPARK_SUBMIT_LAUNCHER=/opt/soft/spark/spark-local/bin/spark-submit\nexport PATH=$SPARK_SQL_LAUNCHER:$JAVA_HOME/bin:$SPARK_SUBMIT_LAUNCHER:$PATH",
"prepareParamsMap" : {
"system.task.definition.name" : {
"prop" : "system.task.definition.name",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "spark-submit-sql-api"
},
"system.project.name" : {
"prop" : "system.project.name",
"direct" : "IN",
"type" : "VARCHAR",
"value" : null
},
"system.project.code" : {
"prop" : "system.project.code",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "15355154630752"
},
"system.workflow.instance.id" : {
"prop" : "system.workflow.instance.id",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "412"
},
"system.biz.curdate" : {
"prop" : "system.biz.curdate",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "20241020"
},
"system.biz.date" : {
"prop" : "system.biz.date",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "20241019"
},
"system.task.instance.id" : {
"prop" : "system.task.instance.id",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "791"
},
"system.workflow.definition.name" : {
"prop" : "system.workflow.definition.name",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "spark-submit-sql-api"
},
"system.task.definition.code" : {
"prop" : "system.task.definition.code",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "15356165839969"
},
"system.workflow.definition.code" : {
"prop" : "system.workflow.definition.code",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "15356191508960"
},
"system.datetime" : {
"prop" : "system.datetime",
"direct" : "IN",
"type" : "VARCHAR",
"value" : "20241020172812"
}
},
"taskAppId" : "412_791",
"taskTimeout" : 2147483647,
"workerGroup" : "default",
"delayTime" : 0,
"currentExecutionStatus" : "SUBMITTED_SUCCESS",
"endTime" : 0,
"resources" : {
"file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar" : "",
"file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar" : ""
},
"dryRun" : 0,
"paramsMap" : { },
"cpuQuota" : -1,
"memoryMax" : -1,
"testFlag" : 0,
"logBufferEnable" : false,
"dispatchFailTimes" : 0
}
[INFO] 2024-10-20 17:28:12.999 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:12.999 +0800 - ********************************* Load task instance plugin *********************************
[INFO] 2024-10-20 17:28:12.999 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:12.999 +0800 - Send task status RUNNING_EXECUTION master: 172.18.0.5:1234
[WARN] 2024-10-20 17:28:12.999 +0800 - Current tenant is default tenant, will use root to execute the task
[INFO] 2024-10-20 17:28:12.999 +0800 - TenantCode: default check successfully
[INFO] 2024-10-20 17:28:13.000 +0800 - WorkflowInstanceExecDir: /opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791 check successfully
[INFO] 2024-10-20 17:28:13.000 +0800 - get resource file from path:file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar
[INFO] 2024-10-20 17:28:13.004 +0800 - get resource file from path:file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar
[INFO] 2024-10-20 17:28:13.009 +0800 - Download resources: {file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar=file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar, file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar=file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar} successfully
[INFO] 2024-10-20 17:28:13.009 +0800 - Download upstream files: [] successfully
[INFO] 2024-10-20 17:28:13.009 +0800 - Task plugin instance: SPARK create successfully
[INFO] 2024-10-20 17:28:13.009 +0800 - Initialize spark task params {
"localParams" : [ ],
"varPool" : null,
"mainJar" : {
"id" : -1,
"resourceName" : "file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar",
"res" : null
},
"mainClass" : "com.jdyx.deploy.demo.SQLAPP",
"deployMode" : "local",
"mainArgs" : "",
"driverCores" : 1,
"driverMemory" : "512M",
"numExecutors" : 2,
"executorCores" : 2,
"executorMemory" : "2G",
"appName" : null,
"yarnQueue" : "",
"others" : "--driver-class-path ../jars/postgresql-42.2.18.jar --jars ../jars/postgresql-42.2.18.jar",
"programType" : "SCALA",
"rawScript" : "",
"namespace" : null,
"resourceList" : [ {
"id" : -1,
"resourceName" : "file:/opt/soft/upload/dolphinscheduler/default/resources/postgresql-42.2.18.jar",
"res" : null
} ],
"sqlExecutionType" : "SCRIPT"
}
[INFO] 2024-10-20 17:28:13.010 +0800 - Success initialized task plugin instance successfully
[INFO] 2024-10-20 17:28:13.010 +0800 - Set taskVarPool: null successfully
[INFO] 2024-10-20 17:28:13.010 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:13.010 +0800 - ********************************* Execute task instance *************************************
[INFO] 2024-10-20 17:28:13.010 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:13.011 +0800 - Final Shell file is :
#!/bin/bash
BASEDIR=$(cd `dirname $0`; pwd)
cd $BASEDIR
export SPARK_HOME=/opt/soft/spark/spark-local
export SPARK_SQL_LAUNCHER=/opt/soft/spark/spark-local/bin/spark-sql
export JAVA_HOME=/opt/java/openjdk
export SPARK_SUBMIT_LAUNCHER=/opt/soft/spark/spark-local/bin/spark-submit
export PATH=$SPARK_SQL_LAUNCHER:$JAVA_HOME/bin:$SPARK_SUBMIT_LAUNCHER:$PATH
${SPARK_HOME}/bin/spark-submit --master local --class com.jdyx.deploy.demo.SQLAPP --conf spark.driver.cores=1 --conf spark.driver.memory=512M --conf spark.executor.instances=2 --conf spark.executor.cores=2 --conf spark.executor.memory=2G --driver-class-path ../jars/postgresql-42.2.18.jar --jars ../jars/postgresql-42.2.18.jar file:/opt/soft/upload/dolphinscheduler/default/resources/test_deploy_jar-1.0-SNAPSHOT.jar
[INFO] 2024-10-20 17:28:13.011 +0800 - Executing shell command : sudo -u root -i /opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791/412_791.sh
[INFO] 2024-10-20 17:28:13.014 +0800 - process start, process id is: 16911
[INFO] 2024-10-20 17:28:15.017 +0800 - ->
24/10/20 17:28:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
24/10/20 17:28:15 WARN DependencyUtils: Local jar /opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791/../jars/postgresql-42.2.18.jar does not exist, skipping.
[INFO] 2024-10-20 17:28:16.018 +0800 - ->
24/10/20 17:28:15 INFO SparkContext: Running Spark version 3.0.0
24/10/20 17:28:15 INFO ResourceUtils: ==============================================================
24/10/20 17:28:15 INFO ResourceUtils: Resources for spark.driver:
[INFO] 2024-10-20 17:28:17.019 +0800 - ->
24/10/20 17:28:16 INFO Executor: Starting executor ID driver on host 031937dc346d
24/10/20 17:28:16 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 43573.
24/10/20 17:28:16 INFO NettyBlockTransferService: Server created on 031937dc346d:43573
24/10/20 17:28:16 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
24/10/20 17:28:16 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 031937dc346d, 43573, None)
24/10/20 17:28:16 INFO BlockManagerMasterEndpoint: Registering block manager 031937dc346d:43573 with 107.7 MiB RAM, BlockManagerId(driver, 031937dc346d, 43573, None)
24/10/20 17:28:16 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 031937dc346d, 43573, None)
24/10/20 17:28:16 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 031937dc346d, 43573, None)
24/10/20 17:28:16 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791/spark-warehouse').
24/10/20 17:28:16 INFO SharedState: Warehouse path is 'file:/opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791/spark-warehouse'.
[INFO] 2024-10-20 17:28:20.020 +0800 - ->
Exception in thread "main" org.postgresql.util.PSQLException: ERROR: relation "dolphins1_copy2" already exists
at org.postgresql.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:2553)
at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:2285)
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:323)
at org.postgresql.jdbc.PgStatement.executeInternal(PgStatement.java:473)
at org.postgresql.jdbc.PgStatement.execute(PgStatement.java:393)
at org.postgresql.jdbc.PgStatement.executeWithFlags(PgStatement.java:322)
at org.postgresql.jdbc.PgStatement.executeCachedSql(PgStatement.java:308)
at org.postgresql.jdbc.PgStatement.executeWithFlags(PgStatement.java:284)
at org.postgresql.jdbc.PgStatement.executeUpdate(PgStatement.java:258)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.createTable(JdbcUtils.scala:881)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:81)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:122)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:121)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:944)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:944)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:396)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:380)
at com.jdyx.deploy.demo.SQLAPP.main(SQLAPP.java:37)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
24/10/20 17:28:20 INFO SparkContext: Invoking stop() from shutdown hook
[INFO] 2024-10-20 17:28:21.021 +0800 - ->
24/10/20 17:28:20 INFO SparkUI: Stopped Spark web UI at http://031937dc346d:4040
24/10/20 17:28:20 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
24/10/20 17:28:20 INFO MemoryStore: MemoryStore cleared
24/10/20 17:28:20 INFO BlockManager: BlockManager stopped
24/10/20 17:28:20 INFO BlockManagerMaster: BlockManagerMaster stopped
24/10/20 17:28:20 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
24/10/20 17:28:20 INFO SparkContext: Successfully stopped SparkContext
24/10/20 17:28:20 INFO ShutdownHookManager: Shutdown hook called
24/10/20 17:28:20 INFO ShutdownHookManager: Deleting directory /tmp/spark-90bb241d-304e-4e88-923e-4f895ad7d13a
24/10/20 17:28:20 INFO ShutdownHookManager: Deleting directory /tmp/spark-21066d53-de50-483f-a6b1-2173709cc771
[INFO] 2024-10-20 17:28:21.022 +0800 - process has exited. execute path:/opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791, processId:16911 ,exitStatusCode:1 ,processWaitForStatus:true ,processExitValue:1
[INFO] 2024-10-20 17:28:21.022 +0800 - Start finding appId in /opt/dolphinscheduler/logs/20241020/15356191508960/12/412/791.log, fetch way: log
[INFO] 2024-10-20 17:28:21.023 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:21.023 +0800 - ********************************* Finalize task instance ************************************
[INFO] 2024-10-20 17:28:21.023 +0800 - ***********************************************************************************************
[INFO] 2024-10-20 17:28:21.023 +0800 - Upload output files: [] successfully
[INFO] 2024-10-20 17:28:21.024 +0800 - Send task execute status: FAILURE to master : 172.18.0.5:1234
[INFO] 2024-10-20 17:28:21.024 +0800 - Remove the current task execute context from worker cache
[INFO] 2024-10-20 17:28:21.024 +0800 - The current execute mode isn't develop mode, will clear the task execute file: /opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791
[INFO] 2024-10-20 17:28:21.026 +0800 - Success clear the task execute file: /opt/soft/upload/tmp/dolphinscheduler/exec/process/default/15355154630752/15356191508960_12/412/791
[INFO] 2024-10-20 17:28:21.026 +0800 - FINALIZE_SESSION
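Two details in the log stand out. First, the DependencyUtils warning shows that the relative path ../jars/postgresql-42.2.18.jar passed via --jars does not resolve under the DolphinScheduler execution directory and is skipped, so it is worth pointing --jars and --driver-class-path at an absolute path (the PostgreSQL driver evidently loaded anyway, since the failure is a PSQLException thrown from inside it). Second, since relation "dolphins1_copy2" was most likely created by an earlier run of the same job, every rerun with the default save mode fails on the CREATE. If the table should be rebuilt on each run, one workaround is to drop the leftover first; a minimal plain-JDBC sketch using the connection settings assumed above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class DropStaleTable {
    public static void main(String[] args) throws Exception {
        // Hypothetical cleanup: drop the table left behind by a previous run
        // so the default ErrorIfExists save mode no longer collides with it.
        try (Connection conn = DriverManager.getConnection(
                "jdbc:postgresql://ip:5432/db1", "cas", "cas");
             Statement stmt = conn.createStatement()) {
            stmt.executeUpdate("DROP TABLE IF EXISTS public.dolphins1_copy2");
        }
    }
}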