Running Flink 1.10 on CDH YARN fails with the error below; with version 1.7.2 there is no problem.
flink-shaded-hadoop-2-uber-2.6.5-10.0.jar has been added, and the Hadoop environment variable is set with export HADOOP_CONF_DIR=/etc/hadoop/conf. Any help is appreciated.

org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: e358699c1be6be1472078771e1fd027f)
    at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
    at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
    at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
    at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:662)
    at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:210)
    at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:893)
    at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:966)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
    at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
    at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:966)
Caused by: java.util.concurrent.ExecutionException: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: e358699c1be6be1472078771e1fd027f)
    at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
    at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
    at org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:83)
    at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
    at tt.WordCountStreamingByJava.main(WordCountStreamingByJava.java:36)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
    ... 11 more
Caused by: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: e358699c1be6be1472078771e1fd027f)
    at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:112)
    at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:602)
    at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:577)
    at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
    at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
    at org.apache.flink.client.program.rest.RestClusterClient.lambda$pollResourceAsync$21(RestClusterClient.java:565)
    at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
    at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
    at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
    at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
    at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:291)
    at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
    at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
    at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
    at java.util.concurrent.CompletableFuture.postFire(CompletableFuture.java:561)
    at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:929)
    at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:442)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
    at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:147)
    at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:110)
    ... 19 more
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
    at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:110)
    at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:76)
    at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:192)
    at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:186)
    at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:180)
    at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:496)
    at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:380)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:284)
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:199)
    at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
    at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
    at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
    at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
    at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
    at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
    at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
    at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
    at akka.actor.ActorCell.invoke(ActorCell.scala:561)
    at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
    at akka.dispatch.Mailbox.run(Mailbox.scala:225)
    at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
    at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.net.ConnectException: Connection refused (Connection refused)
    at java.net.PlainSocketImpl.socketConnect(Native Method)
    at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
    at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
    at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
    at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
    at java.net.Socket.connect(Socket.java:606)
    at org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction.run(SocketTextStreamFunction.java:97)
    at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:100)
    at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:63)
    at org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:200)
This issue is really strange. Normally such a problem would be caught on the client side when env.execute compiles the job; it should not actually get scheduled. Could you describe in detail how you submit the job, and where this error is reported (client? cluster?)?

Best,
tison.
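P.S. The innermost cause in the trace is a plain TCP "Connection refused" thrown from the socket source. As a rough, simplified sketch (not the exact Flink internals), the connect step that SocketTextStreamFunction performs at SocketTextStreamFunction.java:97 boils down to the following, so the error means that, from the machine where the task ran, nothing was reachable on the host/port passed to socketTextStream:

import java.net.InetSocketAddress;
import java.net.Socket;

// Hypothetical standalone check; host and port stand in for whatever the job passes to socketTextStream.
public class SocketSourceConnectCheck {
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : "localhost";
        int port = args.length > 1 ? Integer.parseInt(args[1]) : 9001;
        try (Socket socket = new Socket()) {
            // 5-second connect timeout here is just for the check
            socket.connect(new InetSocketAddress(host, port), 5000);
            System.out.println("Connected to " + host + ":" + port);
        }
    }
}

Running a connect like this from a TaskManager host against the real host and port should tell whether a listener is actually there.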
Also, if possible, please paste (for example via a gist) the code around the execute call, or even the whole main method.

Best,
tison.
In reply to this post by tison
Hi, it is on the cluster; the code does not fail when run locally. The error message is pasted below, and the same job is fine on Flink 1.7. I later added the Flink environment variables:

#flink
export FLINK_HOME=/opt/module/flink-1.10.1
export PATH=${FLINK_HOME}/bin:$PATH

and the example that had failed then ran normally. But I switched to another job, one that works both on 1.7 and locally, and it fails as follows:

------------------------------------------------------------
 The program finished with the following exception:

org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Could not deploy Yarn job cluster.
    at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
    at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
    at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
    at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:662)
    at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:210)
    at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:893)
    at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:966)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
    at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
    at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:966)
Caused by: org.apache.flink.client.deployment.ClusterDeploymentException: Could not deploy Yarn job cluster.
    at org.apache.flink.yarn.YarnClusterDescriptor.deployJobCluster(YarnClusterDescriptor.java:398)
    at org.apache.flink.client.deployment.executors.AbstractJobClusterExecutor.execute(AbstractJobClusterExecutor.java:70)
    at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1733)
    at org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
    at org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
    at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
    at com.zongteng.ztstream.etl.MongoToKafka.main(MongoToKafka.java:77)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
    ... 11 more
Caused by: org.apache.flink.yarn.YarnClusterDescriptor$YarnDeploymentException: The YARN application unexpectedly switched to state FAILED during deployment.
Diagnostics from YARN: Application application_1590715263014_0033 failed 1 times due to AM Container for appattempt_1590715263014_0033_000001 exited with exitCode: 2
For more detailed output, check application tracking page:http://zongteng72:8088/proxy/application_1590715263014_0033/Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1590715263014_0033_01_000001
Exit code: 2
Stack trace: ExitCodeException exitCode=2:
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:604)
    at org.apache.hadoop.util.Shell.run(Shell.java:507)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:789)
    at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Container exited with a non-zero exit code 2
Failing this attempt. Failing the application.
If log aggregation is enabled on your cluster, use this command to further investigate the issue:
yarn logs -applicationId application_1590715263014_0033

    at org.apache.flink.yarn.YarnClusterDescriptor.startAppMaster(YarnClusterDescriptor.java:999)
    at org.apache.flink.yarn.YarnClusterDescriptor.deployInternal(YarnClusterDescriptor.java:488)
    at org.apache.flink.yarn.YarnClusterDescriptor.deployJobCluster(YarnClusterDescriptor.java:391)
    ... 22 more
2020-05-29 14:18:25,529 INFO  org.apache.flink.yarn.YarnClusterDescriptor            - Cancelling deployment from Deployment Failure Hook
2020-05-29 14:18:25,530 INFO  org.apache.hadoop.yarn.client.RMProxy                  - Connecting to ResourceManager at zongteng72/192.168.109.72:8032
2020-05-29 14:18:25,532 INFO  org.apache.flink.yarn.YarnClusterDescriptor            - Killing YARN application
2020-05-29 14:18:25,540 INFO  org.apache.hadoop.yarn.client.api.impl.YarnClientImpl  - Killed application application_1590715263014_0033
2020-05-29 14:18:25,641 INFO  org.apache.flink.yarn.YarnClusterDescriptor            - Deleting files in hdfs://ZONGTENGSERIVCE/user/root/.flink/application_1590715263014_0033.
In reply to this post by tison
The code is just the example that ships with Flink.
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WordCountStreamingByJava {

    public static void main(String[] args) throws Exception {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Set up the socket data source
        DataStreamSource<String> source = env.socketTextStream("zongteng75", 9001, "\n");

        // Transform the data
        DataStream<WordWithCount> dataStream = source.flatMap(new FlatMapFunction<String, WordWithCount>() {
            @Override
            public void flatMap(String line, Collector<WordWithCount> collector) throws Exception {
                System.out.println(line);
                for (String word : line.split(" ")) {
                    collector.collect(new WordWithCount(word, 1));
                }
            }
        }).keyBy("word")                                   // group and count by key
          .timeWindow(Time.seconds(2), Time.seconds(2))    // window function to simulate the data flow
          .sum("count");                                   // count the words within each time window

        // Print the result
        dataStream.print();

        // Execute the job
        env.execute("Flink Streaming Word Count By Java");
    }
}

Now that I have added the Flink environment variables, this example passes, which is very strange.
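The snippet above references a WordWithCount POJO that is not included in the message. A minimal sketch that matches the keyBy("word"), sum("count"), and new WordWithCount(word, 1) calls (a reconstruction for illustration, e.g. as a nested static class, not necessarily the exact class used) would be:

    // Reconstructed for illustration; fields and names are inferred from the calls above.
    public static class WordWithCount {
        public String word;   // public fields so the field expressions "word"/"count" resolve
        public long count;

        public WordWithCount() {}   // Flink POJOs need a public no-argument constructor

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }

Also note that socketTextStream("zongteng75", 9001, "\n") only works if something is already listening on that port (for example a netcat session started with nc -lk 9001 on zongteng75), which is presumably what the Connection refused at the bottom of the first trace is about.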
What command are you running? From which directory are you running it, and how does that directory relate to the directory where the Flink distribution was downloaded and unpacked?
Best, tison. air23 <[hidden email]> 于2020年5月29日周五 下午2:35写道: > 代码就是flink自带的例子。 > > public class WordCountStreamingByJava { > public static void main(String[] args) throws Exception { > > // 创建执行环境 > StreamExecutionEnvironment env = > StreamExecutionEnvironment.getExecutionEnvironment(); > // 设置socket数据源 > DataStreamSource<String> source = env.socketTextStream("zongteng75", 9001, > "\n"); > > // 转化处理数据 > DataStream<WordWithCount> dataStream = source.flatMap(new > FlatMapFunction<String, WordWithCount>() { > @Override > public void flatMap(String line, Collector<WordWithCount> collector) > throws Exception { > > System.out.println(line); > for (String word : line.split(" ")) { > collector.collect(new WordWithCount(word, 1)); > } > } > }).keyBy("word")//以key分组统计 > .timeWindow(Time.seconds(2),Time.seconds(2))//设置一个窗口函数,模拟数据流动 > .sum("count");//计算时间窗口内的词语个数 > > // 输出数据到目的端 > dataStream.print(); > > // 执行任务操作 > env.execute("Flink Streaming Word Count By Java"); > > } > > > > > 我现在加了flink环境变量 这个例子 可以过了。就很奇怪 > > > > > > > > > > > > > > > > > > 在 2020-05-29 14:22:39,"tison" <[hidden email]> 写道: > >然后你 execute 前后的代码片段甚至整个 main 如果可以的话通过 gist 贴一下(x) > > > >Best, > >tison. > > > > > >tison <[hidden email]> 于2020年5月29日周五 下午2:21写道: > > > >> 这个问题好诡异啊,一般来说编译会在 env.execute > >> 的时候拦截,不应该真的调度起来才对。你能详细描述一下你提交作业的方法还有这个错误报在哪里吗(client?cluster?)? > >> > >> Best, > >> tison. > >> > >> > >> air23 <[hidden email]> 于2020年5月29日周五 下午1:38写道: > >> > >>> cdh运行flink1.10 on cdh yarn 报错如下。 用1.7.2版本就没有问题 > >>> flink-shaded-hadoop-2-uber-2.6.5-10.0.jar 也加了 > >>> hadoop环境变量 export HADOOP_CONF_DIR=/etc/hadoop/conf > >>> 求解答 > >>> > >>> > >>> > >>> > >>> > >>> > >>> > >>> org.apache.flink.client.program.ProgramInvocationException: The main > >>> method caused an error: > >>> org.apache.flink.client.program.ProgramInvocationException: Job failed > >>> (JobID: e358699c1be6be1472078771e1fd027f) > >>> > >>> at > >>> > org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335) > >>> > >>> at > >>> > org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205) > >>> > >>> at > >>> > org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138) > >>> > >>> at > >>> > org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:662) > >>> > >>> at > >>> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:210) > >>> > >>> at > >>> > org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:893) > >>> > >>> at > >>> > org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:966) > >>> > >>> at java.security.AccessController.doPrivileged(Native Method) > >>> > >>> at javax.security.auth.Subject.doAs(Subject.java:422) > >>> > >>> at > >>> > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692) > >>> > >>> at > >>> > org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41) > >>> > >>> at > >>> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:966) > >>> > >>> Caused by: java.util.concurrent.ExecutionException: > >>> org.apache.flink.client.program.ProgramInvocationException: Job failed > >>> (JobID: e358699c1be6be1472078771e1fd027f) > >>> > >>> at > >>> > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > >>> > >>> at > >>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895) > >>> > >>> at > >>> > 
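Note that env.socketTextStream("zongteng75", 9001, "\n") only connects to a server that is already listening; nothing in the job itself opens that port, so the source task fails with a connection error if the port is closed. Below is a minimal sketch of a test feeder (a hypothetical helper, not code from this thread) that can be started on the source host before submitting the job, in place of a tool such as netcat.

// Hypothetical test feeder for the WordCount example above: listens on port 9001 and
// writes one line per second, so the job's socketTextStream source has something to connect to.
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;

public class SocketLineFeeder {
    public static void main(String[] args) throws Exception {
        try (ServerSocket server = new ServerSocket(9001)) {           // same port as the job's source
            System.out.println("Waiting for the Flink source to connect on port 9001 ...");
            try (Socket client = server.accept();
                 PrintWriter out = new PrintWriter(client.getOutputStream(), true)) {
                while (true) {
                    out.println("hello flink on yarn");                // one line of sample input
                    Thread.sleep(1000);
                }
            }
        }
    }
}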
In reply to this post by air23
This error, "Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy", suggests that the flink-conf.yaml under the Flink conf directory was not read; it contains the configuration for restarting failed tasks.
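If the job cannot rely on flink-conf.yaml being picked up, a restart strategy can also be set in the program itself. The following is a minimal sketch (not code from this thread; the class name, attempt count, and delay are illustrative) using the standard DataStream API.

// Minimal sketch: configure a fixed-delay restart strategy in code, so task failures are
// retried even when no restart-strategy is read from flink-conf.yaml.
// The equivalent flink-conf.yaml keys would be:
//   restart-strategy: fixed-delay
//   restart-strategy.fixed-delay.attempts: 3
//   restart-strategy.fixed-delay.delay: 10 s
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RestartStrategyExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Retry a failed task up to 3 times, waiting 10 seconds between attempts (illustrative values).
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.seconds(10)));

        // ... build the WordCount pipeline as in the example above, then:
        // env.execute("Flink Streaming Word Count By Java");
    }
}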
hello
Command: ./yarn-session.sh -n 8 -jm 1024 -tm 1024 -s 4 -nm FlinkOnYarnSession -d
Version: Flink 1.10.0, CDH 5.14

When I use the yarn-session mode of Flink on YARN, it fails with the following error:

org.apache.flink.client.deployment.ClusterDeploymentException: Couldn't deploy Yarn session cluster
    at org.apache.flink.yarn.YarnClusterDescriptor.deploySessionCluster(YarnClusterDescriptor.java:380)
    at org.apache.flink.yarn.cli.FlinkYarnSessionCli.run(FlinkYarnSessionCli.java:548)
    at org.apache.flink.yarn.cli.FlinkYarnSessionCli.lambda$main$5(FlinkYarnSessionCli.java:785)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
    at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
    at org.apache.flink.yarn.cli.FlinkYarnSessionCli.main(FlinkYarnSessionCli.java:785)
Caused by: java.net.ConnectException: Call From master/192.168.1.20 to slave1:8020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:791)
    at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:731)
    at org.apache.hadoop.ipc.Client.call(Client.java:1474)
    at org.apache.hadoop.ipc.Client.call(Client.java:1401)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
    at com.sun.proxy.$Proxy12.getFileInfo(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:752)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy13.getFileInfo(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1977)
    at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1118)
    at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1114)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
    at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1400)
    at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:496)
    at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:348)
    at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:338)
    at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1907)
    at org.apache.flink.yarn.Utils.uploadLocalFileToRemote(Utils.java:172)
    at org.apache.flink.yarn.Utils.setupLocalResource(Utils.java:126)
    at org.apache.flink.yarn.YarnClusterDescriptor.setupSingleLocalResource(YarnClusterDescriptor.java:1062)
    at org.apache.flink.yarn.YarnClusterDescriptor.uploadAndRegisterFiles(YarnClusterDescriptor.java:1144)
    at org.apache.flink.yarn.YarnClusterDescriptor.startAppMaster(YarnClusterDescriptor.java:707)
    at org.apache.flink.yarn.YarnClusterDescriptor.deployInternal(YarnClusterDescriptor.java:488)
    at org.apache.flink.yarn.YarnClusterDescriptor.deploySessionCluster(YarnClusterDescriptor.java:373)
    ... 7 more
Caused by: java.net.ConnectException: Connection refused
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
    at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:494)
    at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:609)
    at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:707)
    at org.apache.hadoop.ipc.Client$Connection.access$2800(Client.java:370)
    at org.apache.hadoop.ipc.Client.getConnection(Client.java:1523)
    at org.apache.hadoop.ipc.Client.call(Client.java:1440)
    ... 35 more
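In the deployment failure above, the root cause is that the HDFS NameNode referenced by fs.defaultFS (slave1:8020 in the log) refuses connections, so the client cannot upload the Flink jars for the session cluster. Below is a minimal sketch (a hypothetical standalone check, not code from this thread) that verifies HDFS is reachable with the configuration under HADOOP_CONF_DIR before running yarn-session.sh.

// Hypothetical connectivity check: load the Hadoop configuration from the classpath
// (core-site.xml / hdfs-site.xml under HADOOP_CONF_DIR) and probe the HDFS root.
// If the NameNode is down, this fails with the same ConnectException as the yarn-session deployment.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsConnectivityCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();            // picks up *-site.xml from the classpath
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println("fs.defaultFS = " + fs.getUri());
            System.out.println("HDFS root exists: " + fs.exists(new Path("/")));
        }
    }
}

Running this on the submitting host (master in the log) should reproduce the refused connection until the NameNode is up again.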
This problem has been resolved: it was my own CDH cluster's NameNode that had not started successfully. I now have another question.

Command: ./../bin/yarn-session.sh -n 2 -jm 1024 -tm 1024 -d -s 2

With this command Flink should be requesting 2 slots, so why is no slot count shown in the YARN UI? Could you help me understand this? Thanks.
The yarn-session mode should be lazy: if you do not submit a job, it will not actually request containers.
小屁孩 <[hidden email]> wrote on Monday, 8 June 2020 at 18:23:

> This problem has been resolved: it was my own CDH cluster's NameNode that had not started successfully.
> I now have another question: with the command ./../bin/yarn-session.sh -n 2 -jm 1024 -tm 1024 -d -s 2,
> Flink should be requesting 2 slots, so why is no slot count shown in the YARN UI?
.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) > >>> > >>> at java.net > >>> > .AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) > >>> > >>> at java.net > >>> .AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) > >>> > >>> at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) > >>> > >>> at java.net.Socket.connect(Socket.java:606) > >>> > >>> at > >>> > org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction.run(SocketTextStreamFunction.java:97) > >>> > >>> at > >>> > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:100) > >>> > >>> at > >>> > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:63) > >>> > >>> at > >>> > org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:200) > >> > >> > -- Best, Benchao Li |
Yes: in yarn-session mode, if no job is submitted to the yarn-session application id, no slots or memory are allocated at all!

From: Benchao Li
Sent: 2020-06-08 18:26
To: user-zh
Subject: Re: Re: flink1.10 on yarn issue

The yarn-session mode should be lazy: if you do not submit a job, it will not actually request any containers.

小屁孩 <[hidden email]> wrote on Monday, June 8, 2020 at 6:23 PM:

That problem is solved now; it was my own CDH NameNode that had failed to start. I do have a follow-up question:

Command: ./../bin/yarn-session.sh -n 2 -jm 1024 -tm 1024 -d -s 2
With this command Flink should be requesting 2 slots, yet the YARN web UI does not show any slot count.

Could you help me understand this? Thanks.

------------------ Original message ------------------
From: "小屁孩"<[hidden email]>
Sent: Monday, June 8, 2020, 4:06 PM
To: "user-zh"<[hidden email]>
Subject: Re: Re: flink1.10 on yarn issue

Hello,
Command: ./yarn-session.sh -n 8 -jm 1024 -tm 1024 -s 4 -nm FlinkOnYarnSession -d
Versions: Flink 1.10.0, CDH 5.14
When I start Flink on YARN in yarn-session mode, it fails with the following error:

org.apache.flink.client.deployment.ClusterDeploymentException: Couldn't deploy Yarn session cluster
	at org.apache.flink.yarn.YarnClusterDescriptor.deploySessionCluster(YarnClusterDescriptor.java:380)
	at org.apache.flink.yarn.cli.FlinkYarnSessionCli.run(FlinkYarnSessionCli.java:548)
	at org.apache.flink.yarn.cli.FlinkYarnSessionCli.lambda$main$5(FlinkYarnSessionCli.java:785)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
	at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
	at org.apache.flink.yarn.cli.FlinkYarnSessionCli.main(FlinkYarnSessionCli.java:785)
Caused by: java.net.ConnectException: Call From master/192.168.1.20 to slave1:8020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:791)
	at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:731)
	at org.apache.hadoop.ipc.Client.call(Client.java:1474)
	at org.apache.hadoop.ipc.Client.call(Client.java:1401)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
	at com.sun.proxy.$Proxy12.getFileInfo(Unknown Source)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:752)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
	at com.sun.proxy.$Proxy13.getFileInfo(Unknown Source)
	at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1977)
	at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1118)
	at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1114)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
	at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1400)
	at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:496)
	at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:348)
	at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:338)
	at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1907)
	at org.apache.flink.yarn.Utils.uploadLocalFileToRemote(Utils.java:172)
	at org.apache.flink.yarn.Utils.setupLocalResource(Utils.java:126)
	at org.apache.flink.yarn.YarnClusterDescriptor.setupSingleLocalResource(YarnClusterDescriptor.java:1062)
	at org.apache.flink.yarn.YarnClusterDescriptor.uploadAndRegisterFiles(YarnClusterDescriptor.java:1144)
	at org.apache.flink.yarn.YarnClusterDescriptor.startAppMaster(YarnClusterDescriptor.java:707)
	at org.apache.flink.yarn.YarnClusterDescriptor.deployInternal(YarnClusterDescriptor.java:488)
	at org.apache.flink.yarn.YarnClusterDescriptor.deploySessionCluster(YarnClusterDescriptor.java:373)
	... 7 more
Caused by: java.net.ConnectException: Connection refused
	at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
	at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
	at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:494)
	at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:609)
	at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:707)
	at org.apache.hadoop.ipc.Client$Connection.access$2800(Client.java:370)
	at org.apache.hadoop.ipc.Client.getConnection(Client.java:1523)
	at org.apache.hadoop.ipc.Client.call(Client.java:1440)
	... 35 more
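For reference: the "Call From master/192.168.1.20 to slave1:8020 failed on connection exception ... Connection refused" line above is the HDFS client failing to reach the NameNode RPC endpoint. A minimal way to check that endpoint from the submitting host, using only standard Hadoop CLI commands (the host and port are simply the ones shown in the trace), might be:

    # Show which default filesystem the client-side configuration points at
    hdfs getconf -confKey fs.defaultFS
    # Any simple metadata operation; it fails with "Connection refused" while the NameNode is down
    hdfs dfs -ls /

If both succeed, the yarn-session client should at least get past uploading its local resources to HDFS.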
------------------ Original message ------------------
From: "[hidden email]"<[hidden email]>
Sent: Friday, May 29, 2020, 2:49 PM
To: "user-zh"<[hidden email]>
Subject: Re: Re: flink1.10 on yarn issue

As for this error:

    Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy

it most likely means the flink-conf.yaml under the Flink conf directory was not picked up; that file contains the task failure-restart configuration parameter!
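For reference, the restart behaviour mentioned above is driven by the restart-strategy keys in flink-conf.yaml. A minimal sketch of such a configuration (the key names are Flink's documented options; the values here are purely illustrative):

    # flink-conf.yaml: retry a failed task 3 times, waiting 10 s between attempts
    restart-strategy: fixed-delay
    restart-strategy.fixed-delay.attempts: 3
    restart-strategy.fixed-delay.delay: 10 s

With no restart strategy configured and no checkpointing enabled, Flink falls back to not restarting at all, which is what the NoRestartBackoffTimeStrategy line in the stack trace reports.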
From: air23
Sent: 2020-05-29 14:34
To: user-zh
Subject: Re: Re: flink1.10 on yarn issue

The code is just the example that ships with Flink:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WordCountStreamingByJava {
    public static void main(String[] args) throws Exception {

        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Set up the socket source
        DataStreamSource<String> source = env.socketTextStream("zongteng75", 9001, "\n");

        // Transform the incoming lines
        // (WordWithCount is the word/count POJO from the Flink example; its definition was not included in the mail)
        DataStream<WordWithCount> dataStream = source.flatMap(new FlatMapFunction<String, WordWithCount>() {
            @Override
            public void flatMap(String line, Collector<WordWithCount> collector) throws Exception {
                System.out.println(line);
                for (String word : line.split(" ")) {
                    collector.collect(new WordWithCount(word, 1));
                }
            }
        }).keyBy("word")                                   // group by the word field
          .timeWindow(Time.seconds(2), Time.seconds(2))    // window the stream
          .sum("count");                                   // count the words in each window

        // Print the results
        dataStream.print();

        // Execute the job
        env.execute("Flink Streaming Word Count By Java");
    }
}

I have now added the Flink environment variable and this example passes. Very strange.

At 2020-05-29 14:22:39, "tison" <[hidden email]> wrote:
> Also, could you paste the code snippets around execute, or even the whole main method, as a gist if possible? (x)
>
> Best,
> tison.
>
> tison <[hidden email]> wrote on Friday, May 29, 2020 at 2:21 PM:
>
>> This problem is really odd. Normally this would be intercepted when env.execute compiles the job; it should not actually get scheduled at all. Could you describe in detail how you submit the job, and where this error is reported (client? cluster?)?
>>
>> Best,
>> tison.
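For reference, Flink 1.10 no longer ships Hadoop in its distribution, so the client needs to be told where the cluster's Hadoop configuration and jars live. A minimal sketch of the environment usually exported before calling flink run or yarn-session.sh (the conf path is the typical CDH location; adjust to the actual cluster):

    # Point the Flink client at the cluster's Hadoop configuration
    export HADOOP_CONF_DIR=/etc/hadoop/conf
    # Put the cluster's Hadoop jars on the classpath (alternative to dropping a flink-shaded-hadoop uber jar into lib/)
    export HADOOP_CLASSPATH=$(hadoop classpath)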
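Also for reference, the word count job above reads from env.socketTextStream("zongteng75", 9001, "\n"), so a process must already be listening on that host and port when the job starts. A common way to provide a test input stream, assuming netcat is available on that host:

    # Run on zongteng75: listen on TCP port 9001 and type lines of text to feed the job
    nc -lk 9001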