各位老师好,在HDFS上开启HA的时候,向yarn提交任务的时候,遇到点问题。 cdh版本:5.15.2 hdfs版本:2.6.0 启动模式:flink-on-yarn 配置了HADOOP_CONF_DIR=/etc/hadoop/conf 命令: ./bin/flink run -m yarn-cluster -yt /yarn-conf -p 3 -ytm 2048 -ys 1 -ynm xxx /jars/flink10.jar xxx HDFS不启用HA的时候,能正常提交。 提交任务到yarn的时候,出现如下异常:nameservice2 是HA配置时候自定义的nameservice 2020-09-02 14:53:08,118 DEBUG org.apache.flink.yarn.YarnResourceManager - TM:remote keytab path obtained null 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - TM:remote keytab principal obtained null 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - TM:remote yarn conf path obtained null 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - TM:remote krb5 path obtained null 2020-09-02 14:53:08,120 ERROR org.apache.flink.yarn.YarnResourceManager - Could not start TaskManager in container container_1598944802155_0042_01_000006. java.lang.IllegalArgumentException: java.net.UnknownHostException: nameservice2 at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:374) at org.apache.hadoop.hdfs.NameNodeProxies.createNonHAProxy(NameNodeProxies.java:312) at org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:178) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:665) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:601) at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:148) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2596) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2630) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2612) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296) at org.apache.flink.yarn.Utils.createTaskExecutorContext(Utils.java:469) at 
org.apache.flink.yarn.YarnResourceManager.createTaskExecutorLaunchContext(YarnResourceManager.java:582) at org.apache.flink.yarn.YarnResourceManager.startTaskExecutorInContainer(YarnResourceManager.java:384) at java.lang.Iterable.forEach(Iterable.java:75) at org.apache.flink.yarn.YarnResourceManager.lambda$onContainersAllocated$1(YarnResourceManager.java:366) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195) at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152) at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21) at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) at akka.actor.Actor.aroundReceive(Actor.scala:517) at akka.actor.Actor.aroundReceive$(Actor.scala:515) at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) at akka.actor.ActorCell.invoke(ActorCell.scala:561) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) at akka.dispatch.Mailbox.run(Mailbox.scala:225) at akka.dispatch.Mailbox.exec(Mailbox.scala:235) at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at 
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: java.net.UnknownHostException: nameservice2 ... 41 more -- Sent from: http://apache-flink.147419.n8.nabble.com/ |
访问HDFS时出现UnknownHostException(报错里的nameservice2其实是HA的nameservice,并不是真正的host),大多是因为
没有正确加载hdfs-site.xml的配置,你可以确认一下JM的classpath里面有没有hadoop配置,没有的话 就是因为Yarn没有透传给container,需要在Yarn层面解决 Best, Yang storm <[hidden email]> 于2020年9月2日周三 下午4:05写道: > > 各位老师好,在HDFS上开启HA的时候,向yarn提交任务的时候,遇到点问题。 > cdh版本:5.15.2 > hdfs版本:2.6.0 > 启动模式:flink-on-yarn > 配置了HADOOP_CONF_DIR=/etc/hadoop/conf > 命令: > ./bin/flink run -m yarn-cluster -yt /yarn-conf -p 3 -ytm 2048 -ys 1 -ynm > xxx > /jars/flink10.jar xxx > > HDFS不启用HA的时候,能正常提交。 > > 提交任务到yarn的时候,出现如下异常:nameservice2 是HA配置时候自定义的nameservice > > 2020-09-02 14:53:08,118 DEBUG org.apache.flink.yarn.YarnResourceManager - > TM:remote keytab path obtained null > 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - > TM:remote keytab principal obtained null > 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - > TM:remote yarn conf path obtained null > 2020-09-02 14:53:08,119 DEBUG org.apache.flink.yarn.YarnResourceManager - > TM:remote krb5 path obtained null > 2020-09-02 14:53:08,120 ERROR org.apache.flink.yarn.YarnResourceManager - > Could not start TaskManager in container > container_1598944802155_0042_01_000006. 
> java.lang.IllegalArgumentException: java.net.UnknownHostException: > nameservice2 > at > > org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:374) > at > > org.apache.hadoop.hdfs.NameNodeProxies.createNonHAProxy(NameNodeProxies.java:312) > at > > org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:178) > at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:665) > at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:601) > at > > org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:148) > at > org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2596) > at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91) > at > org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2630) > at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2612) > at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370) > at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296) > at > org.apache.flink.yarn.Utils.createTaskExecutorContext(Utils.java:469) > at > > org.apache.flink.yarn.YarnResourceManager.createTaskExecutorLaunchContext(YarnResourceManager.java:582) > at > > org.apache.flink.yarn.YarnResourceManager.startTaskExecutorInContainer(YarnResourceManager.java:384) > at java.lang.Iterable.forEach(Iterable.java:75) > at > > org.apache.flink.yarn.YarnResourceManager.lambda$onContainersAllocated$1(YarnResourceManager.java:366) > at > > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402) > at > > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195) > at > > org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74) > at > > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) > at 
scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > at akka.japi.pf > .UnitCaseStatement.applyOrElse(CaseStatements.scala:21) > at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at akka.actor.Actor.aroundReceive(Actor.scala:517) > at akka.actor.Actor.aroundReceive$(Actor.scala:515) > at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) > at akka.actor.ActorCell.invoke(ActorCell.scala:561) > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) > at akka.dispatch.Mailbox.run(Mailbox.scala:225) > at akka.dispatch.Mailbox.exec(Mailbox.scala:235) > at > akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) > at > > akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) > at > akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) > at > > akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) > Caused by: java.net.UnknownHostException: nameservice2 > ... 41 more > > > > -- > Sent from: http://apache-flink.147419.n8.nabble.com/ > |
Free forum by Nabble | Edit this page |