The SQL is simple, as follows:
select * from test2 where `dt`=20210124 and `hour`=02 and `supply_id`=2027 limit 1000;

The job finished quickly after submission, but no data was returned at all. The data does exist, though: I verified via spark-sql that the same statement returns results. The JM and TM logs contain "No more splits available", so it looks like not a single split was generated. This should be the new source API in 1.12. Is this a bug, or is there some usage caveat I'm missing?
Follow-up (1): In a Flink SQL query, a string-typed partition field apparently must be quoted with '', otherwise nothing is found? As above, hour=02 directly leads to "no more splits". A rewritten variant is sketched below.
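A minimal sketch of the rewrite, assuming `hour` is declared as a STRING partition column (table, columns and values are the ones from my query above):

    select *
    from test2
    where `dt` = 20210124       -- dt: numeric partition value, left unquoted
      and `hour` = '02'         -- hour: string partition column, so the literal must be quoted
      and `supply_id` = 2027
    limit 1000;

With the literal quoted, the partition filter matches and splits are generated.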
(2) With that worked around, splits are now generated, but an ORC-related error is thrown, and submitting the SQL brings the JM down directly. JM log:

2021-01-24 04:41:24,952 ERROR org.apache.flink.runtime.util.FatalExitExceptionHandler [] - FATAL: Thread 'flink-akka.actor.default-dispatcher-2' produced an uncaught exception. Stopping the process...
java.util.concurrent.CompletionException: org.apache.flink.util.FlinkRuntimeException: Failed to start the operator coordinators
    at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273) ~[?:1.8.0_251]
    at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280) ~[?:1.8.0_251]
    at java.util.concurrent.CompletableFuture.uniRun(CompletableFuture.java:722) ~[?:1.8.0_251]
    at java.util.concurrent.CompletableFuture.uniRunStage(CompletableFuture.java:731) ~[?:1.8.0_251]
    at java.util.concurrent.CompletableFuture.thenRun(CompletableFuture.java:2023) ~[?:1.8.0_251]
    at org.apache.flink.runtime.jobmaster.JobMaster.resetAndStartScheduler(JobMaster.java:935) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:801) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.jobmaster.JobMaster.lambda$start$1(JobMaster.java:357) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleCallAsync(AkkaRpcActor.java:383) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:199) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:88) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:154) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) [flink-dists-extended_2.11-1.12.0.jar:?]
    at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21) [flink-dists-extended_2.11-1.12.0.jar:?]
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170) [flink-dists-extended_2.11-1.12.0.jar:?]
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) [flink-dists-extended_2.11-1.12.0.jar:?]
    at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.actor.Actor$class.aroundReceive(Actor.scala:517) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.actor.ActorCell.invoke(ActorCell.scala:561) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.Mailbox.run(Mailbox.scala:225) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.Mailbox.exec(Mailbox.scala:235) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) [flink-dists-extended_2.11-1.12.0.jar:?]
    at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [flink-dists-extended_2.11-1.12.0.jar:?]
Caused by: org.apache.flink.util.FlinkRuntimeException: Failed to start the operator coordinators
    at org.apache.flink.runtime.scheduler.SchedulerBase.startAllOperatorCoordinators(SchedulerBase.java:1100) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:567) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:944) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at java.util.concurrent.CompletableFuture.uniRun(CompletableFuture.java:719) ~[?:1.8.0_251]
    ... 27 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
    at org.apache.hadoop.hive.common.ValidReadTxnList.readFromString(ValidReadTxnList.java:142) ~[?:?]
    at org.apache.hadoop.hive.common.ValidReadTxnList.<init>(ValidReadTxnList.java:57) ~[?:?]
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$Context.<init>(OrcInputFormat.java:421) ~[?:?]
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:983) ~[?:?]
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getSplits(OrcInputFormat.java:1048) ~[?:?]
    at org.apache.flink.connectors.hive.HiveSourceFileEnumerator.createInputSplits(HiveSourceFileEnumerator.java:86) ~[?:?]
    at org.apache.flink.connectors.hive.HiveSourceFileEnumerator.enumerateSplits(HiveSourceFileEnumerator.java:57) ~[?:?]
    at org.apache.flink.connector.file.src.AbstractFileSource.createEnumerator(AbstractFileSource.java:140) ~[flink-table_2.11-1.12.0.jar:1.12.0]
    at org.apache.flink.connectors.hive.HiveSource.createEnumerator(HiveSource.java:115) ~[?:?]
    at org.apache.flink.runtime.source.coordinator.SourceCoordinator.start(SourceCoordinator.java:119) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.operators.coordination.RecreateOnResetOperatorCoordinator$DeferrableCoordinator.applyCall(RecreateOnResetOperatorCoordinator.java:308) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.operators.coordination.RecreateOnResetOperatorCoordinator.start(RecreateOnResetOperatorCoordinator.java:72) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder.start(OperatorCoordinatorHolder.java:182) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.scheduler.SchedulerBase.startAllOperatorCoordinators(SchedulerBase.java:1094) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:567) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:944) ~[flink-dists-extended_2.11-1.12.0.jar:?]
    at java.util.concurrent.CompletableFuture.uniRun(CompletableFuture.java:719) ~[?:1.8.0_251]
    ... 27 more
2021-01-24 04:41:24,963 INFO org.apache.flink.runtime.blob.BlobServer [] - Stopped BLOB server at 0.0.0.0:13146
In addition, I found that the Parquet format works fine. While at it, I also looked at the Parquet bulk format of FileSink among the Flink streaming connectors.
The docs then mention ParquetAvroWriters. For files written in that format, how should the corresponding Hive table be created? A table created with the default "stored as parquet" doesn't seem to carry any Avro information. A sketch of what I would try is below.
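A rough sketch only, assuming the files ParquetAvroWriters produces are plain Parquet that Hive can read as long as the column names/types match the Avro record fields (table name, columns and path here are hypothetical):

    -- hypothetical external table over the FileSink output directory
    CREATE EXTERNAL TABLE test2_parquet (
      supply_id BIGINT,     -- placeholder columns; must mirror the Avro schema
      event_time STRING
    )
    PARTITIONED BY (`dt` BIGINT, `hour` STRING)
    STORED AS PARQUET
    LOCATION 'hdfs:///warehouse/test2_parquet';

    -- partitions written by the FileSink still need to be registered, e.g.:
    -- ALTER TABLE test2_parquet ADD PARTITION (`dt`=20210124, `hour`='02');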
赵一旦 <[hidden email]> wrote on Sun, Jan 24, 2021 at 6:45 AM:
> [...]
Hi,
Regarding the filter on partition fields: Flink's implicit type conversion rules differ from Hive's, so I suggest writing the constants in the where clause according to the declared type of the partition field.
Regarding the exception when reading ORC: which Hive version are you on? And is the hive.txn.valid.txns parameter set anywhere in your Hive configuration?

On Sun, Jan 24, 2021 at 6:45 AM 赵一旦 <[hidden email]> wrote:
> [...]
--
Best regards!
Rui Li
My Hive version should be 1.2.1; I see the Spark side depends on 1.2.1 jars.
Also, about the Hive configuration, I need to ask: does the Flink cluster itself need Hive dependencies? My Flink cluster has no Hive dependency added at all.
I only passed a few jars via the -l parameter when starting Flink's sql-client, based on the flink-sql-connector-hive-1.2.2 jar given in the official Flink docs.
In addition, on the flink-client side I put a hive-site.xml in conf (i.e. on the classpath), which only specifies the most basic metastore connection settings.

Rui Li <[hidden email]> wrote on Mon, Jan 25, 2021 at 11:32 AM:
> [...]
Following up on this answer — and on another English-language email thread, someone said my hive-common and hive-exec versions were inconsistent.
My analysis: I don't depend on any hive-common or hive-exec myself. The only possible source is flink-sql-connector-hive-1.2.2_2.11-1.12.0; I checked its pom, and the orc dependency in it excludes the hive transitive dependencies. However, I had also separately added a flink-sql-orc jar. Looking at it just now: flink-sql-orc itself exists to pull in the orc dependencies, and it does NOT exclude orc's transitive hive dependencies. I tried a fix and, surprisingly, it works. I'm sharing it here for everyone to analyze — if we can pin down the root cause, great; if not, never mind, since the problem looks solved for me.
The fix is simply to drop the dependency on the flink-sql-orc jar. I had already added flink-sql-connector-hive per the official docs, and I confirmed that jar already shades the orc classes, so there is no need for a separate flink-sql-orc jar. A first quick test passed; I haven't experimented further yet.

赵一旦 <[hidden email]> wrote on Mon, Jan 25, 2021 at 12:59 PM:
> [...]
This is actually still quite messy. Looking further, hive-storage-api doesn't seem to be entirely unused either.
I write ORC files to Hive with Flink's DataStream API FileSink, pulling in the flink-orc module, which internally depends on hive-storage-api. I just tried removing that and replacing it with hive-exec etc., which failed because some classes, e.g. MapColumnVector, are missing.
Writing worked fine in earlier tests, so when writing to Hive via the DataStream API I do need flink-orc, which indirectly pulls in hive-storage-api — that part is required.
But the jars passed with -l when starting flink-sql-client must not include the flink-sql-orc jar, presumably because it conflicts with classes in flink-connector-hive?
So the required dependencies differ from one usage to another.

赵一旦 <[hidden email]> wrote on Mon, Jan 25, 2021 at 1:44 PM:
> [...]
Hi,
Specifying the flink-sql-connector-hive-1.2.2 jar via the -l parameter is fine: that jar bundles the hive-1.2.2 dependencies (including hive-common, hive-exec, etc.), and it is the only jar you need for Hive — no extra orc or Parquet dependencies should be added.
For hive-site.xml, the recommended way is to point to it via the HiveCatalog parameters [1]; a rough sketch of such a DDL is shown below.
Judging from the stacktrace you posted earlier, there may indeed be a hive-common conflict: the line where the exception occurred, ValidReadTxnList.readFromString(ValidReadTxnList.java:142), does not exist in hive-1.2.2 [2]. So there is probably another version of hive-common on your classpath.

[1] https://ci.apache.org/projects/flink/flink-docs-release-1.12/dev/table/connectors/hive/#connecting-to-hive
[2] https://github.com/apache/hive/blob/rel/release-1.2.2/common/src/java/org/apache/hadoop/hive/common/ValidReadTxnList.java
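A minimal sketch of the catalog DDL documented in [1], assuming your hive-site.xml sits under /opt/hive-conf (the catalog name and path are placeholders):

    CREATE CATALOG myhive WITH (
      'type' = 'hive',
      'default-database' = 'default',      -- optional
      'hive-conf-dir' = '/opt/hive-conf'   -- directory containing hive-site.xml
    );

    USE CATALOG myhive;

This way the Hive connection settings come from the catalog definition instead of a hive-site.xml dropped on the client classpath.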
On Mon, Jan 25, 2021 at 1:44 PM 赵一旦 <[hidden email]> wrote:
> [...]

--
Best regards!
Rui Li