有没有人看到这个邮件?作业大概运行10小时左右就会出现这个错误;
困扰了好几天 我的SQL语句如下,部分字段省略 select a.contact_id, ... a.code_contact_channel from ( select contact_id, service_no, ... code_contact_channel, row_number() over(partition by contact_id,service_no order by operate_time desc) as rn from table1 )a join ( select contact_id , mobile_no, ... row_number() over(partition by contact_id,mobile_no order by create_time desc) as rn from table2 )b on a.contact_id = b.contact_id and a.service_no = b.mobile_no where a.rn = 1 and b.rn = 1 ; 程序部署在yarn上运行几个小时后就会出现错误日志显示如下: [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.ExecutionGraph - Rank(strategy=[AppendFastStrategy], rankType=[ROW_NUMBER], rankRange=[rankStart=1, rankEnd=1], partitionBy=[contact_id, service_no], orderBy=[operate_time DESC], select=[operate_time, contact_id.....]) -> Calc(select=[contact_id, start_time, contact_length, service_no...code_contact_channel]) (1/1) (52b8519ad9a44832a283c1760f385bf6) switched from RUNNING to FAILED. java.lang.ArrayIndexOutOfBoundsException: -1 at java.util.ArrayList.elementData(ArrayList.java:422) at java.util.ArrayList.remove(ArrayList.java:499) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElementWithoutRowNumber(AppendOnlyTopNFunction.java:205) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:120) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:46) at org.apache.flink.streaming.api.operators.KeyedProcessOperator.processElement(KeyedProcessOperator.java:85) at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:173) at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.processElement(StreamTaskNetworkInput.java:151) at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.emitNext(StreamTaskNetworkInput.java:128) at 
org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:69) at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:311) at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:187) at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:487) at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:470) at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:707) at org.apache.flink.runtime.taskmanager.Task.run(Task.java:532) at java.lang.Thread.run(Thread.java:748) [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.failover.flip1.RestartPipelinedRegionStrategy - Calculating tasks to restart to recover the failed task 28aa070d07f48addbf378d6ee01a29c6_0. topn函数flinkSQL已经支持很久了,我不清楚为什么会产生这种错误,希望各位老师给出意见 |
Hi,
云邪已经确认这是一个bug,开了issue来跟进这个问题 https://issues.apache.org/jira/browse/FLINK-17625 > -----原始邮件----- > 发件人: 1101300123 <[hidden email]> > 发送时间: 2020-05-14 09:19:41 (星期四) > 收件人: "[hidden email]" <[hidden email]> > 抄送: > 主题: 1.10 使用 flinkSQL 的row_number()函数实现top1 出现数组越界,求助社区大佬 > > 有没有人看到这个邮件?大概跑10小时左右数据就会出现; > 困扰了好几天 > 我的SQL语句如下,部分字段省略 > select > a.contact_id, > ... > a.code_contact_channel > from > ( > select > contact_id, > service_no, > ... > code_contact_channel, > row_number() over(partition by contact_id,service_no order by operate_time desc) as rn > from > table1 > )a > join ( > select > contact_id , > mobile_no, > ... > row_number() over(partition by contact_id,mobile_no order by create_time desc) as rn > from > table2 )b > on > a.contact_id = b.contact_id > and a.service_no = b.mobile_no > where > a.rn = 1 > and b.rn = 1 ; > 程序部署在yarn上运行几个小时后就会出现错误日志显示如下: > > > [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.ExecutionGraph - Rank(strategy=[AppendFastStrategy], rankType=[ROW_NUMBER], rankRange=[rankStart=1, rankEnd=1], partitionBy=[contact_id, service_no], orderBy=[operate_time DESC], select=[operate_time, contact_id.....]) -> Calc(select=[contact_id, start_time, contact_length, service_no...code_contact_channel]) (1/1) (52b8519ad9a44832a283c1760f385bf6) switched from RUNNING to FAILED. 
> java.lang.ArrayIndexOutOfBoundsException: -1 > at java.util.ArrayList.elementData(ArrayList.java:422) > at java.util.ArrayList.remove(ArrayList.java:499) > at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElementWithoutRowNumber(AppendOnlyTopNFunction.java:205) > at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:120) > at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:46) > at org.apache.flink.streaming.api.operators.KeyedProcessOperator.processElement(KeyedProcessOperator.java:85) > at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:173) > at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.processElement(StreamTaskNetworkInput.java:151) > at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.emitNext(StreamTaskNetworkInput.java:128) > at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:69) > at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:311) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:187) > at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:487) > at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:470) > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:707) > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:532) > at java.lang.Thread.run(Thread.java:748) > [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.failover.flip1.RestartPipelinedRegionStrategy - Calculating tasks to restart to recover the failed task 28aa070d07f48addbf378d6ee01a29c6_0. 
> > > topn函数flinkSQL已经支持很久了,我不清楚为什么会产生这种错误,希望各位老师给出意见 > ------------------------------ 刘大龙 浙江大学 控制系 智能系统与控制研究所 工控新楼217 地址:浙江省杭州市浙大路38号浙江大学玉泉校区 Tel:18867547281 |
好的,期待修复。项目快上线了,有点慌,我自己也在看相关的代码片段。
在2020年5月14日 09:43,刘大龙<[hidden email]> 写道: Hi, 云邪已经确认这是一个bug,开了issue来跟进这个问题 https://issues.apache.org/jira/browse/FLINK-17625 -----原始邮件----- 发件人: 1101300123 <[hidden email]> 发送时间: 2020-05-14 09:19:41 (星期四) 收件人: "[hidden email]" <[hidden email]> 抄送: 主题: 1.10 使用 flinkSQL 的row_number()函数实现top1 出现数组越界,求助社区大佬 有没有人看到这个邮件?大概跑10小时左右数据就会出现; 困扰了好几天 我的SQL语句如下,部分字段省略 select a.contact_id, ... a.code_contact_channel from ( select contact_id, service_no, ... code_contact_channel, row_number() over(partition by contact_id,service_no order by operate_time desc) as rn from table1 )a join ( select contact_id , mobile_no, ... row_number() over(partition by contact_id,mobile_no order by create_time desc) as rn from table2 )b on a.contact_id = b.contact_id and a.service_no = b.mobile_no where a.rn = 1 and b.rn = 1 ; 程序部署在yarn上运行几个小时后就会出现错误日志显示如下: [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.ExecutionGraph - Rank(strategy=[AppendFastStrategy], rankType=[ROW_NUMBER], rankRange=[rankStart=1, rankEnd=1], partitionBy=[contact_id, service_no], orderBy=[operate_time DESC], select=[operate_time, contact_id.....]) -> Calc(select=[contact_id, start_time, contact_length, service_no...code_contact_channel]) (1/1) (52b8519ad9a44832a283c1760f385bf6) switched from RUNNING to FAILED. 
java.lang.ArrayIndexOutOfBoundsException: -1 at java.util.ArrayList.elementData(ArrayList.java:422) at java.util.ArrayList.remove(ArrayList.java:499) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElementWithoutRowNumber(AppendOnlyTopNFunction.java:205) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:120) at org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction.processElement(AppendOnlyTopNFunction.java:46) at org.apache.flink.streaming.api.operators.KeyedProcessOperator.processElement(KeyedProcessOperator.java:85) at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:173) at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.processElement(StreamTaskNetworkInput.java:151) at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.emitNext(StreamTaskNetworkInput.java:128) at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:69) at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:311) at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:187) at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:487) at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:470) at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:707) at org.apache.flink.runtime.taskmanager.Task.run(Task.java:532) at java.lang.Thread.run(Thread.java:748) [flink-akka.actor.default-dispatcher-8695] INFO org.apache.flink.runtime.executiongraph.failover.flip1.RestartPipelinedRegionStrategy - Calculating tasks to restart to recover the failed task 28aa070d07f48addbf378d6ee01a29c6_0. 
topn函数flinkSQL已经支持很久了,我不清楚为什么会产生这种错误,希望各位老师给出意见 ------------------------------ 刘大龙 浙江大学 控制系 智能系统与控制研究所 工控新楼217 地址:浙江省杭州市浙大路38号浙江大学玉泉校区 Tel:18867547281 |
Free forum by Nabble | Edit this page |