How to Use Big Query to Find Max Sequence Number

2023/05/054 min read
bookmark this
Responsive image

Assume you have these type of data sets, [1,2,3,4,5], [1,2,3,4,5,6,7], [1,2,3,4,5,6,7,8] and all with key as xxx, you want to find each array's max value from SQL.

Create Sample Data

First let's create the sample data set, for key xxx, we want to get the 5,8,9.

with base_data as
(
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '6' as id, 'xxx' as key union all
select '7' as id, 'xxx' as key union all
select '8' as id, 'xxx' as key union all
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '6' as id, 'xxx' as key union all
select '7' as id, 'xxx' as key union all
select '8' as id, 'xxx' as key union all
select '9' as id, 'xxx' as key union all
select '1' as id, 'yyy' as key union all
select '2' as id, 'yyy' as key union all
select '3' as id, 'yyy' as key union all
select '4' as id, 'yyy' as key union all
select '5' as id, 'yyy' as key union all
select '6' as id, 'yyy' as key union all
select '1' as id, 'zzz' as key
)

Find list of sequence number with join with string_agg

This will return sequence number as string.

, data_agg_find_sequence_no as (
select
(split(
string_agg(id, '_')
, '1_'
)) as pre_id
, key
from base_data
group by key

Find the last number as max

Here, substr the last 2 character as max number of that sequence.

, data_get_max_no_from_sequence as (
select
replace(
substr(x, -2)
, '_'
, '') as id
, key
from data_agg_find_sequence_no, unnest(pre_id) as x

Final Script

This's the full script, should be able to see aggregated key with max number from the sequence number.

with base_data as
(
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '6' as id, 'xxx' as key union all
select '7' as id, 'xxx' as key union all
select '8' as id, 'xxx' as key union all
select '1' as id, 'xxx' as key union all
select '2' as id, 'xxx' as key union all
select '3' as id, 'xxx' as key union all
select '4' as id, 'xxx' as key union all
select '5' as id, 'xxx' as key union all
select '6' as id, 'xxx' as key union all
select '7' as id, 'xxx' as key union all
select '8' as id, 'xxx' as key union all
select '9' as id, 'xxx' as key union all
select '1' as id, 'yyy' as key union all
select '2' as id, 'yyy' as key union all
select '3' as id, 'yyy' as key union all
select '4' as id, 'yyy' as key union all
select '5' as id, 'yyy' as key union all
select '6' as id, 'yyy' as key union all
select '1' as id, 'zzz' as key
)
, data_agg_find_sequence_no as (
select
(split(
string_agg(id, '_')
, '1_'
)) as pre_id
, key
from base_data
group by key
)
, data_get_max_no_from_sequence as (
select
replace(
substr(x, -2)
, '_'
, '') as id
, key
from data_agg_find_sequence_no, unnest(pre_id) as x
)
select * from data_get_max_no_from_sequence
where id != '';