Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -802,4 +802,121 @@ mod tests {
Some(QueryError::SpatialDurationSmall),
);
}

// ── Multi-projection SELECT (group cols + aggregate) ─────────────────────
//
// ClickHouse and standard SQL allow `SELECT g1, g2, agg(v) FROM t GROUP BY g1, g2`
// (one row per group with the grouping keys included alongside the aggregate).
// The pattern parser must also accept this shape and produce the same structural
// SQLQueryData as the single-projection form `SELECT agg(v) FROM t GROUP BY g1, g2`.

#[test]
fn test_multi_projection_groupcols_then_aggregate() {
let query = parse_sql_query(
"SELECT L1, L2, L3, L4, SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1, L2, L3, L4",
)
.expect("multi-projection SELECT with group cols + aggregate should parse");
assert_eq!(query.metric, "cpu_usage");
assert_eq!(query.aggregation_info.get_name(), "SUM");
assert!(query.labels.contains("L1"));
assert!(query.labels.contains("L4"));
}

#[test]
fn test_multi_projection_aggregate_first() {
let query = parse_sql_query(
"SELECT SUM(value), L1, L2, L3, L4 FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1, L2, L3, L4",
)
.expect("aggregate-first multi-projection SELECT should parse");
assert_eq!(query.aggregation_info.get_name(), "SUM");
}

#[test]
fn test_multi_projection_quantile_clickhouse_syntax() {
// The exact shape of the user's netflow query: ClickHouse parametric quantile
// with grouping columns alongside the aggregate in SELECT.
let query = parse_sql_query(
"SELECT L1, L2, quantile(0.99)(value) AS p99 FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -11, NOW()) AND DATEADD(s, -10, NOW()) \
GROUP BY L1, L2",
)
.expect("multi-projection ClickHouse parametric quantile should parse");
assert_eq!(query.aggregation_info.get_name(), "QUANTILE");
assert_eq!(query.aggregation_info.get_args()[0], "0.99");
}

#[test]
fn test_multi_projection_matches_single_projection_template() {
// A template registered as single-projection should structurally match an
// incoming query that lists the group cols in SELECT alongside the aggregate.
let template = parse_sql_query(
"SELECT SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1, L2, L3, L4",
)
.expect("single-projection template should parse");
let incoming = parse_sql_query(
"SELECT L1, L2, L3, L4, SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' \
GROUP BY L1, L2, L3, L4",
)
.expect("multi-projection incoming should parse");
assert!(incoming.matches_sql_pattern(&template));
}

#[test]
fn test_multi_projection_rejects_two_aggregates() {
// Two aggregate functions in the projection list — the parser only tracks one
// statistic so this must be rejected to avoid silently dropping one.
assert!(parse_sql_query(
"SELECT SUM(value), AVG(value), L1 FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1",
)
.is_none());
}

#[test]
fn test_multi_projection_rejects_arbitrary_expr() {
// Non-identifier, non-function projection items (computed expressions, literals, …)
// are not supported by the pattern model and must be rejected.
assert!(parse_sql_query(
"SELECT (L1 + 1), SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1",
)
.is_none());
}

#[test]
fn test_multi_projection_rejects_select_col_not_in_groupby() {
// L2 is in SELECT but not in GROUP BY. Standard SQL rejects this; we must too,
// otherwise the column would be silently dropped from the output.
assert!(parse_sql_query(
"SELECT L2, SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1",
)
.is_none());
}

#[test]
fn test_multi_projection_accepts_select_subset_of_groupby() {
// SELECT lists a subset of group-by keys (L1) while the GROUP BY uses two
// (L1, L2). Allowed: every SELECT identifier is in GROUP BY; the remaining
// group-by key is just absent from the projection.
let query = parse_sql_query(
"SELECT L1, SUM(value) FROM cpu_usage \
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
GROUP BY L1, L2",
)
.expect("SELECT subset of GROUP BY should parse");
assert!(query.labels.contains("L1"));
assert!(query.labels.contains("L2"));
assert_eq!(query.aggregation_info.get_name(), "SUM");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ impl SQLPatternParser {

let group_bys = self.get_groupbys(select)?;

if !self.select_identifiers_subset_of(select, &group_bys) {
return None;
}

if !has_subquery {
let time_info = self.get_time_info(select, &metric)?;

Expand Down Expand Up @@ -126,6 +130,9 @@ impl SQLPatternParser {
SetExpr::Select(inner_select) => {
let inner_aggregation = self.get_aggregation(inner_select)?;
let inner_group_bys = self.get_groupbys(inner_select)?;
if !self.select_identifiers_subset_of(inner_select, &inner_group_bys) {
return None;
}
let time_info = self.get_time_info(inner_select, &metric)?;

Some(Box::new(SQLQueryData {
Expand Down Expand Up @@ -210,89 +217,122 @@ impl SQLPatternParser {
}
}

/// Returns true iff every non-aggregate identifier in `select.projection` is
/// also present in `group_bys`. Used to reject queries like
/// `SELECT srcip, SUM(v) FROM t GROUP BY proto`, where standard SQL would
/// require `srcip` to appear in the GROUP BY clause; without this check the
/// pattern parser would silently drop `srcip` from the output.
fn select_identifiers_subset_of(&self, select: &Select, group_bys: &HashSet<String>) -> bool {
for item in &select.projection {
let expr = match item {
SelectItem::UnnamedExpr(expr) => expr,
SelectItem::ExprWithAlias { expr, .. } => expr,
_ => continue,
};
if let Expr::Identifier(ident) = expr {
if !group_bys.contains(&ident.value) {
return false;
}
}
}
true
}

fn get_aggregation(&self, select: &Select) -> Option<AggregationInfo> {
if select.projection.len() != 1 {
return None;
// Find the (single) aggregate function in the projection list. Other
// projection items must be plain column references — these are expected to
// be group-by keys (e.g. `SELECT g1, g2, SUM(v) FROM t GROUP BY g1, g2`).
// Anything else (multiple aggregates, computed expressions, literals, *)
// is rejected since the structural pattern model only tracks one statistic.
let mut agg_func: Option<&Function> = None;
for item in &select.projection {
let expr = match item {
SelectItem::UnnamedExpr(expr) => expr,
SelectItem::ExprWithAlias { expr, .. } => expr,
_ => return None,
};
match expr {
Expr::Function(f) => {
if agg_func.is_some() {
return None;
}
agg_func = Some(f);
}
Expr::Identifier(_) | Expr::CompoundIdentifier(_) => {}
_ => return None,
}
}
let func = agg_func?;

match &select.projection[0] {
SelectItem::UnnamedExpr(Expr::Function(func))
| SelectItem::ExprWithAlias {
expr: Expr::Function(func),
..
} => {
let name = func.name.to_string().to_uppercase();

let args = self.get_quantile_args(func);

// Get the column being aggregated
let col = match &func.args {
FunctionArguments::None => return None,
FunctionArguments::Subquery(_) => return None,
FunctionArguments::List(func_args) => {
if name == "QUANTILE" {
if let FunctionArguments::List(params) = &func.parameters {
if !params.args.is_empty() {
// ClickHouse parametric syntax: quantile(0.95)(column)
// Column is the sole argument in func.args.
match func_args.args.first() {
Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::Identifier(ident),
))) => ident.value.clone(),
_ => return None,
}
} else {
return None;
}
} else {
// ASAP syntax: QUANTILE(0.95, value) - column is second argument
if func_args.args.len() < 2 {
return None;
}
match &func_args.args[1] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::Identifier(ident),
)) => ident.value.clone(),
_ => return None,
}
}
} else if name == "PERCENTILE" {
// PERCENTILE(value, 95) - column is first argument
if func_args.args.is_empty() {
return None;
}
match &func_args.args[0] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
ident,
let name = func.name.to_string().to_uppercase();

let args = self.get_quantile_args(func);

// Get the column being aggregated
let col = match &func.args {
FunctionArguments::None => return None,
FunctionArguments::Subquery(_) => return None,
FunctionArguments::List(func_args) => {
if name == "QUANTILE" {
if let FunctionArguments::List(params) = &func.parameters {
if !params.args.is_empty() {
// ClickHouse parametric syntax: quantile(0.95)(column)
// Column is the sole argument in func.args.
match func_args.args.first() {
Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::Identifier(ident),
))) => ident.value.clone(),
_ => return None,
}
} else {
// For other aggregations - column is first argument
if func_args.args.is_empty() {
return None;
}
match &func_args.args[0] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
ident,
))) => ident.value.clone(),
_ => return None,
}
return None;
}
} else {
// ASAP syntax: QUANTILE(0.95, value) - column is second argument
if func_args.args.len() < 2 {
return None;
}
match &func_args.args[1] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
ident,
))) => ident.value.clone(),
_ => return None,
}
}
};

// Always store PERCENTILE as QUANTILE internally
let normalized_name = if name == "PERCENTILE" {
"QUANTILE".to_string()
} else if name == "PERCENTILE" {
// PERCENTILE(value, 95) - column is first argument
if func_args.args.is_empty() {
return None;
}
match &func_args.args[0] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => {
ident.value.clone()
}
_ => return None,
}
} else {
name
};

Some(AggregationInfo::new(normalized_name, col, args))
// For other aggregations - column is first argument
if func_args.args.is_empty() {
return None;
}
match &func_args.args[0] {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => {
ident.value.clone()
}
_ => return None,
}
}
}
_ => None,
}
};

// Always store PERCENTILE as QUANTILE internally
let normalized_name = if name == "PERCENTILE" {
"QUANTILE".to_string()
} else {
name
};

Some(AggregationInfo::new(normalized_name, col, args))
}

fn get_metric(&self, select: &Select) -> Option<(String, bool)> {
Expand Down
Loading