Skip to content

Commit

Permalink
[CALCITE-6044] RelMetadataQuery should regard single-row relational e…
Browse files Browse the repository at this point in the history
…xpressions as unique

A single-row relation can result from a LIMIT 1 or an
aggregation without GROUP BY. Every column in one of these
relations should be unique by virtue of having a max row
count of 1.

When joining with a single-row relation on a key field, the
join result should no longer require that key field for
uniqueness. For example, suppose the emp table had a composite
key (empno,hiredate). If we join on hiredate=max(hiredate)
then empno alone should be a unique column.

Close apache#3495
  • Loading branch information
PaulJackson123 authored and julianhyde committed Jan 13, 2024
1 parent ad2a4e6 commit b2be7e1
Show file tree
Hide file tree
Showing 13 changed files with 718 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,21 @@ public interface UniqueKeys extends Metadata {
* represented as an {@link org.apache.calcite.util.ImmutableBitSet}, where
* each bit position represents a 0-based output column ordinal.
*
* <p>Note that a unique key plus other columns is still unique.
* Therefore, all columns are unique in a table with a unique key
* consisting of the empty set, as is the case for zero-row and
* single-row tables. The converse is not true: a table with all
* columns unique does not necessarily have the empty set as a key -
* that is never true with multi-row tables.
*
* <p>Nulls can be ignored if the relational expression has filtered out
* null values.
*
* @param ignoreNulls if true, ignore null values when determining
* whether the keys are unique
* @return set of keys, or null if this information cannot be determined
* (whereas empty set indicates definitely no keys at all)
* (whereas empty set indicates definitely no keys at all, and a set
* containing the empty set implies every column is unique)
*/
@Nullable Set<ImmutableBitSet> getUniqueKeys(boolean ignoreNulls);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttleImpl;
import org.apache.calcite.rel.SingleRel;
import org.apache.calcite.rel.convert.Converter;
import org.apache.calcite.rel.core.Aggregate;
Expand All @@ -37,26 +38,35 @@
import org.apache.calcite.rel.core.TableModify;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexProgram;
import org.apache.calcite.rex.RexSlot;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;

import org.apache.commons.lang3.mutable.MutableBoolean;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
* RelMdColumnUniqueness supplies a default implementation of
Expand All @@ -68,6 +78,15 @@ public class RelMdColumnUniqueness
ReflectiveRelMetadataProvider.reflectiveSource(
new RelMdColumnUniqueness(), BuiltInMetadata.ColumnUniqueness.Handler.class);

/**
* Kinds of aggregate function that are 'passthrough': for such a function A,
* if x is unique then A(x) is also unique.
*
* <p>The property is not guaranteed in the presence of an OVER clause.
*
* <p>Every kind currently listed ({@code MIN}, {@code MAX}, {@code ANY_VALUE})
* takes a single argument. NOTE: if a multi-argument function is added,
* methods that use this Set must be enhanced to select the appropriate column
* to pass through.
*/
static final Set<SqlKind> PASSTHROUGH_AGGREGATIONS =
ImmutableSet.of(SqlKind.MIN, SqlKind.MAX, SqlKind.ANY_VALUE);

//~ Constructors -----------------------------------------------------------

private RelMdColumnUniqueness() {}
Expand Down Expand Up @@ -151,6 +170,10 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,

public @Nullable Boolean areColumnsUnique(Sort rel, RelMetadataQuery mq,
    ImmutableBitSet columns, boolean ignoreNulls) {
  // A relation that can never produce more than one row (for example the
  // result of LIMIT 1) is trivially unique on any set of columns.
  final Double rowLimit = mq.getMaxRowCount(rel);
  final boolean atMostOneRow = rowLimit != null && rowLimit <= 1.0d;
  if (atMostOneRow) {
    return true;
  }

  // Otherwise defer to the input, after widening the requested columns with
  // those that the pulled-up predicates pin to a constant.
  final ImmutableBitSet widenedColumns =
      decorateWithConstantColumnsFromPredicates(columns, rel, mq);
  return mq.areColumnsUnique(rel.getInput(), widenedColumns, ignoreNulls);
}
Expand Down Expand Up @@ -266,21 +289,13 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
}
}

// If no columns can affect uniqueness, then return unknown
if (childColumns.cardinality() == 0) {
return null;
}

return mq.areColumnsUnique(rel.getInput(), childColumns.build(),
ignoreNulls);
}

public @Nullable Boolean areColumnsUnique(Join rel, RelMetadataQuery mq,
ImmutableBitSet columns, boolean ignoreNulls) {
columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
if (columns.cardinality() == 0) {
return false;
}

final RelNode left = rel.getLeft();
final RelNode right = rel.getRight();
Expand All @@ -290,13 +305,13 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
return mq.areColumnsUnique(left, columns, ignoreNulls);
}

final int leftColumnCount = rel.getLeft().getRowType().getFieldCount();
// Divide up the input column mask into column masks for the left and
// right sides of the join
final Pair<ImmutableBitSet, ImmutableBitSet> leftAndRightColumns =
splitLeftAndRightColumns(rel.getLeft().getRowType().getFieldCount(),
columns);
final ImmutableBitSet leftColumns = leftAndRightColumns.left;
final ImmutableBitSet rightColumns = leftAndRightColumns.right;
splitLeftAndRightColumns(leftColumnCount, columns);
ImmutableBitSet leftColumns = leftAndRightColumns.left;
ImmutableBitSet rightColumns = leftAndRightColumns.right;

// for FULL OUTER JOIN if columns contain column from both inputs it is not
// guaranteed that the result will be unique
Expand All @@ -305,6 +320,18 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
return false;
}

final JoinInfo joinInfo = rel.analyzeCondition();

// Joining with a singleton constrains the keys on the other table
final Double rightMaxRowCount = mq.getMaxRowCount(right);
if (rightMaxRowCount != null && rightMaxRowCount <= 1.0) {
leftColumns = leftColumns.union(joinInfo.leftSet());
}
final Double leftMaxRowCount = mq.getMaxRowCount(left);
if (leftMaxRowCount != null && leftMaxRowCount <= 1.0) {
rightColumns = rightColumns.union(joinInfo.rightSet());
}

// If the original column mask contains columns from both the left and
// right hand side, then the columns are unique if and only if they're
// unique for their respective join inputs
Expand All @@ -325,7 +352,6 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
// the columns are unique for the entire join if they're unique for
// the corresponding join input, provided that input is not null
// generating.
final JoinInfo joinInfo = rel.analyzeCondition();
if (leftColumns.cardinality() > 0) {
if (rel.getJoinType().generatesNullsOnLeft()) {
return false;
Expand All @@ -348,11 +374,15 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
return leftJoinColsUnique && rightUnique;
}

throw new AssertionError();
return false;
}

public @Nullable Boolean areColumnsUnique(Aggregate rel, RelMetadataQuery mq,
ImmutableBitSet columns, boolean ignoreNulls) {
Double maxRowCount = mq.getMaxRowCount(rel);
if (maxRowCount != null && maxRowCount <= 1.0d) {
return true;
}
if (Aggregate.isSimple(rel) || ignoreNulls) {
columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
// group by keys form a unique key
Expand All @@ -364,6 +394,22 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,
return false;
}

if (Aggregate.isSimple(rel)) {
// Map columns to input columns
ImmutableBitSet inputCols = ImmutableBitSet.builder()
.addAll(columns.intersect(rel.getGroupSet()))
.addAll(columns.toList()
.stream()
.map(col -> col - rel.getGroupSet().length())
.filter(col -> col >= 0)
.map(col -> rel.getAggCallList().get(col))
.filter(call -> PASSTHROUGH_AGGREGATIONS.contains(call.getAggregation().getKind()))
.map(call -> call.getArgList().get(0))
.collect(Collectors.toSet()))
.build();
return mq.areColumnsUnique(rel.getInput(), inputCols, ignoreNulls);
}

final ImmutableBitSet commonKeys = columns.intersect(groupKey);
if (commonKeys.isEmpty()) {
return false;
Expand All @@ -380,10 +426,10 @@ public Boolean areColumnsUnique(Intersect rel, RelMetadataQuery mq,

public Boolean areColumnsUnique(Values rel, RelMetadataQuery mq,
ImmutableBitSet columns, boolean ignoreNulls) {
columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
if (rel.tuples.size() < 2) {
return true;
}
columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
final Set<List<Comparable>> set = new HashSet<>();
final List<Comparable> values = new ArrayList<>(columns.cardinality());
for (ImmutableList<RexLiteral> tuple : rel.tuples) {
Expand Down Expand Up @@ -478,17 +524,74 @@ private static ImmutableBitSet decorateWithConstantColumnsFromPredicates(
ImmutableBitSet checkingColumns, RelNode rel, RelMetadataQuery mq) {
final RelOptPredicateList predicates = mq.getPulledUpPredicates(rel);
if (!RelOptPredicateList.isEmpty(predicates)) {
final Set<Integer> constantIndexes = new HashSet<>();
predicates.constantMap.keySet().forEach(rex -> {
if (rex instanceof RexInputRef) {
constantIndexes.add(((RexInputRef) rex).getIndex());
}
});
ImmutableBitSet constantIndexes = getConstantColumnSet(predicates);
if (!constantIndexes.isEmpty()) {
return checkingColumns.union(ImmutableBitSet.of(constantIndexes));
}
}
// If no constant columns deduced, return the original "checkingColumns".
return checkingColumns;
}

/**
 * Computes the ordinals of the columns that the given predicates pin to a
 * single value.
 *
 * <p>A column qualifies in either of two ways:
 * <ul>
 * <li>it appears, as an input reference, among the keys of the predicate
 *   list's constant map; or
 * <li>a pulled-up {@code =} or {@code IS NOT DISTINCT FROM} predicate compares
 *   it with a literal or with a scalar subquery that is not correlated.
 * </ul>
 *
 * <p>In both of the following queries {@code x} is constant:
 * <pre>SELECT x FROM table WHERE x = 5</pre>
 * and
 * <pre>SELECT x, y FROM table WHERE x = (SELECT MAX(x) FROM table)</pre>
 *
 * <p>NOTE: a subquery that references correlating variables does not make the
 * column constant:
 * <pre>SELECT x, y FROM table A WHERE x = (SELECT MAX(x) FROM table B WHERE A.y = B.y)</pre>
 *
 * @param relOptPredicateList predicates known to hold on a relational expression
 * @return bit set of the 0-based ordinals of the constant columns
 */
static ImmutableBitSet getConstantColumnSet(RelOptPredicateList relOptPredicateList) {
  final ImmutableBitSet.Builder constantColumns = ImmutableBitSet.builder();

  // Columns already recorded as constants by the predicate list itself.
  for (RexNode key : relOptPredicateList.constantMap.keySet()) {
    if (key instanceof RexInputRef) {
      final RexSlot slot = (RexInputRef) key;
      constantColumns.set(slot.getIndex());
    }
  }

  // Columns compared for equality against a constant; the comparison may be
  // written either way round, so try both operand orders.
  for (RexNode predicate : relOptPredicateList.pulledUpPredicates) {
    final SqlKind kind = predicate.getKind();
    if (kind != SqlKind.EQUALS && kind != SqlKind.IS_NOT_DISTINCT_FROM) {
      continue;
    }
    final List<RexNode> operands = ((RexCall) predicate).getOperands();
    final RexNode lhs = operands.get(0);
    final RexNode rhs = operands.get(1);
    addInputRefIfOtherConstant(constantColumns, lhs, rhs);
    addInputRefIfOtherConstant(constantColumns, rhs, lhs);
  }

  return constantColumns.build();
}

/**
 * Marks a column as constant when one side of a comparison is an input
 * reference and the other side is a literal or a constant scalar subquery.
 *
 * @param builder receives the ordinal of the referenced column
 * @param inputRef operand that might be a {@link RexInputRef}
 * @param other the opposite operand of the comparison
 */
private static void addInputRefIfOtherConstant(ImmutableBitSet.Builder builder, RexNode inputRef,
    RexNode other) {
  if (!(inputRef instanceof RexInputRef)) {
    return;
  }
  final boolean otherIsConstant =
      other.getKind() == SqlKind.LITERAL || isConstantScalarQuery(other);
  if (otherIsConstant) {
    builder.set(((RexInputRef) inputRef).getIndex());
  }
}

/**
 * Returns whether the supplied {@link RexNode} is a constant scalar subquery -
 * one that does not reference any correlating variables.
 *
 * <p>The check walks the subquery's relational tree and inspects the
 * condition of each {@link LogicalFilter} it reaches; the walk does not
 * descend below a filter whose condition contains a correlation.
 *
 * @param rexNode expression to test
 * @return true if {@code rexNode} is a scalar subquery in which no visited
 *     filter condition contains a correlation; false otherwise
 */
private static boolean isConstantScalarQuery(RexNode rexNode) {
  if (rexNode.getKind() != SqlKind.SCALAR_QUERY) {
    return false;
  }

  final MutableBoolean correlated = new MutableBoolean(false);
  final RelShuttleImpl correlationFinder = new RelShuttleImpl() {
    @Override public RelNode visit(final LogicalFilter filter) {
      if (!RexUtil.containsCorrelation(filter.getCondition())) {
        return super.visit(filter);
      }
      correlated.setTrue();
      return filter;
    }
  };
  ((RexSubQuery) rexNode).rel.accept(correlationFinder);
  return correlated.isFalse();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.calcite.rel.core.TableModify;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexExecutor;
Expand Down Expand Up @@ -76,10 +77,12 @@
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static java.util.Objects.requireNonNull;

Expand Down Expand Up @@ -365,7 +368,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
// it is not valid to pull up predicates. In particular, consider the
// predicate "false": it is valid on all input rows (trivially - there are
// no rows!) but not on the output (there is one row).
return RelOptPredicateList.EMPTY;
return RelOptPredicateList.of(rexBuilder, aggPullUpPredicates);
}
Mapping m =
Mappings.create(MappingType.PARTIAL_FUNCTION,
Expand Down Expand Up @@ -533,6 +536,44 @@ public RelOptPredicateList getPredicates(Exchange exchange,
return mq.getPulledUpPredicates(input);
}

/**
 * Infers predicates for a Values.
 *
 * <p>A column contributes a predicate only if it holds the same literal in
 * every row. The predicates on {@code T (x, y, z)} with rows
 * {@code (1, 2, null), (1, 2, null), (5, 2, null)} are {@code 'y = 2'} and
 * {@code 'z is null'}.
 *
 * @param values the Values whose tuples are examined
 * @param mq metadata query; not consulted by this method
 * @return a predicate list with one predicate per constant column, or
 *     {@link RelOptPredicateList#EMPTY} if {@code values} has no rows
 */
public RelOptPredicateList getPredicates(Values values, RelMetadataQuery mq) {
  final ImmutableList<ImmutableList<RexLiteral>> tuples = values.tuples;
  if (tuples.isEmpty()) {
    return RelOptPredicateList.EMPTY;
  }
  final List<RexLiteral> firstTuple = tuples.get(0);

  // Start by assuming every column is constant. The candidates are column
  // ordinals, so the range is bounded by the width of a row, not by the
  // number of rows; otherwise trailing columns are lost whenever there are
  // fewer rows than columns.
  final Set<Integer> constants = new HashSet<>();
  IntStream.range(0, firstTuple.size()).boxed().forEach(constants::add);

  // Discard each column whose value differs from the first row's value.
  for (ImmutableList<RexLiteral> tuple : tuples) {
    if (constants.isEmpty()) {
      break;
    }
    for (int i = 0; i < tuple.size(); i++) {
      if (!Objects.equals(tuple.get(i), firstTuple.get(i))) {
        constants.remove(i);
      }
    }
  }

  final RexBuilder rexBuilder = values.getCluster().getRexBuilder();
  final List<RexNode> predicates = new ArrayList<>();
  for (int i = 0; i < firstTuple.size(); i++) {
    if (!constants.contains(i)) {
      continue;
    }
    final RexLiteral literal = firstTuple.get(i);
    final RexNode ref = rexBuilder.makeInputRef(literal.getType(), i);
    if (literal.isNull()) {
      // "col = NULL" never evaluates to TRUE, so a constant-null column must
      // be described with IS NULL instead.
      predicates.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref));
    } else {
      predicates.add(
          rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, literal));
    }
  }
  return RelOptPredicateList.of(rexBuilder, predicates);
}

// CHECKSTYLE: IGNORE 1
/**
* Returns the
Expand Down
Loading

0 comments on commit b2be7e1

Please sign in to comment.