Skip to content

Commit

Permalink
Transform OR-clauses to SAOP's during index matching
Browse files Browse the repository at this point in the history
Replace "(indexkey op C1) OR (indexkey op C2) ... (indexkey op CN)" with
"indexkey op ANY(ARRAY[C1, C2, ...])" (a ScalarArrayOpExpr node) while
matching a clause to an index.

Here Ci is the i-th constant or parameter expression, 'indexkey' is the
non-constant (indexed) expression, and 'op' is an operator that returns a
boolean result. The operator must also have a commutator when the constant and
non-constant parts appear in reverse order, as in 'Ci op indexkey'.

This transformation allows handling long OR-clauses with a single IndexScan,
avoiding slower bitmap scans.

Discussion: https://postgr.es/m/567ED6CA.2040504%40sigaev.ru
Author: Alena Rybakina <lena.ribackina@yandex.ru>
Author: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Reviewed-by: Ranier Vilela <ranier.vf@gmail.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Nikolay Shaplov <dhyan@nataraj.su>

to  Transform OR-clauses
  • Loading branch information
akorotkov authored and pashkinelfe committed Nov 6, 2024
1 parent 6aeb4e4 commit 7a4d7ca
Show file tree
Hide file tree
Showing 11 changed files with 729 additions and 23 deletions.
281 changes: 277 additions & 4 deletions src/backend/optimizer/path/indxpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "access/stratnum.h"
#include "access/sysattr.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
Expand All @@ -32,8 +33,10 @@
#include "optimizer/paths.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


/* XXX see PartCollMatchesExprColl */
Expand Down Expand Up @@ -167,6 +170,10 @@ static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index);
static IndexClause *match_orclause_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index);
static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
Expand Down Expand Up @@ -2186,7 +2193,10 @@ match_clause_to_index(PlannerInfo *root,
* (3) must match the collation of the index, if collation is relevant.
*
* Our definition of "const" is exceedingly liberal: we allow anything that
* doesn't involve a volatile function or a Var of the index's relation.
* doesn't involve a volatile function or a Var of the index's relation.
* The exception is a boolean OR expression input: as a trade-off between the
* expected execution speedup and planning complexity, the OR->SAOP
* transformation is limited to the obvious cases where an index scan can
* benefit from it.
* In particular, Vars belonging to other relations of the query are
* accepted here, since a clause of that form can be used in a
* parameterized indexscan. It's the responsibility of higher code levels
Expand Down Expand Up @@ -2216,6 +2226,10 @@ match_clause_to_index(PlannerInfo *root,
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
* the clause is of the form "indexkey op ANY (arrayconst)".
*
* It is also possible to match a list of OR clauses if it can be
* transformed into a single ScalarArrayOpExpr clause. On success,
* the returned index clause will contain the transformed clause.
*
* For boolean indexes, it is also possible to match the clause directly
* to the indexkey; or perhaps the clause is (NOT indexkey).
*
Expand Down Expand Up @@ -2265,9 +2279,9 @@ match_clause_to_indexcol(PlannerInfo *root,
}

/*
* Clause must be an opclause, funcclause, ScalarArrayOpExpr, or
* RowCompareExpr. Or, if the index supports it, we can handle IS
* NULL/NOT NULL clauses.
* Clause must be an opclause, funcclause, ScalarArrayOpExpr,
* RowCompareExpr, or OR-clause that could be converted to SAOP. Or, if
* the index supports it, we can handle IS NULL/NOT NULL clauses.
*/
if (IsA(clause, OpExpr))
{
Expand All @@ -2285,6 +2299,10 @@ match_clause_to_indexcol(PlannerInfo *root,
{
return match_rowcompare_to_indexcol(root, rinfo, indexcol, index);
}
else if (restriction_is_or_clause(rinfo))
{
return match_orclause_to_indexcol(root, rinfo, indexcol, index);
}
else if (index->amsearchnulls && IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
Expand Down Expand Up @@ -2808,6 +2826,261 @@ match_rowcompare_to_indexcol(PlannerInfo *root,
return NULL;
}

/*
 * match_orclause_to_indexcol()
 *	  Handles the OR-expr case for match_clause_to_indexcol() in the case
 *	  when it could be transformed to ScalarArrayOpExpr.
 *
 * Given the OR-clause held in rinfo->orclause, attempts to transform this
 * BoolExpr into a single "indexkey op ANY (array)" SAOP expression.
 *
 * 'root' is passed through to make_simple_restrictinfo().
 * 'rinfo' is the RestrictInfo of the OR-clause; its orclause field must be a
 *		BoolExpr with boolop == OR_EXPR.
 * 'indexcol' is the index column the OR arguments are matched against.
 * 'index' is the index under consideration.
 *
 * Returns an IndexClause whose indexquals list holds the transformed
 * expression, or NULL if the OR-clause cannot be transformed.  The returned
 * IndexClause keeps the original 'rinfo' in its rinfo field.
 */
static IndexClause *
match_orclause_to_indexcol(PlannerInfo *root,
						   RestrictInfo *rinfo,
						   int indexcol,
						   IndexOptInfo *index)
{
	ListCell   *lc;
	BoolExpr   *orclause = (BoolExpr *) rinfo->orclause;
	Node	   *indexExpr = NULL;
	List	   *consts = NIL;
	Node	   *arrayNode = NULL;
	ScalarArrayOpExpr *saopexpr = NULL;
	Oid			matchOpno = InvalidOid;
	IndexClause *iclause;
	Oid			consttype = InvalidOid;
	Oid			arraytype = InvalidOid;
	Oid			inputcollid = InvalidOid;
	bool		firstTime = true;
	bool		have_param = false;

	Assert(IsA(orclause, BoolExpr));
	Assert(orclause->boolop == OR_EXPR);

	/*
	 * Ignore the index if its access method does not support searching for
	 * array (ScalarArrayOpExpr) quals.  The IndexClause built below keeps the
	 * original OR rinfo in iclause->rinfo, so any later code that inspects
	 * iclause->rinfo->clause to recognize SAOP quals would not notice the
	 * derived one; hence the capability must be verified right here.
	 */
	if (!index->amsearcharray)
		return NULL;

	/*
	 * Try to convert a list of OR-clauses to a single SAOP expression. Each
	 * OR entry must be in the form: (indexkey operator constant) or (constant
	 * operator indexkey).  Operators of all the entries must match.  Constant
	 * might be either Const or Param.  To be efficient, give up on the first
	 * non-matching entry.  Exit is implemented as a break from the loop,
	 * which is caught afterwards.
	 */
	foreach(lc, orclause->args)
	{
		RestrictInfo *subRinfo;
		OpExpr	   *subClause;
		Oid			opno;
		Node	   *leftop,
				   *rightop;
		Node	   *constExpr;

		if (!IsA(lfirst(lc), RestrictInfo))
			break;

		subRinfo = (RestrictInfo *) lfirst(lc);

		/* Only operator clauses can match */
		if (!IsA(subRinfo->clause, OpExpr))
			break;

		subClause = (OpExpr *) subRinfo->clause;
		opno = subClause->opno;

		/* Only binary operators can match */
		if (list_length(subClause->args) != 2)
			break;

		/*
		 * The parameters below must match between sub-rinfo and its parent as
		 * make_restrictinfo() fills them with the same values, and further
		 * modifications are also the same for the whole subtree.  However,
		 * still make a sanity check.
		 */
		Assert(subRinfo->is_pushed_down == rinfo->is_pushed_down);
		Assert(subRinfo->is_clone == rinfo->is_clone);
		Assert(subRinfo->security_level == rinfo->security_level);
		Assert(bms_equal(subRinfo->incompatible_relids, rinfo->incompatible_relids));
		Assert(bms_equal(subRinfo->outer_relids, rinfo->outer_relids));

		/*
		 * Also, check that required_relids in sub-rinfo is subset of parent's
		 * required_relids.
		 */
		Assert(bms_is_subset(subRinfo->required_relids, rinfo->required_relids));

		/* Only operator returning boolean suits the transformation */
		if (get_op_rettype(opno) != BOOLOID)
			break;

		/*
		 * Check for clauses of the form: (indexkey operator constant) or
		 * (constant operator indexkey).  Determine indexkey side first, check
		 * the constant later.
		 */
		leftop = (Node *) linitial(subClause->args);
		rightop = (Node *) lsecond(subClause->args);
		if (match_index_to_operand(leftop, indexcol, index))
		{
			indexExpr = leftop;
			constExpr = rightop;
		}
		else if (match_index_to_operand(rightop, indexcol, index))
		{
			/* The SAOP needs the indexkey on the left: use the commutator */
			opno = get_commutator(opno);
			if (!OidIsValid(opno))
			{
				/* commutator doesn't exist, we can't reverse the order */
				break;
			}
			indexExpr = rightop;
			constExpr = leftop;
		}
		else
		{
			break;
		}

		/*
		 * Ignore any RelabelType node above the operands.  This is needed to
		 * be able to apply indexscanning in binary-compatible-operator cases.
		 * Note: we can assume there is at most one RelabelType node;
		 * eval_const_expressions() will have simplified if more than one.
		 */
		if (IsA(constExpr, RelabelType))
			constExpr = (Node *) ((RelabelType *) constExpr)->arg;
		if (IsA(indexExpr, RelabelType))
			indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;

		/* We allow constant to be Const or Param */
		if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
			break;

		/* Forbid transformation for composite types, records. */
		if (type_is_rowtype(exprType(constExpr)) ||
			type_is_rowtype(exprType(indexExpr)))
			break;

		/*
		 * Save information about the operator, type, and collation for the
		 * first matching qual.  Then, check that subsequent quals match the
		 * first.
		 */
		if (firstTime)
		{
			matchOpno = opno;
			consttype = exprType(constExpr);
			arraytype = get_array_type(consttype);
			inputcollid = subClause->inputcollid;

			/*
			 * Check that the operator is presented in the opfamily and that
			 * the expression collation matches the index collation.  Also,
			 * there must be an array type to construct an array later.
			 */
			if (!IndexCollMatchesExprColl(index->indexcollations[indexcol], inputcollid) ||
				!op_in_opfamily(matchOpno, index->opfamily[indexcol]) ||
				!OidIsValid(arraytype))
				break;
			firstTime = false;
		}
		else
		{
			if (opno != matchOpno ||
				inputcollid != subClause->inputcollid ||
				consttype != exprType(constExpr))
				break;
		}

		if (IsA(constExpr, Param))
			have_param = true;
		consts = lappend(consts, constExpr);
	}

	/*
	 * Catch the break from the loop above.  Normally, a foreach() loop ends
	 * up with a NULL list cell.  A non-NULL list cell indicates a break from
	 * the foreach() loop.  Free the consts list and return NULL then.
	 */
	if (lc != NULL)
	{
		list_free(consts);
		return NULL;
	}

	/*
	 * Assemble an array from the list of constants.  It seems more profitable
	 * to build a const array.  But in the presence of parameters, we don't
	 * have a specific value here and must employ an ArrayExpr instead.
	 */
	if (have_param)
	{
		ArrayExpr  *arrayExpr = makeNode(ArrayExpr);

		/*
		 * array_collid is not assigned here.  NOTE(review): the original
		 * comment said "array_collid will be set by parse_collate.c", but
		 * this node is built during planning -- confirm whether it needs an
		 * explicit value.
		 */
		arrayExpr->element_typeid = consttype;
		arrayExpr->array_typeid = arraytype;
		arrayExpr->multidims = false;
		arrayExpr->elements = consts;
		arrayExpr->location = -1;

		arrayNode = (Node *) arrayExpr;
	}
	else
	{
		int16		typlen;
		bool		typbyval;
		char		typalign;
		Datum	   *elems;
		bool	   *nulls;
		int			nelems = list_length(consts);
		int			dims[1];
		int			lbs[1];
		int			i = 0;
		ArrayType  *arrayConst;

		get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign);

		elems = (Datum *) palloc(sizeof(Datum) * nelems);
		nulls = (bool *) palloc(sizeof(bool) * nelems);
		foreach(lc, consts)
		{
			Const	   *c = (Const *) lfirst(lc);

			elems[i] = c->constvalue;
			nulls[i] = c->constisnull;
			i++;
		}

		/*
		 * Build a one-dimensional array with lower bound 1.  Use
		 * construct_md_array() rather than construct_array() so that NULL
		 * constants are stored as NULL elements instead of having their
		 * (meaningless) constvalue treated as a real datum.
		 */
		dims[0] = nelems;
		lbs[0] = 1;
		arrayConst = construct_md_array(elems, nulls, 1, dims, lbs,
										consttype, typlen, typbyval, typalign);
		arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
									   -1, PointerGetDatum(arrayConst),
									   false, false);

		pfree(elems);
		pfree(nulls);
		list_free(consts);
	}

	/* Build the SAOP expression node */
	saopexpr = makeNode(ScalarArrayOpExpr);
	saopexpr->opno = matchOpno;
	saopexpr->opfuncid = get_opcode(matchOpno);
	saopexpr->hashfuncid = InvalidOid;
	saopexpr->negfuncid = InvalidOid;
	saopexpr->useOr = true;
	saopexpr->inputcollid = inputcollid;
	saopexpr->args = list_make2(indexExpr, arrayNode);
	saopexpr->location = -1;

	/*
	 * Finally, build an IndexClause based on the SAOP node.  Use
	 * make_simple_restrictinfo() to get RestrictInfo with clean selectivity
	 * estimations because it may differ from the estimation made for an OR
	 * clause.  Although it is not a lossy expression, keep the old version of
	 * rinfo in iclause->rinfo to detect duplicates and recheck the original
	 * clause.
	 */
	iclause = makeNode(IndexClause);
	iclause->rinfo = rinfo;
	iclause->indexquals = list_make1(make_simple_restrictinfo(root,
															  &saopexpr->xpr));
	iclause->lossy = false;
	iclause->indexcol = indexcol;
	iclause->indexcols = NIL;
	return iclause;
}

/*
* expand_indexqual_rowcompare --- expand a single indexqual condition
* that is a RowCompareExpr
Expand Down
Loading

0 comments on commit 7a4d7ca

Please sign in to comment.