Skip to content

Commit 4f5d80a

Browse files
committed
Better processing and optimization of IN <list> predicates (#7707)
* WIP * Original (circa 2022) implementation of the IN LIST optimization, with some post-fixes and minor adjustments * Make it possible to optimize IN <list> for middle segments in compound indices * Avoid modifying the retrieval structure at runtime, as it may be shared among concurrent requests * Simplify the code a little. Better cost calculation. Support both root-based and sibling-based list scans inside the same plan node. * Removed the unneeded const casts and other changes as suggested by Adriano
1 parent c130731 commit 4f5d80a

File tree

19 files changed

+1000
-232
lines changed

19 files changed

+1000
-232
lines changed

src/dsql/BoolNodes.cpp

Lines changed: 295 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "../dsql/gen_proto.h"
4242
#include "../dsql/make_proto.h"
4343
#include "../dsql/pass1_proto.h"
44+
#include "../dsql/DSqlDataTypeUtil.h"
4445

4546
using namespace Firebird;
4647
using namespace Jrd;
@@ -49,8 +50,8 @@ namespace Jrd {
4950

5051

5152
// Maximum members in "IN" list. For eg. SELECT * FROM T WHERE F IN (1, 2, 3, ...)
52-
// Bug 10061, bsriram - 19-Apr-1999
53-
static const int MAX_MEMBER_LIST = 1500;
53+
// Beware: raising the limit beyond the 16-bit boundaries would be an incompatible BLR change.
54+
static const unsigned MAX_MEMBER_LIST = MAX_USHORT;
5455

5556

5657
//--------------------
@@ -306,7 +307,20 @@ ComparativeBoolNode::ComparativeBoolNode(MemoryPool& pool, UCHAR aBlrOp,
306307
arg1(aArg1),
307308
arg2(aArg2),
308309
arg3(aArg3),
309-
dsqlSpecialArg(NULL)
310+
dsqlSpecialArg(nullptr)
311+
{
312+
}
313+
314+
// DSQL-oriented overload: the right-hand side of the comparison is supplied as a
// "special" argument (stored in dsqlSpecialArg) instead of as plain value operands,
// so arg2 and arg3 start out null. The DSQL flag is taken from the caller and
// dsqlCheckBoolean is switched off.
ComparativeBoolNode::ComparativeBoolNode(MemoryPool& pool, UCHAR aBlrOp,
			ValueExprNode* aArg1, DsqlFlag aDsqlFlag, ExprNode* aSpecialArg)
	: TypedNode<BoolExprNode, ExprNode::TYPE_COMPARATIVE_BOOL>(pool),
	  blrOp(aBlrOp),
	  dsqlCheckBoolean(false),
	  dsqlFlag(aDsqlFlag),
	  arg1(aArg1),
	  arg2(nullptr),
	  arg3(nullptr),
	  dsqlSpecialArg(aSpecialArg)
{
	// Nothing to do beyond member initialization
}
312326

@@ -355,34 +369,35 @@ BoolExprNode* ComparativeBoolNode::dsqlPass(DsqlCompilerScratch* dsqlScratch)
355369

356370
if (dsqlSpecialArg)
357371
{
358-
ValueListNode* listNode = nodeAs<ValueListNode>(dsqlSpecialArg);
359-
if (listNode)
372+
if (const auto listNode = nodeAs<ValueListNode>(dsqlSpecialArg))
360373
{
361-
int listItemCount = 0;
362-
BoolExprNode* resultNode = NULL;
363-
NestConst<ValueExprNode>* ptr = listNode->items.begin();
374+
if (listNode->items.getCount() > MAX_MEMBER_LIST)
375+
{
376+
ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-901) <<
377+
Arg::Gds(isc_imp_exc) <<
378+
Arg::Gds(isc_dsql_too_many_values) << Arg::Num(MAX_MEMBER_LIST));
379+
}
364380

365-
for (const NestConst<ValueExprNode>* const end = listNode->items.end();
366-
ptr != end;
367-
++listItemCount, ++ptr)
381+
if (listNode->items.getCount() == 1)
368382
{
369-
if (listItemCount >= MAX_MEMBER_LIST)
370-
{
371-
ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-901) <<
372-
Arg::Gds(isc_imp_exc) <<
373-
Arg::Gds(isc_dsql_too_many_values) << Arg::Num(MAX_MEMBER_LIST));
374-
}
383+
// Convert A IN (B) into A = B
384+
385+
ComparativeBoolNode* const resultNode = FB_NEW_POOL(dsqlScratch->getPool())
386+
ComparativeBoolNode(dsqlScratch->getPool(),
387+
blr_eql, arg1, listNode->items.front());
375388

376-
ComparativeBoolNode* temp = FB_NEW_POOL(dsqlScratch->getPool()) ComparativeBoolNode(
377-
dsqlScratch->getPool(), blrOp, procArg1, *ptr);
378-
resultNode = PASS1_compose(resultNode, temp, blr_or);
389+
return resultNode->dsqlPass(dsqlScratch);
379390
}
380391

392+
// Generate the IN LIST boolean
393+
394+
InListBoolNode* const resultNode = FB_NEW_POOL(dsqlScratch->getPool())
395+
InListBoolNode(dsqlScratch->getPool(), procArg1, listNode);
396+
381397
return resultNode->dsqlPass(dsqlScratch);
382398
}
383399

384-
SelectExprNode* selNode = nodeAs<SelectExprNode>(dsqlSpecialArg);
385-
if (selNode)
400+
if (const auto selNode = nodeAs<SelectExprNode>(dsqlSpecialArg))
386401
{
387402
fb_assert(!(selNode->dsqlFlags & RecordSourceNode::DFLAG_SINGLETON));
388403
UCHAR newBlrOp = blr_any;
@@ -573,18 +588,13 @@ BoolExprNode* ComparativeBoolNode::pass1(thread_db* tdbb, CompilerScratch* csb)
573588
if ((nodFlags & FLAG_INVARIANT) &&
574589
(!nodeIs<LiteralNode>(arg2) || (arg3 && !nodeIs<LiteralNode>(arg3))))
575590
{
576-
ExprNode* const* ctx_node;
577-
ExprNode* const* end;
578-
579-
for (ctx_node = csb->csb_current_nodes.begin(), end = csb->csb_current_nodes.end();
580-
ctx_node != end; ++ctx_node)
591+
for (const auto& ctxNode : csb->csb_current_nodes)
581592
{
582-
if (nodeAs<RseNode>(*ctx_node))
583-
break;
593+
if (nodeIs<RseNode>(ctxNode))
594+
return this;
584595
}
585596

586-
if (ctx_node >= end)
587-
nodFlags &= ~FLAG_INVARIANT;
597+
nodFlags &= ~FLAG_INVARIANT;
588598
}
589599
}
590600

@@ -1143,6 +1153,260 @@ BoolExprNode* ComparativeBoolNode::createRseNode(DsqlCompilerScratch* dsqlScratc
11431153
//--------------------
11441154

11451155

1156+
// Associates InListBoolNode with the blr_in_list opcode (presumably so that the BLR
// parser dispatches this verb to InListBoolNode::parse -- confirm against RegisterBoolNode).
static RegisterBoolNode<InListBoolNode> regInListBoolNode({blr_in_list});
1157+
1158+
// Builds an "<arg> IN (<list>)" boolean node.
//   pool  - memory pool handed to the TypedNode base
//   aArg  - the value being tested
//   aList - the candidate values it is tested against
InListBoolNode::InListBoolNode(MemoryPool& pool, ValueExprNode* aArg, ValueListNode* aList)
	: TypedNode<BoolExprNode, ExprNode::TYPE_IN_LIST_BOOL>(pool),
	  arg(aArg),
	  list(aList)
{
	// Nothing to do beyond member initialization
}
1164+
1165+
// Reconstructs an InListBoolNode from BLR.
//
// The stream is consumed in this order (the same order genBlr() writes it):
// the tested value, a 16-bit item count, then exactly that many list items.
// The new node is allocated from the supplied pool.
DmlNode* InListBoolNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
{
	// 1. The value on the left-hand side of IN
	const auto testedValue = PAR_parse_value(tdbb, csb);

	// 2. Number of list members, then the members themselves; the count is passed
	//    twice to PAR_args, exactly as read
	const auto itemCount = csb->csb_blr_reader.getWord();
	const auto valueList = PAR_args(tdbb, csb, itemCount, itemCount);

	return FB_NEW_POOL(pool) InListBoolNode(pool, testedValue, valueList);
}
1174+
1175+
// Debug printing: emits the base-class part first, then this node's own
// members (blrOp, arg, list) through the given printer.
// Returns the name under which the node is printed.
string InListBoolNode::internalPrint(NodePrinter& printer) const
{
	// Base-class state goes first
	BoolExprNode::internalPrint(printer);

	// Then the members specific to the IN-list node
	NODE_PRINT(printer, blrOp);
	NODE_PRINT(printer, arg);
	NODE_PRINT(printer, list);

	return "InListBoolNode";
}
1185+
1186+
// DSQL pass for "<arg> IN (<list>)".
//
// Steps, in order:
//   1. run the DSQL pass over the argument and the list and wrap the results
//      into a fresh InListBoolNode;
//   2. derive a descriptor for the argument and a common descriptor for the list;
//      when both are textual, recompute the common descriptor from the two and,
//      for international data, pick the larger of the two text types unless one
//      side is CS_BINARY or both sides are CS_ASCII / both are CS_NONE;
//   3. wrap every list item whose own descriptor is not equivalent to the common
//      one into a CAST to the common type;
//   4. let parameters on either side take their type from the other side.
//
// Returns the newly created node; the node this method is called on is not returned.
BoolExprNode* InListBoolNode::dsqlPass(DsqlCompilerScratch* dsqlScratch)
{
	MemoryPool& pool = dsqlScratch->getPool();

	const auto passedArg = doDsqlPass(dsqlScratch, arg);
	const auto passedList = doDsqlPass(dsqlScratch, list);

	const auto result = FB_NEW_POOL(pool) InListBoolNode(pool, passedArg, passedList);

	dsc argDesc;
	DsqlDescMaker::fromNode(dsqlScratch, &argDesc, passedArg);

	dsc listDesc;
	DsqlDescMaker::fromList(dsqlScratch, &listDesc, passedList, "IN LIST");

	if (argDesc.isText() && listDesc.isText())
	{
		// Both sides are strings: compute the descriptor they have in common
		const dsc* descs[] = {&argDesc, &listDesc};
		dsc commonDesc;
		DSqlDataTypeUtil(dsqlScratch).makeFromList(&commonDesc, "IN LIST",
			FB_NELEM(descs), descs);

		if (IS_INTL_DATA(&argDesc) || IS_INTL_DATA(&listDesc))
		{
			const auto argCharSet = argDesc.getCharSet();
			const auto listCharSet = listDesc.getCharSet();

			const bool anyBinary = (argCharSet == CS_BINARY) || (listCharSet == CS_BINARY);
			const bool bothAscii = (argCharSet == CS_ASCII) && (listCharSet == CS_ASCII);
			const bool bothNone = (argCharSet == CS_NONE) && (listCharSet == CS_NONE);

			// Override the text type only for a "real" charset combination
			if (!anyBinary && !bothAscii && !bothNone)
				commonDesc.setTextType(MAX(argDesc.getTextType(), listDesc.getTextType()));
		}

		listDesc = commonDesc;
	}

	for (auto& item : passedList->items)
	{
		const auto itemDesc = item->getDsqlDesc();

		// Items already matching the common descriptor are left untouched
		if (DSC_EQUIV(&listDesc, &itemDesc, true))
			continue;

		// Describe the cast target using the common list descriptor
		const auto castField = FB_NEW_POOL(pool) dsql_fld(pool);

		castField->dtype = listDesc.dsc_dtype;
		castField->scale = listDesc.dsc_scale;
		castField->subType = listDesc.dsc_sub_type;
		castField->length = listDesc.dsc_length;
		castField->flags = (listDesc.dsc_flags & DSC_nullable) ? FLD_nullable : 0;

		// Text attributes are copied only when the item itself is text or blob
		if (itemDesc.isText() || itemDesc.isBlob())
		{
			castField->textType = listDesc.getTextType();
			castField->charSetId = listDesc.getCharSet();
			castField->collationId = listDesc.getCollation();
		}

		const auto castNode = FB_NEW_POOL(pool) CastNode(pool, item, castField);
		item = castNode->dsqlPass(dsqlScratch);
	}

	// Try to force arg to be same type as list eg: ? = (FIELD, ...) case
	for (auto item : passedList->items)
		PASS1_set_parameter_type(dsqlScratch, result->arg, item, false);

	// Try to force list to be same type as arg eg: FIELD = (?, ...) case
	for (auto item : passedList->items)
		PASS1_set_parameter_type(dsqlScratch, item, result->arg, false);

	return result;
}
1262+
1263+
// Serializes the node to BLR: the opcode byte, the tested expression, the item
// count as an unsigned 16-bit value, and finally every list item in order.
// The count is asserted (debug builds) to fit into 16 bits.
void InListBoolNode::genBlr(DsqlCompilerScratch* dsqlScratch)
{
	dsqlScratch->appendUChar(blrOp);
	GEN_expr(dsqlScratch, arg);

	const auto itemCount = list->items.getCount();
	fb_assert(itemCount <= MAX_USHORT);
	dsqlScratch->appendUShort(itemCount);

	for (auto listItem : list->items)
		GEN_expr(dsqlScratch, listItem);
}
1275+
1276+
// DSQL-level matching: the nodes match when the base-class comparison succeeds
// and the other node is an InListBoolNode as well.
bool InListBoolNode::dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other, bool ignoreMapCast) const
{
	// The type test is only reached once the base-class match has succeeded
	return BoolExprNode::dsqlMatch(dsqlScratch, other, ignoreMapCast) &&
		nodeIs<InListBoolNode>(other);
}
1283+
1284+
bool InListBoolNode::sameAs(const ExprNode* other, bool ignoreStreams) const
1285+
{
1286+
const auto otherNode = nodeAs<InListBoolNode>(other);
1287+
1288+
if (!otherNode)
1289+
return false;
1290+
1291+
return (arg->sameAs(otherNode->arg, ignoreStreams) &&
1292+
list->sameAs(otherNode->list, ignoreStreams));
1293+
}
1294+
1295+
// Produces a copy of this node: argument and list are copied through the
// supplied copier, the new node is allocated from the thread's default pool,
// and nodFlags are carried over.
BoolExprNode* InListBoolNode::copy(thread_db* tdbb, NodeCopier& copier) const
{
	// Children first, in the same order as they are stored
	const auto argCopy = copier.copy(tdbb, arg);
	const auto listCopy = copier.copy(tdbb, list);

	MemoryPool& pool = *tdbb->getDefaultPool();

	const auto clone = FB_NEW_POOL(pool) InListBoolNode(pool, argCopy, listCopy);
	clone->nodFlags = nodFlags;

	return clone;
}
1306+
1307+
// First compilation pass.
//
// The argument is processed first. The node is then optimistically flagged as
// invariant and pushed onto csb_current_nodes while the list is processed, so
// that the list's own pass1 can see (and possibly clear the flag of) this node.
// If the flag survives, it is kept as is when an RSE is among the current nodes;
// otherwise it is dropped as soon as a list item (looked at through any CASTs)
// turns out to be neither a literal nor a parameter.
//
// Always returns this node.
BoolExprNode* InListBoolNode::pass1(thread_db* tdbb, CompilerScratch* csb)
{
	doPass1(tdbb, csb, arg.getAddress());

	nodFlags |= FLAG_INVARIANT;

	csb->csb_current_nodes.push(this);
	doPass1(tdbb, csb, list.getAddress());
	csb->csb_current_nodes.pop();

	// Invariance was already revoked while processing the list
	if (!(nodFlags & FLAG_INVARIANT))
		return this;

	// If there is no top-level RSE present and list items are not constant, unmark node as invariant
	// because it may be dependent on data or variables

	for (const auto& currentNode : csb->csb_current_nodes)
	{
		if (nodeIs<RseNode>(currentNode))
			return this;
	}

	for (auto listItem : list->items)
	{
		// Strip any number of nested casts to reach the underlying expression
		while (auto cast = nodeAs<CastNode>(listItem))
			listItem = cast->source;

		const bool isConstant = nodeIs<LiteralNode>(listItem) || nodeIs<ParameterNode>(listItem);

		if (!isConstant)
		{
			nodFlags &= ~FLAG_INVARIANT;
			break;
		}
	}

	return this;
}
1344+
1345+
// Second compilation pass.
//
// Order of operations (kept exactly as required by the surrounding machinery):
//   - an invariant node registers the address of its impureOffset in csb_invariants;
//   - the supplied `process` callback is run (presumably the generic pass2 over
//     the children -- confirm against the caller);
//   - an aggregate record key as the argument is rejected with isc_bad_dbkey;
//   - FLAG_DATE is set on the argument if its type is a date type, otherwise on
//     every list item if the list type is a date type;
//   - an invariant node finally allocates its impure area and creates the
//     LookupValueList used by execute().
void InListBoolNode::pass2Boolean(thread_db* tdbb, CompilerScratch* csb, std::function<void ()> process)
{
	if (nodFlags & FLAG_INVARIANT)
		csb->csb_invariants.push(&impureOffset);

	process();

	const auto keyNode = nodeAs<RecordKeyNode>(arg);

	if (keyNode && keyNode->aggregate)
		ERR_post(Arg::Gds(isc_bad_dbkey));

	dsc argDesc, listDesc;
	arg->getDesc(tdbb, csb, &argDesc);
	list->getDesc(tdbb, csb, &listDesc);

	if (DTYPE_IS_DATE(argDesc.dsc_dtype))
	{
		arg->nodFlags |= FLAG_DATE;
	}
	else if (DTYPE_IS_DATE(listDesc.dsc_dtype))
	{
		for (auto listItem : list->items)
			listItem->nodFlags |= FLAG_DATE;
	}

	// The flag is re-tested here, after process() has run
	if (nodFlags & FLAG_INVARIANT)
	{
		impureOffset = csb->allocImpure<impure_value>();
		lookup = FB_NEW_POOL(csb->csb_pool) LookupValueList(csb->csb_pool, list, impureOffset);
	}
}
1376+
1377+
// Evaluates "<arg> IN (<list>)" for the current request.
//
// Result convention (the same one the invariant branch already follows):
//   - true                        : arg equals at least one list item;
//   - false, req_null not raised  : no item matched and none of them was NULL;
//   - false, req_null raised      : the outcome is unknown.
//
// Invariant nodes delegate to the pre-built lookup structure. Other nodes scan
// the list linearly, evaluating every item until a match is found.
//
// Fix: the linear scan used to skip NULL items silently, so the null state left
// behind was that of whichever item happened to be evaluated last. A list like
// (NULL, 1) with a non-matching argument was therefore reported as a definite
// FALSE (making NOT IN wrongly succeed), while (1, NULL) was reported as unknown.
// The scan now remembers whether any item was NULL and reports unknown
// consistently, independent of item order.
bool InListBoolNode::execute(thread_db* tdbb, Request* request) const
{
	const auto argDesc = EVL_expr(tdbb, request, arg);

	// No descriptor for the argument: nothing can match. The request flags are
	// left exactly as EVL_expr set them.
	if (!argDesc)
		return false;

	if (nodFlags & FLAG_INVARIANT)
	{
		const auto res = lookup->find(tdbb, request, arg, argDesc);

		if (res.isAssigned())
			return res.value;

		// An unassigned lookup result means "unknown"
		fb_assert(list->items.hasData());
		request->req_flags |= req_null;
		return false;
	}

	bool nullSeen = false;

	for (const auto value : list->items)
	{
		if (const auto valueDesc = EVL_expr(tdbb, request, value))
		{
			// MOV_compare yields zero for equal values
			if (!MOV_compare(tdbb, argDesc, valueDesc))
				return true;
		}
		else
			nullSeen = true;
	}

	// No match, but at least one candidate was NULL: the answer is unknown rather
	// than false. (Assumes EVL_expr resets req_null on every call, which is why a
	// later non-NULL item would otherwise hide an earlier NULL -- confirm against
	// the EVL_expr definition.)
	if (nullSeen)
		request->req_flags |= req_null;

	return false;
}
1405+
1406+
1407+
//--------------------
1408+
1409+
11461410
static RegisterBoolNode<MissingBoolNode> regMissingBoolNode({blr_missing});
11471411

11481412
MissingBoolNode::MissingBoolNode(MemoryPool& pool, ValueExprNode* aArg, bool aDsqlUnknown)

0 commit comments

Comments
 (0)