Skip to content

Commit

Permalink
add adaptive CUDA stack limit enhancement
Browse files Browse the repository at this point in the history
related to issue #812
  • Loading branch information
kaigai committed Nov 21, 2024
1 parent 8ed2aba commit 3c68144
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 19 deletions.
90 changes: 88 additions & 2 deletions src/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -1637,6 +1637,9 @@ __try_inject_temporary_expression(codegen_context *context,
*
* ----------------------------------------------------------------
*/
/*
 * Per-call stack consumption estimates used while generating xPU code.
 *
 * XPUCODE_STACK_USAGE_NORMAL is added to context->stack_usage each time
 * codegen_expression_walker() is entered with a non-NULL expression, so the
 * accumulated value grows with the nesting depth of the expression tree.
 * XPUCODE_STACK_USAGE_RECURSIVE is added on top of that by
 * __codegen_func_expression() when the device function is flagged with
 * DEVFUNC__HAS_RECURSION.
 *
 * The accumulated context->stack_usage is finally added to the result of
 * estimate_cuda_stack_size().
 *
 * NOTE(review): the values are presumably in bytes, as they are summed with
 * kv_xdatum_sizeof-based sizes -- confirm against the consumer of the
 * estimated stack size.
 */
#define XPUCODE_STACK_USAGE_NORMAL 128 /* stack usage by normal function calls */
#define XPUCODE_STACK_USAGE_RECURSIVE 2048 /* stack usage by recursive function calls */

#define __Elog(fmt,...) \
do { \
ereport(context->elevel, \
Expand Down Expand Up @@ -1845,6 +1848,8 @@ __codegen_func_expression(codegen_context *context,
devtype_info *dtype;
kern_expression kexp;
int pos = -1;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
ListCell *lc;

dfunc = pgstrom_devfunc_lookup(func_oid, func_args, func_collid);
Expand All @@ -1869,7 +1874,12 @@ __codegen_func_expression(codegen_context *context,

if (codegen_expression_walker(context, buf, curr_depth, arg) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
if ((dfunc->func_flags & DEVFUNC__HAS_RECURSION) != 0)
context->stack_usage += XPUCODE_STACK_USAGE_RECURSIVE;
if (buf)
__appendKernExpMagicAndLength(buf, pos);
return 0;
Expand Down Expand Up @@ -1931,8 +1941,10 @@ codegen_bool_expression(codegen_context *context,
BoolExpr *b)
{
kern_expression kexp;
int pos = -1;
ListCell *lc;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
int pos = -1;
ListCell *lc;

memset(&kexp, 0, sizeof(kexp));
switch (b->boolop)
Expand Down Expand Up @@ -1969,7 +1981,10 @@ codegen_bool_expression(codegen_context *context,

if (codegen_expression_walker(context, buf, curr_depth, arg) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
if (buf)
__appendKernExpMagicAndLength(buf, pos);
return 0;
Expand Down Expand Up @@ -2178,6 +2193,8 @@ codegen_coerceviaio_expression(codegen_context *context,
dtype->type_namespace == PG_CATALOG_NAMESPACE)))
{
ListCell *lc;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
int pos = -1;

kexp.opcode = jsonref_catalog[i].opcode;
Expand All @@ -2193,7 +2210,10 @@ codegen_coerceviaio_expression(codegen_context *context,

if (codegen_expression_walker(context, buf, curr_depth, arg) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
if (buf)
__appendKernExpMagicAndLength(buf, pos);
return 0;
Expand All @@ -2214,6 +2234,8 @@ codegen_coalesce_expression(codegen_context *context,
devtype_info *dtype, *__dtype;
kern_expression kexp;
int pos = -1;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
ListCell *lc;

dtype = pgstrom_devtype_lookup(cl->coalescetype);
Expand Down Expand Up @@ -2241,7 +2263,10 @@ codegen_coalesce_expression(codegen_context *context,
nodeToString(cl));
if (codegen_expression_walker(context, buf, curr_depth, expr) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
if (buf)
__appendKernExpMagicAndLength(buf, pos);
return 0;
Expand All @@ -2258,6 +2283,8 @@ codegen_minmax_expression(codegen_context *context,
devtype_info *dtype, *__dtype;
kern_expression kexp;
int pos = -1;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
ListCell *lc;

dtype = pgstrom_devtype_lookup(mm->minmaxtype);
Expand Down Expand Up @@ -2290,7 +2317,10 @@ codegen_minmax_expression(codegen_context *context,
nodeToString(mm));
if (codegen_expression_walker(context, buf, curr_depth, expr) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
if (buf)
__appendKernExpMagicAndLength(buf, pos);
return 0;
Expand Down Expand Up @@ -2376,6 +2406,8 @@ codegen_casewhen_expression(codegen_context *context,
kern_expression kexp;
devtype_info *dtype;
ListCell *lc;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
int pos = -1;
int saved_casetest_key_slot_id = codegen_casetest_key_slot_id;

Expand Down Expand Up @@ -2413,6 +2445,8 @@ codegen_casewhen_expression(codegen_context *context,
if (buf)
kexp.args_offset = (__appendZeroStringInfo(buf, 0) - pos);

stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
codegen_casetest_key_slot_id = kvdef->kv_slot_id;
}

Expand All @@ -2428,10 +2462,15 @@ codegen_casewhen_expression(codegen_context *context,
curr_depth,
casewhen->expr) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;

if (codegen_expression_walker(context, buf,
curr_depth,
casewhen->result) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
/* ELSE <expression> */
if (caseexpr->defresult)
Expand All @@ -2443,7 +2482,10 @@ codegen_casewhen_expression(codegen_context *context,
curr_depth,
caseexpr->defresult) < 0)
return -1;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
}
PG_CATCH();
{
Expand Down Expand Up @@ -2615,6 +2657,7 @@ codegen_expression_walker(codegen_context *context,
if (!expr)
return 0;

context->stack_usage += XPUCODE_STACK_USAGE_NORMAL;
switch (nodeTag(expr))
{
case T_Const:
Expand Down Expand Up @@ -2912,6 +2955,7 @@ codegen_build_scan_quals(codegen_context *context, List *dev_quals)
StringInfoData buf;
bytea *xpucode = NULL;
Expr *expr;
uint32_t stack_usage_saved = context->stack_usage;
int saved_depth = context->curr_depth;

Assert(context->elevel >= ERROR);
Expand All @@ -2924,12 +2968,14 @@ codegen_build_scan_quals(codegen_context *context, List *dev_quals)

initStringInfo(&buf);
context->curr_depth = 0;
context->stack_usage = 0;
if (codegen_expression_walker(context, &buf, 0, expr) == 0)
{
xpucode = palloc(VARHDRSZ+buf.len);
memcpy(xpucode->vl_dat, buf.data, buf.len);
SET_VARSIZE(xpucode, VARHDRSZ+buf.len);
}
context->stack_usage = Max(stack_usage_saved, context->stack_usage);
pfree(buf.data);
context->curr_depth = saved_depth;

Expand Down Expand Up @@ -2976,6 +3022,8 @@ __codegen_build_joinquals(codegen_context *context,
StringInfoData buf;
kern_expression kexp;
ListCell *lc;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
uint32_t kexp_flags__saved;

if (join_quals == NIL && other_quals == NIL)
Expand All @@ -2998,6 +3046,8 @@ __codegen_build_joinquals(codegen_context *context,
elog(ERROR, "Bub? JOIN quals must be boolean");
if (codegen_expression_walker(context, &buf, curr_depth, qual) < 0)
return NULL;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}

kexp_flags__saved = context->kexp_flags;
Expand All @@ -3010,7 +3060,10 @@ __codegen_build_joinquals(codegen_context *context,
elog(ERROR, "Bub? JOIN quals must be boolean");
if (codegen_expression_walker(context, &buf, curr_depth, qual) < 0)
return NULL;
stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
context->kexp_flags = kexp_flags__saved;
__appendKernExpMagicAndLength(&buf, 0);

Expand All @@ -3030,6 +3083,8 @@ codegen_build_packed_joinquals(codegen_context *context,
int depth;
int nrels;
size_t sz;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
ListCell *lc1, *lc2;
char *result = NULL;

Expand Down Expand Up @@ -3066,7 +3121,11 @@ codegen_build_packed_joinquals(codegen_context *context,
pfree(karg);
}
depth++;

stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;
Assert(depth == nrels+1);

if (kexp->nr_args > 0)
Expand All @@ -3092,6 +3151,8 @@ __codegen_build_hash_value(codegen_context *context,
kern_expression *kexp;
StringInfoData buf;
size_t sz = MAXALIGN(SizeOfKernExpr(0));
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
ListCell *lc;

if (hash_keys == NIL)
Expand All @@ -3112,7 +3173,12 @@ __codegen_build_hash_value(codegen_context *context,
Expr *expr = lfirst(lc);

codegen_expression_walker(context, &buf, curr_depth, expr);

stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;

memcpy(buf.data, kexp, sz);
__appendKernExpMagicAndLength(&buf, 0);

Expand All @@ -3128,6 +3194,7 @@ codegen_build_packed_hashkeys(codegen_context *context,
int depth;
int nrels;
size_t sz;
uint32_t stack_usage_max = context->stack_usage;
ListCell *lc;
char *result = NULL;

Expand All @@ -3150,15 +3217,18 @@ codegen_build_packed_hashkeys(codegen_context *context,
kern_expression *karg;
List *hash_keys = lfirst(lc);

context->stack_usage = 0;
karg = __codegen_build_hash_value(context, hash_keys, depth);
if (karg)
{
kexp->u.pack.offset[depth]
= __appendBinaryStringInfo(&buf, karg, karg->len);
kexp->nr_args++;
}
stack_usage_max = Max(stack_usage_max, context->stack_usage);
depth++;
}
context->stack_usage = stack_usage_max;
Assert(depth == nrels+1);

if (kexp->nr_args > 0)
Expand Down Expand Up @@ -3301,6 +3371,8 @@ codegen_build_packed_gistevals(codegen_context *context,
StringInfoData buf;
kern_expression *kexp;
size_t head_sz;
uint32_t stack_usage_saved = context->stack_usage;
uint32_t stack_usage_max = stack_usage_saved;
bytea *result = NULL;

head_sz = MAXALIGN(offsetof(kern_expression,
Expand Down Expand Up @@ -3357,7 +3429,11 @@ codegen_build_packed_gistevals(codegen_context *context,
gist_func_arg);
kexp->u.pack.offset[i+1] = off;
kexp->nr_args++;

stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = stack_usage_saved;
}
context->stack_usage = stack_usage_max;

if (buf.len > head_sz)
{
Expand Down Expand Up @@ -3385,6 +3461,7 @@ codegen_build_projection(codegen_context *context,
bool meet_resjunk = false;
int nattrs = 0;
int sz;
uint32_t stack_usage_max = context->stack_usage;
ListCell *lc;

/* count nattrs */
Expand Down Expand Up @@ -3420,7 +3497,12 @@ codegen_build_projection(codegen_context *context,
&buf,
tle->expr);
kexp->u.proj.slot_id[kexp->u.proj.nattrs++] = kvdef->kv_slot_id;

stack_usage_max = Max(stack_usage_max, context->stack_usage);
context->stack_usage = 0;
}
context->stack_usage = stack_usage_max;

/* hash-value (optional; for pinned inner buffer) */
if (proj_hash != NIL)
{
Expand Down Expand Up @@ -3827,9 +3909,11 @@ codegen_build_groupby_actions(codegen_context *context,
groupby_keys_final = codegen_build_groupby_keyload(context, pp_info);
if (groupby_keys_input != NIL &&
groupby_keys_final != NIL)
{
codegen_build_groupby_keycomp(context, pp_info,
groupby_keys_input,
groupby_keys_final);
}
__codegen_build_groupby_actions(context, pp_info);
}

Expand Down Expand Up @@ -3903,6 +3987,8 @@ estimate_cuda_stack_size(codegen_context *context)
stack_sz += TYPEALIGN(CUDA_ALLOCA_ALIGN,
kvdef->kv_xdatum_sizeof);
}
/* other expressions */
stack_sz += context->stack_usage;
return stack_sz;
#undef CUDA_ALLOCA_ALIGN
}
Expand Down
1 change: 1 addition & 0 deletions src/pg_strom.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ typedef struct
uint32_t extra_bufsz;
uint32_t device_cost;
uint32_t kexp_flags;
uint32_t stack_usage;
List *kvars_deflist;
List *tlist_dev;
int kvecs_ndims;
Expand Down
1 change: 1 addition & 0 deletions src/xpu_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -2069,6 +2069,7 @@ typedef struct
* no locale configuration */
#define DEVKERN__SESSION_TIMEZONE 0x00000200U /* Device function needs session
* timezone */
#define DEVFUNC__HAS_RECURSION 0x00000400U /* Device function has recursive calls */
#define DEVTYPE__HAS_COMPARE 0x00000800U /* Device type has compare handler */
#define DEVTASK__PINNED_HASH_RESULTS 0x00001000U/* Pinned results in HASH format */
#define DEVTASK__PINNED_ROW_RESULTS 0x00002000U /* Pinned results in ROW format */
Expand Down
Loading

0 comments on commit 3c68144

Please sign in to comment.