Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16971 cart: add RPC origin address #15820

Merged
merged 8 commits into from
Feb 6, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/cart/crt_bulk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -183,7 +184,8 @@ crt_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, false);
if (rc != 0)
DL_ERROR(rc, "crt_hg_bulk_transfer() failed");
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
@@ -203,7 +205,8 @@ crt_bulk_bind_transfer(struct crt_bulk_desc *bulk_desc,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, true);
if (rc != 0)
D_ERROR("crt_hg_bulk_transfer() failed, rc: %d.\n", rc);
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed.", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
42 changes: 40 additions & 2 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -1454,8 +1455,8 @@ crt_hg_reply_send_cb(const struct hg_cb_info *hg_cbinfo)
* see CART-146 for details
*/
if (hg_ret != HG_SUCCESS)
D_WARN("hg_cbinfo->ret: " DF_HG_RC ", opc: %#x.\n",
DP_HG_RC(hg_ret), opc);
D_WARN("hg_cbinfo->ret: " DF_HG_RC ", opc: %#x from %s.\n", DP_HG_RC(hg_ret), opc,
crt_rpc_priv_get_origin_addr(rpc_priv));

/* corresponding to the crt_req_addref in crt_hg_reply_send */
RPC_DECREF(rpc_priv);
@@ -1867,3 +1868,40 @@ crt_hg_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,
out:
return rc;
}

char *
crt_rpc_priv_get_origin_addr(struct crt_rpc_priv *rpc_priv)
{
const struct hg_info *hg_info;
char addr[48];
hg_size_t addr_size = 48;
int rc;

if (rpc_priv->crp_orig_uri != NULL)
return rpc_priv->crp_orig_uri;

hg_info = HG_Get_info(rpc_priv->crp_hg_hdl);
if (hg_info == NULL)
return "NOINFO";

rc = HG_Addr_to_string(hg_info->hg_class, addr, (hg_size_t *)&addr_size, hg_info->addr);
if (rc != 0)
return "NONE";

D_ALLOC(rpc_priv->crp_orig_uri, addr_size);
if (rpc_priv->crp_orig_uri == NULL)
return "NOMEM";

memcpy(rpc_priv->crp_orig_uri, addr, addr_size);

return rpc_priv->crp_orig_uri;
}

char *
crt_req_origin_addr_get(crt_rpc_t *rpc_pub)
{
struct crt_rpc_priv *rpc_priv;

rpc_priv = container_of(rpc_pub, struct crt_rpc_priv, crp_pub);
return crt_rpc_priv_get_origin_addr(rpc_priv);
}
19 changes: 11 additions & 8 deletions src/cart/crt_internal.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -36,10 +37,11 @@
break; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_DEBUG(mask, (rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_DEBUG(mask, (rpc), \
"[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log an error with an RPC descriptor */
@@ -49,10 +51,10 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log a warning with an RPC descriptor */
@@ -62,10 +64,10 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log an info message with an RPC descriptor */
@@ -75,11 +77,12 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/**
* If \a cond is false, this is equivalent to an RPC_ERROR (i.e., \a mask is
* ignored). If \a cond is true, this is equivalent to an RPC_TRACE.
3 changes: 3 additions & 0 deletions src/cart/crt_internal_fns.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -71,4 +72,6 @@ void
crt_trigger_event_cbs(d_rank_t rank, uint64_t incarnation, enum crt_event_source src,
enum crt_event_type type);

char *
crt_rpc_priv_get_origin_addr(struct crt_rpc_priv *rpc_priv);
#endif /* __CRT_INTERNAL_FNS_H__ */
8 changes: 6 additions & 2 deletions src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -578,6 +579,9 @@ crt_rpc_priv_free(struct crt_rpc_priv *rpc_priv)

RPC_TRACE(DB_TRACE, rpc_priv, "destroying\n");

if (rpc_priv->crp_orig_uri != NULL)
D_FREE(rpc_priv->crp_orig_uri);

D_FREE(rpc_priv);
}

@@ -1596,8 +1600,8 @@ crt_reply_send(crt_rpc_t *req)
RPC_TRACE(DB_ALL, rpc_priv, "reply_send\n");
rc = crt_hg_reply_send(rpc_priv);
if (rc != 0)
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.\n",
rc, rpc_priv->crp_pub.cr_opc);
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.: %s\n", rc,
rpc_priv->crp_pub.cr_opc, crt_req_origin_addr_get(req));
}

rpc_priv->crp_reply_pending = 0;
2 changes: 2 additions & 0 deletions src/cart/crt_rpc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -154,6 +155,7 @@ struct crt_rpc_priv {
hg_addr_t crp_hg_addr; /* target na address */
struct crt_hg_hdl *crp_hdl_reuse; /* reused hg_hdl */
crt_phy_addr_t crp_tgt_uri; /* target uri address */
char *crp_orig_uri; /* where the RPC comes from */
crt_rpc_t *crp_ul_req; /* uri lookup request */

uint32_t crp_ul_retry; /* uri lookup retry counter */
10 changes: 10 additions & 0 deletions src/include/cart/api.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -2375,6 +2376,15 @@ int crt_context_quota_limit_get(crt_context_t crt_ctx, crt_quota_type_t quota, i
int
crt_req_get_proto_ver(crt_rpc_t *req);

/**
* Get the rpc origin address.
*
* \param[in] rpc pointer to RPC request
* \return the origin address of the RPC
*/
char *
crt_req_origin_addr_get(crt_rpc_t *rpc);

/** @}
*/

48 changes: 27 additions & 21 deletions src/object/srv_obj.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -182,9 +183,8 @@ obj_rw_reply(crt_rpc_t *rpc, int status, uint64_t epoch, bool release_input,
orwo->orw_epoch = max(epoch, orwo->orw_epoch);
}

D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch "DF_X64
", status %d\n", rpc, opc_get(rpc->cr_opc),
ioc->ioc_map_ver, orwo->orw_epoch, status);
D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch " DF_X64 ", status %d\n", rpc,
opc_get(rpc->cr_opc), ioc->ioc_map_ver, orwo->orw_epoch, status);

if (!ioc->ioc_lost_reply) {
if (release_input)
@@ -241,11 +241,13 @@ obj_bulk_comp_cb(const struct crt_bulk_cb_info *cb_info)
struct crt_bulk_desc *bulk_desc;
crt_rpc_t *rpc;

if (cb_info->bci_rc != 0)
D_ERROR("bulk transfer failed: %d\n", cb_info->bci_rc);

bulk_desc = cb_info->bci_bulk_desc;
rpc = bulk_desc->bd_rpc;

if (cb_info->bci_rc != 0)
D_ERROR("rpc: %p:%s bulk transfer failed: %d\n", rpc, crt_req_origin_addr_get(rpc),
cb_info->bci_rc);

arg = (struct obj_bulk_args *)cb_info->bci_arg;
/**
* Note: only one thread will access arg.result, so
@@ -5776,17 +5778,19 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc)
max_ver = version;

DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, rc,
"(%s) handled collective punch RPC %p for obj "DF_UOID" on XS %u/%u in "DF_UUID"/"
DF_UUID"/"DF_UUID" with epc "DF_X64", pmv %u/%u, dti "DF_DTI", bulk_tgt_sz %u, "
"(%s) handled collective punch RPC %p:%s for obj " DF_UOID
" on XS %u/%u in " DF_UUID "/" DF_UUID "/" DF_UUID " with epc " DF_X64
", pmv %u/%u, dti " DF_DTI ", bulk_tgt_sz %u, "
"bulk_tgt_nr %u, tgt_nr %u, forward width %u, forward depth %u, flags %x",
(ocpi->ocpi_flags & ORF_LEADER) ? "leader" :
(ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"), rpc,
DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id, dmi->dmi_tgt_id,
DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch,
ocpi->ocpi_map_ver, max_ver, DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz,
ocpi->ocpi_bulk_tgt_nr, (unsigned int)ocpi->ocpi_tgts.ca_count,
ocpi->ocpi_disp_width, ocpi->ocpi_disp_depth, ocpi->ocpi_flags);
(ocpi->ocpi_flags & ORF_LEADER)
? "leader"
: (ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"),
rpc, crt_req_origin_addr_get(rpc), DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id,
dmi->dmi_tgt_id, DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch, ocpi->ocpi_map_ver, max_ver,
DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz, ocpi->ocpi_bulk_tgt_nr,
(unsigned int)ocpi->ocpi_tgts.ca_count, ocpi->ocpi_disp_width,
ocpi->ocpi_disp_depth, ocpi->ocpi_flags);

obj_punch_complete(rpc, rc, max_ver);

@@ -5918,11 +5922,13 @@ ds_obj_coll_query_handler(crt_rpc_t *rpc)
rc = dtx_leader_end(dlh, ioc.ioc_coh, rc);

out:
D_DEBUG(DB_IO, "Handled collective query RPC %p %s forwarding for obj "DF_UOID
" on rank %u XS %u/%u epc "DF_X64" pmv %u, with dti "DF_DTI", dct_nr %u, "
"forward width %u, forward depth %u\n: "DF_RC"\n", rpc,
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid),
myrank, dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
D_DEBUG(DB_IO,
"Handled collective query RPC %p:%s %s forwarding for obj " DF_UOID
" on rank %u XS %u/%u epc " DF_X64 " pmv %u, with dti " DF_DTI ", dct_nr %u, "
"forward width %u, forward depth %u\n: " DF_RC "\n",
rpc, crt_req_origin_addr_get(rpc),
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid), myrank,
dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
DP_DTI(&ocqi->ocqi_xid), (unsigned int)ocqi->ocqi_tgts.ca_count,
ocqi->ocqi_disp_width, ocqi->ocqi_disp_depth, DP_RC(rc));