Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16971 cart: add origin address to RPC debug log #15825

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/cart/crt_bulk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -183,7 +184,8 @@ crt_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, false);
if (rc != 0)
DL_ERROR(rc, "crt_hg_bulk_transfer() failed");
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
Expand All @@ -203,7 +205,8 @@ crt_bulk_bind_transfer(struct crt_bulk_desc *bulk_desc,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, true);
if (rc != 0)
D_ERROR("crt_hg_bulk_transfer() failed, rc: %d.\n", rc);
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed.", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
Expand Down
10 changes: 10 additions & 0 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1872,3 +1873,12 @@ crt_hg_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,
out:
return rc;
}

/**
 * Public accessor for the origin address string of an RPC.
 *
 * Maps the public request handle back to its enclosing private descriptor and
 * delegates to crt_rpc_priv_get_origin_addr().
 *
 * \param[in] rpc_pub	public RPC request handle; may be NULL
 *
 * \return		the string produced by crt_rpc_priv_get_origin_addr(),
 *			or the constant placeholder "NONE" when \a rpc_pub is
 *			NULL. The caller must not free or modify the result.
 */
char *
crt_req_origin_addr_get(crt_rpc_t *rpc_pub)
{
	struct crt_rpc_priv *rpc_priv;

	/*
	 * This is typically called while formatting an error message; a NULL
	 * request must not turn the log statement itself into a crash.
	 * container_of() on NULL would yield a bogus non-NULL pointer.
	 */
	if (rpc_pub == NULL)
		return "NONE";

	rpc_priv = container_of(rpc_pub, struct crt_rpc_priv, crp_pub);

	return crt_rpc_priv_get_origin_addr(rpc_priv);
}
54 changes: 46 additions & 8 deletions src/cart/crt_internal.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand All @@ -27,6 +28,34 @@
#include "crt_self_test.h"
#include "crt_swim.h"

/**
 * Return the origin address of an RPC as a printable string, resolving and
 * caching it on first use.
 *
 * On the first successful call the address reported by HG_Get_info() for the
 * RPC's handle is converted to a string with HG_Addr_to_string() and a heap
 * copy is stored in rpc_priv->crp_orig_uri. Later calls return that cached
 * copy directly. The cached copy is released in crt_rpc_priv_free().
 *
 * Failures are not cached, so a later call retries the lookup.
 *
 * \param[in] rpc_priv	private RPC descriptor; must not be NULL
 *
 * \return		the cached address string, or one of the constant
 *			placeholders "NOINFO" (HG_Get_info() returned NULL),
 *			"NONE" (the address could not be converted into the
 *			local buffer) or "NOMEM" (allocation of the cached
 *			copy failed). The caller must never free or modify
 *			the returned string.
 */
static inline char *
crt_rpc_priv_get_origin_addr(struct crt_rpc_priv *rpc_priv)
{
	const struct hg_info *hg_info;
	char                  addr[48];
	/* In: capacity of addr. Out: size written by HG_Addr_to_string(). */
	hg_size_t             addr_size = sizeof(addr);
	int                   rc;

	/* Fast path: already resolved for this RPC. */
	if (rpc_priv->crp_orig_uri != NULL)
		return rpc_priv->crp_orig_uri;

	hg_info = HG_Get_info(rpc_priv->crp_hg_hdl);
	if (hg_info == NULL)
		return "NOINFO";

	rc = HG_Addr_to_string(hg_info->hg_class, addr, &addr_size, hg_info->addr);
	if (rc != 0)
		return "NONE";

	/*
	 * Defensive: never copy more than the stack buffer holds, whatever size
	 * the library reported back.
	 * NOTE(review): the reported size is presumed to include the NUL
	 * terminator - confirm against the Mercury documentation.
	 */
	if (addr_size == 0 || addr_size > sizeof(addr))
		return "NONE";

	D_ALLOC(rpc_priv->crp_orig_uri, addr_size);
	if (rpc_priv->crp_orig_uri == NULL)
		return "NOMEM";

	memcpy(rpc_priv->crp_orig_uri, addr, addr_size);

	return rpc_priv->crp_orig_uri;
}

/* A wrapper around D_TRACE_DEBUG that ensures the ptr option is a RPC */
#define RPC_TRACE(mask, rpc, fmt, ...) \
do { \
Expand All @@ -38,13 +67,15 @@
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
if ((rpc)->crp_coll) { \
D_TRACE_DEBUG(mask, (rpc), "[opc=%#x (%s:%s) rpcid=%#lx CORPC] " fmt, \
D_TRACE_DEBUG(mask, (rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx CORPC] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, ##__VA_ARGS__); \
} else { \
D_TRACE_DEBUG(mask, (rpc), \
"[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
"[opc=%#x (%s:%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, (rpc)->crp_pub.cr_ep.ep_rank, \
(rpc)->crp_pub.cr_ep.ep_tag, ##__VA_ARGS__); \
} \
Expand All @@ -58,12 +89,15 @@
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
if ((rpc)->crp_coll) { \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx CORPC] " fmt, \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx CORPC] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, ##__VA_ARGS__); \
} else { \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_ERROR((rpc), \
"[opc=%#x (%s:%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, (rpc)->crp_pub.cr_ep.ep_rank, \
(rpc)->crp_pub.cr_ep.ep_tag, ##__VA_ARGS__); \
} \
Expand All @@ -77,12 +111,14 @@
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
if ((rpc)->crp_coll) { \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx CORPC] " fmt, \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx CORPC] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, ##__VA_ARGS__); \
} else { \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, (rpc)->crp_pub.cr_ep.ep_rank, \
(rpc)->crp_pub.cr_ep.ep_tag, ##__VA_ARGS__); \
} \
Expand All @@ -96,12 +132,14 @@
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
if ((rpc)->crp_coll) { \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx CORPC] " fmt, \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx CORPC] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, ##__VA_ARGS__); \
} else { \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, \
crt_rpc_priv_get_origin_addr((rpc)), \
(rpc)->crp_req_hdr.cch_rpcid, (rpc)->crp_pub.cr_ep.ep_rank, \
(rpc)->crp_pub.cr_ep.ep_tag, ##__VA_ARGS__); \
} \
Expand Down
8 changes: 6 additions & 2 deletions src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -578,6 +579,9 @@ crt_rpc_priv_free(struct crt_rpc_priv *rpc_priv)

RPC_TRACE(DB_TRACE, rpc_priv, "destroying\n");

if (rpc_priv->crp_orig_uri != NULL)
D_FREE(rpc_priv->crp_orig_uri);

D_FREE(rpc_priv);
}

Expand Down Expand Up @@ -1591,8 +1595,8 @@ crt_reply_send(crt_rpc_t *req)
RPC_TRACE(DB_ALL, rpc_priv, "reply_send\n");
rc = crt_hg_reply_send(rpc_priv);
if (rc != 0)
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.\n",
rc, rpc_priv->crp_pub.cr_opc);
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.: %s\n", rc,
rpc_priv->crp_pub.cr_opc, crt_req_origin_addr_get(req));
}

rpc_priv->crp_reply_pending = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/cart/crt_rpc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -154,6 +155,7 @@ struct crt_rpc_priv {
hg_addr_t crp_hg_addr; /* target na address */
struct crt_hg_hdl *crp_hdl_reuse; /* reused hg_hdl */
char *crp_tgt_uri; /* target uri address */
char *crp_orig_uri; /* where the RPC comes from */
crt_rpc_t *crp_ul_req; /* uri lookup request */

uint32_t crp_ul_retry; /* uri lookup retry counter */
Expand Down
10 changes: 10 additions & 0 deletions src/include/cart/api.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -2345,6 +2346,15 @@ int crt_context_quota_limit_get(crt_context_t crt_ctx, crt_quota_type_t quota, i
int
crt_req_get_proto_ver(crt_rpc_t *req);

/**
 * Get the rpc origin address.
 *
 * The address string is resolved on first use and cached on the RPC; the
 * cached copy is released when the RPC itself is freed. When the address
 * cannot be resolved, a constant placeholder string is returned instead
 * ("NOINFO", "NONE" or "NOMEM"). In either case the returned pointer is not
 * owned by the caller and must not be freed or modified.
 *
 * \param[in] rpc              pointer to RPC request
 * \return                     the origin address of the RPC as a string, or
 *                             a constant placeholder string on failure
 */
char *
crt_req_origin_addr_get(crt_rpc_t *rpc);

/** @}
*/

Expand Down
48 changes: 27 additions & 21 deletions src/object/srv_obj.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -183,9 +184,8 @@ obj_rw_reply(crt_rpc_t *rpc, int status, uint64_t epoch, bool release_input,
orwo->orw_epoch = max(epoch, orwo->orw_epoch);
}

D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch "DF_X64
", status %d\n", rpc, opc_get(rpc->cr_opc),
ioc->ioc_map_ver, orwo->orw_epoch, status);
D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch " DF_X64 ", status %d\n", rpc,
opc_get(rpc->cr_opc), ioc->ioc_map_ver, orwo->orw_epoch, status);

if (!ioc->ioc_lost_reply) {
if (release_input)
Expand Down Expand Up @@ -242,11 +242,13 @@ obj_bulk_comp_cb(const struct crt_bulk_cb_info *cb_info)
struct crt_bulk_desc *bulk_desc;
crt_rpc_t *rpc;

if (cb_info->bci_rc != 0)
D_ERROR("bulk transfer failed: %d\n", cb_info->bci_rc);

bulk_desc = cb_info->bci_bulk_desc;
rpc = bulk_desc->bd_rpc;

if (cb_info->bci_rc != 0)
D_ERROR("rpc: %p:%s bulk transfer failed: %d\n", rpc, crt_req_origin_addr_get(rpc),
cb_info->bci_rc);

arg = (struct obj_bulk_args *)cb_info->bci_arg;
/**
* Note: only one thread will access arg.result, so
Expand Down Expand Up @@ -5831,17 +5833,19 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc)
max_ver = version;

DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, rc,
"(%s) handled collective punch RPC %p for obj "DF_UOID" on XS %u/%u in "DF_UUID"/"
DF_UUID"/"DF_UUID" with epc "DF_X64", pmv %u/%u, dti "DF_DTI", bulk_tgt_sz %u, "
"(%s) handled collective punch RPC %p:%s for obj " DF_UOID
" on XS %u/%u in " DF_UUID "/" DF_UUID "/" DF_UUID " with epc " DF_X64
", pmv %u/%u, dti " DF_DTI ", bulk_tgt_sz %u, "
"bulk_tgt_nr %u, tgt_nr %u, forward width %u, forward depth %u, flags %x",
(ocpi->ocpi_flags & ORF_LEADER) ? "leader" :
(ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"), rpc,
DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id, dmi->dmi_tgt_id,
DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch,
ocpi->ocpi_map_ver, max_ver, DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz,
ocpi->ocpi_bulk_tgt_nr, (unsigned int)ocpi->ocpi_tgts.ca_count,
ocpi->ocpi_disp_width, ocpi->ocpi_disp_depth, ocpi->ocpi_flags);
(ocpi->ocpi_flags & ORF_LEADER)
? "leader"
: (ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"),
rpc, crt_req_origin_addr_get(rpc), DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id,
dmi->dmi_tgt_id, DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch, ocpi->ocpi_map_ver, max_ver,
DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz, ocpi->ocpi_bulk_tgt_nr,
(unsigned int)ocpi->ocpi_tgts.ca_count, ocpi->ocpi_disp_width,
ocpi->ocpi_disp_depth, ocpi->ocpi_flags);

obj_punch_complete(rpc, rc, max_ver);

Expand Down Expand Up @@ -5973,11 +5977,13 @@ ds_obj_coll_query_handler(crt_rpc_t *rpc)
rc = dtx_leader_end(dlh, ioc.ioc_coh, rc);

out:
D_DEBUG(DB_IO, "Handled collective query RPC %p %s forwarding for obj "DF_UOID
" on rank %u XS %u/%u epc "DF_X64" pmv %u, with dti "DF_DTI", dct_nr %u, "
"forward width %u, forward depth %u\n: "DF_RC"\n", rpc,
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid),
myrank, dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
D_DEBUG(DB_IO,
"Handled collective query RPC %p:%s %s forwarding for obj " DF_UOID
" on rank %u XS %u/%u epc " DF_X64 " pmv %u, with dti " DF_DTI ", dct_nr %u, "
"forward width %u, forward depth %u\n: " DF_RC "\n",
rpc, crt_req_origin_addr_get(rpc),
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid), myrank,
dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
DP_DTI(&ocqi->ocqi_xid), (unsigned int)ocqi->ocqi_tgts.ca_count,
ocqi->ocqi_disp_width, ocqi->ocqi_disp_depth, DP_RC(rc));

Expand Down