From owner-p4-projects@FreeBSD.ORG Tue Mar 4 03:37:46 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 76CA510656A6; Tue, 4 Mar 2008 03:37:46 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 3347B1065692 for ; Tue, 4 Mar 2008 03:37:46 +0000 (UTC) (envelope-from swise@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 316418FC2D for ; Tue, 4 Mar 2008 03:37:46 +0000 (UTC) (envelope-from swise@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id m243bkEh044306 for ; Tue, 4 Mar 2008 03:37:46 GMT (envelope-from swise@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id m243bkCs044304 for perforce@freebsd.org; Tue, 4 Mar 2008 03:37:46 GMT (envelope-from swise@FreeBSD.org) Date: Tue, 4 Mar 2008 03:37:46 GMT Message-Id: <200803040337.m243bkCs044304@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to swise@FreeBSD.org using -f From: Steve Wise To: Perforce Change Reviews Cc: Subject: PERFORCE change 136780 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 04 Mar 2008 03:37:47 -0000 http://perforce.freebsd.org/chv.cgi?CH=136780 Change 136780 by swise@swise:vic10:iwarp on 2008/03/04 03:37:25 Added wlat, rlat, and bw tests to krping. Affected files ... .. //depot/projects/iwarp/sys/contrib/rdma/krping/krping.c#11 edit .. //depot/projects/iwarp/sys/contrib/rdma/krping/krping.h#5 edit Differences ... ==== //depot/projects/iwarp/sys/contrib/rdma/krping/krping.c#11 (text+ko) ==== @@ -76,6 +76,10 @@ {"client", OPT_NOPARAM, 'c'}, {"dmamr", OPT_NOPARAM, 'D'}, {"debug", OPT_NOPARAM, 'd'}, + {"wlat", OPT_NOPARAM, 'l'}, + {"rlat", OPT_NOPARAM, 'L'}, + {"bw", OPT_NOPARAM, 'B'}, + {"tx-depth", OPT_INT, 't'}, {NULL, 0, 0} }; @@ -102,8 +106,8 @@ /* * Default max buffer size for IO... */ -#define RPING_BUFSIZE 64*1024 -#define RPING_SQ_DEPTH 16 +#define RPING_BUFSIZE 128*1024 +#define RPING_SQ_DEPTH 32 /* lifted from netinet/libalias/alias_proxy.c */ @@ -346,7 +350,8 @@ mtx_unlock_spin(&cb->lock); return; } - ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + if (!cb->wlat && !cb->rlat && !cb->bw) + ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { if (wc.status) { DEBUG_LOG(PFX "cq completion failed status %d\n", @@ -381,7 +386,10 @@ DEBUG_LOG(PFX "recv completion\n"); cb->stats.recv_bytes += sizeof(cb->recv_buf); cb->stats.recv_msgs++; - ret = cb->server ? server_recv(cb, &wc) : + if (cb->wlat || cb->rlat || cb->bw) + ret = server_recv(cb, &wc); + else + ret = cb->server ? server_recv(cb, &wc) : client_recv(cb, &wc); if (ret) { DEBUG_LOG(PFX "recv wc error: %d\n", ret); @@ -431,10 +439,12 @@ return ret; } - krping_wait(cb, CONNECTED); - if (cb->state == ERROR) { - DEBUG_LOG(PFX "wait for CONNECTED state %d\n", cb->state); - return -1; + if (!cb->wlat && !cb->rlat && !cb->bw) { + krping_wait(cb, CONNECTED); + if (cb->state == ERROR) { + DEBUG_LOG(PFX "wait for CONNECTED state %d\n", cb->state); + return -1; + } } return 0; } @@ -473,7 +483,7 @@ cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; cb->rdma_sq_wr.num_sge = 1; - if (!cb->server) { + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_addr = virt_to_phys(cb->start_buf); } } @@ -546,7 +556,7 @@ } } - if (!cb->server) { + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, (1L<<22), 4096, 1024*1024); if (!cb->start_buf) { @@ -555,12 +565,15 @@ goto err2; } if (!cb->use_dmamr) { + unsigned flags = IB_ACCESS_REMOTE_READ; + if (cb->wlat || cb->rlat || cb->bw) + flags |= IB_ACCESS_REMOTE_WRITE; buf.addr = virt_to_phys(cb->start_buf); buf.size = cb->size; iovbase = virt_to_phys(cb->start_buf); cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, - IB_ACCESS_REMOTE_READ, + flags, &iovbase); if (IS_ERR(cb->start_mr)) { @@ -607,7 +620,7 @@ cb->size, DMA_BIDIRECTIONAL); #endif contigfree(cb->rdma_buf, cb->size, M_DEVBUF); - if (!cb->server) { + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, start_mapping), cb->size, DMA_BIDIRECTIONAL); @@ -630,7 +643,7 @@ int ret; memset(&init_attr, 0, sizeof(init_attr)); - init_attr.cap.max_send_wr = RPING_SQ_DEPTH; + init_attr.cap.max_send_wr = cb->txdepth; init_attr.cap.max_recv_wr = 2; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_send_sge = 1; @@ -669,7 +682,7 @@ DEBUG_LOG(PFX "created pd %p\n", cb->pd); cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, - cb, RPING_SQ_DEPTH * 2, 0); + cb, cb->txdepth * 2, 0); if (IS_ERR(cb->cq)) { DEBUG_LOG(PFX "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); @@ -677,10 +690,12 @@ } DEBUG_LOG(PFX "created cq %p\n", cb->cq); - ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); - if (ret) { - DEBUG_LOG(PFX "ib_create_cq failed\n"); - goto err2; + if (!cb->wlat && !cb->rlat && !cb->bw) { + ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + if (ret) { + DEBUG_LOG(PFX "ib_create_cq failed\n"); + goto err2; + } } ret = krping_create_qp(cb); @@ -809,6 +824,451 @@ } } +static void rlat_test(struct krping_cb *cb) +{ + int scnt; + int iters = cb->count; + struct timeval start_tv, stop_tv; + int ret; + struct ib_wc wc; + struct ib_send_wr *bad_wr; + int ne; + + scnt = 0; + cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + microtime(&start_tv); + while (scnt < iters) { + + ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + printf(PFX + "Couldn't post send: ret=%d scnt %d\n", + ret, scnt); + return; + } + + do { + ne = ib_poll_cq(cb->cq, 1, &wc); + if (cb->state == ERROR) { + printf(PFX + "state == ERROR...bailing scnt %d\n", scnt); + return; + } + } while (ne == 0); + + if (ne < 0) { + printf(PFX "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + printf(PFX "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + printf(PFX "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + ++scnt; + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + printf(PFX "delta sec %lu delta usec %lu iter %d size %d\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size); +} + +static int alloc_cycle_mem(int cycle_iters, + cycles_t **post_cycles_start, + cycles_t **post_cycles_stop, + cycles_t **poll_cycles_start, + cycles_t **poll_cycles_stop, + cycles_t **last_poll_cycles_start) +{ + *post_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*post_cycles_start) { + goto fail1; + } + *post_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*post_cycles_stop) { + goto fail2; + } + *poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*poll_cycles_start) { + goto fail3; + } + *poll_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*poll_cycles_stop) { + goto fail4; + } + *last_poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*last_poll_cycles_start) { + goto fail5; + } + return 0; +fail5: + free(*poll_cycles_stop, M_DEVBUF); +fail4: + free(*poll_cycles_start, M_DEVBUF); +fail3: + free(*post_cycles_stop, M_DEVBUF); +fail2: + free(*post_cycles_start, M_DEVBUF); +fail1: + printf(PFX "%s malloc failed\n", __FUNCTION__); + return ENOMEM; +} + +static void free_cycle_mem(cycles_t *post_cycles_start, + cycles_t *post_cycles_stop, + cycles_t *poll_cycles_start, + cycles_t *poll_cycles_stop, + cycles_t *last_poll_cycles_start) +{ + free(last_poll_cycles_start, M_DEVBUF); + free(poll_cycles_stop, M_DEVBUF); + free(poll_cycles_start, M_DEVBUF); + free(post_cycles_stop, M_DEVBUF); + free(post_cycles_start, M_DEVBUF); +} + +static void wlat_test(struct krping_cb *cb) +{ + int ccnt, scnt, rcnt; + int iters=cb->count; + volatile char *poll_buf = (char *) cb->start_buf; + char *buf = (char *)cb->rdma_buf; + ccnt = 0; + scnt = 0; + rcnt = 0; + struct timeval start_tv, stop_tv; + cycles_t *post_cycles_start, *post_cycles_stop; + cycles_t *poll_cycles_start, *poll_cycles_stop; + cycles_t *last_poll_cycles_start; + cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; + int i; + int cycle_iters = 1000; + int err; + + err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, + &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); + + if (err) { + printf(PFX "%s malloc failed\n", __FUNCTION__); + return; + } + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + if (cycle_iters > iters) + cycle_iters = iters; + microtime(&start_tv); + while (scnt < iters || ccnt < iters || rcnt < iters) { + + /* Wait till buffer changes. */ + if (rcnt < iters && !(scnt < 1 && !cb->server)) { + ++rcnt; + while (*poll_buf != (char)rcnt) { + if (cb->state == ERROR) { + printf(PFX "state = ERROR, bailing\n"); + return; + } + } + } + + if (scnt < iters) { + struct ib_send_wr *bad_wr; + + *buf = (char)scnt+1; + if (scnt < cycle_iters) + post_cycles_start[scnt] = get_cycles(); + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + printf(PFX "Couldn't post send: scnt=%d\n", + scnt); + return; + } + if (scnt < cycle_iters) + post_cycles_stop[scnt] = get_cycles(); + scnt++; + } + + if (ccnt < iters) { + struct ib_wc wc; + int ne; + + if (ccnt < cycle_iters) + poll_cycles_start[ccnt] = get_cycles(); + do { + if (ccnt < cycle_iters) + last_poll_cycles_start[ccnt] = get_cycles(); + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ccnt < cycle_iters) + poll_cycles_stop[ccnt] = get_cycles(); + ++ccnt; + + if (ne < 0) { + printf(PFX "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + printf(PFX "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + printf(PFX "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + printf(PFX "scnt=%d, rcnt=%d, ccnt=%d\n", + scnt, rcnt, ccnt); + return; + } + } + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + for (i=0; i < cycle_iters; i++) { + sum_post += post_cycles_stop[i] - post_cycles_start[i]; + sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + } + + printf(PFX "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size, cycle_iters, + (unsigned long long)sum_post, (unsigned long long)sum_poll, + (unsigned long long)sum_last_poll); + + free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, + poll_cycles_stop, last_poll_cycles_start); +} + +static void bw_test(struct krping_cb *cb) +{ + int ccnt, scnt, rcnt; + int iters=cb->count; + ccnt = 0; + scnt = 0; + rcnt = 0; + struct timeval start_tv, stop_tv; + cycles_t *post_cycles_start, *post_cycles_stop; + cycles_t *poll_cycles_start, *poll_cycles_stop; + cycles_t *last_poll_cycles_start; + cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; + int i; + int cycle_iters = 1000; + int err; + + err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, + &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); + + if (err) { + printf(PFX "%s kmalloc failed\n", __FUNCTION__); + return; + } + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + if (cycle_iters > iters) + cycle_iters = iters; + microtime(&start_tv); + while (scnt < iters || ccnt < iters) { + + while (scnt < iters && scnt - ccnt < cb->txdepth) { + struct ib_send_wr *bad_wr; + + if (scnt < cycle_iters) + post_cycles_start[scnt] = get_cycles(); + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + printf(PFX "Couldn't post send: scnt=%d\n", + scnt); + return; + } + if (scnt < cycle_iters) + post_cycles_stop[scnt] = get_cycles(); + ++scnt; + } + + if (ccnt < iters) { + int ne; + struct ib_wc wc; + + if (ccnt < cycle_iters) + poll_cycles_start[ccnt] = get_cycles(); + do { + if (ccnt < cycle_iters) + last_poll_cycles_start[ccnt] = get_cycles(); + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ccnt < cycle_iters) + poll_cycles_stop[ccnt] = get_cycles(); + ccnt += 1; + + if (ne < 0) { + printf(PFX "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + printf(PFX "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + printf(PFX "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + } + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + for (i=0; i < cycle_iters; i++) { + sum_post += post_cycles_stop[i] - post_cycles_start[i]; + sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + } + + printf(PFX "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size, cycle_iters, + (unsigned long long)sum_post, (unsigned long long)sum_poll, + (unsigned long long)sum_last_poll); + + free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, + poll_cycles_stop, last_poll_cycles_start); +} + +static void krping_rlat_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completiong error %d\n", wc.status); + return; + } + + krping_wait(cb, ERROR); +} + +static void krping_wlat_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completiong error %d\n", wc.status); + return; + } + + wlat_test(cb); + +} + +static void krping_bw_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completiong error %d\n", wc.status); + return; + } + + if (cb->duplex) + bw_test(cb); + krping_wait(cb, ERROR); +} + static int krping_bind_server(struct krping_cb *cb) { struct sockaddr_in sin; @@ -877,7 +1337,15 @@ goto err2; } - krping_test_server(cb); + if (cb->wlat) + krping_wlat_test_server(cb); + else if (cb->rlat) + krping_rlat_test_server(cb); + else if (cb->bw) + krping_bw_test_server(cb); + else + krping_test_server(cb); + rdma_disconnect(cb->child_cm_id); rdma_destroy_id(cb->child_cm_id); err2: @@ -960,6 +1428,171 @@ } } +static void krping_rlat_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->rdma_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + +#if 0 +{ + int i; + struct timeval start, stop; + time_t sec; + suseconds_t usec; + unsigned long long elapsed; + struct ib_wc wc; + struct ib_send_wr *bad_wr; + int ne; + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = 0; + cb->rdma_sq_wr.num_sge = 0; + + microtime(&start); + for (i=0; i < 100000; i++) { + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + printf(PFX "Couldn't post send\n"); + return; + } + do { + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ne < 0) { + printf(PFX "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + printf(PFX "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + printf(PFX "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + } + microtime(&stop); + + if (stop.tv_usec < start.tv_usec) { + stop.tv_usec += 1000000; + stop.tv_sec -= 1; + } + sec = stop.tv_sec - start.tv_sec; + usec = stop.tv_usec - start.tv_usec; + elapsed = sec * 1000000 + usec; + printf(PFX "0B-write-lat iters 100000 usec %llu\n", elapsed); +} +#endif + + rlat_test(cb); +} + +static void krping_wlat_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + wlat_test(cb); +} + +static void krping_bw_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + printf(PFX "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + printf(PFX "poll error %d\n", ret); + return; + } + if (wc.status) { + printf(PFX "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + bw_test(cb); +} + static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; @@ -1049,7 +1682,14 @@ goto err2; } - krping_test_client(cb); + if (cb->wlat) + krping_wlat_test_client(cb); + else if (cb->rlat) + krping_rlat_test_client(cb); + else if (cb->bw) + krping_bw_test_client(cb); + else + krping_test_client(cb); rdma_disconnect(cb->cm_id); err2: krping_free_buffers(cb); @@ -1064,6 +1704,7 @@ int ret = 0; char *optarg; unsigned long optint; + debug = 0; cb = malloc(sizeof(*cb), M_DEVBUF, M_WAITOK); if (!cb) @@ -1077,6 +1718,7 @@ cb->server = -1; cb->state = IDLE; cb->size = 64; + cb->txdepth = RPING_SQ_DEPTH; mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_SPIN); while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, @@ -1134,6 +1776,19 @@ cb->validate++; DEBUG_LOG(PFX "validate data\n"); break; + case 'L': + cb->rlat++; + break; + case 'l': + cb->wlat++; + break; + case 'B': + cb->bw++; + break; + case 't': + cb->txdepth = optint; + DEBUG_LOG(PFX "txdepth %d\n", cb->txdepth); + break; case 'd': debug++; break; @@ -1151,6 +1806,12 @@ ret = EINVAL; goto out; } + if ((cb->bw + cb->rlat + cb->wlat) > 1) { + printf(PFX "Pick only one test: bw, rlat, wlat\n"); + ret = -EINVAL; + goto out; + } + cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); if (IS_ERR(cb->cm_id)) { ==== //depot/projects/iwarp/sys/contrib/rdma/krping/krping.h#5 (text+ko) ==== @@ -101,8 +101,24 @@ /* listener on service side. */ struct rdma_cm_id *child_cm_id; /* connection on server side */ TAILQ_ENTRY(krping_cb) list; + + int rlat; /* run read latency test */ + int wlat; /* run write latency test */ + int bw; /* run write bw test */ + int duplex; /* run write bw full duplex test */ + int txdepth; }; +static __inline uint64_t +get_cycles(void) +{ + u_int32_t low, high; + __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); + return (low | ((u_int64_t)high << 32)); +} + +typedef uint64_t cycles_t; + extern struct mtx krping_mutex; TAILQ_HEAD(krping_cb_list, krping_cb); extern struct krping_cb_list krping_cbs;