From 66122818b7dbed561cdef6c23b4ab9a49d4f0985 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Thu, 14 Jan 2016 00:54:53 +0900 Subject: [PATCH] refs #6: Revive no-opped kernel execution and more comments. * Now the performance gets affected by computation. --- src/lib/elementgraph.cc | 3 ++- src/lib/io.cc | 3 ++- src/lib/offloadtask.cc | 12 ++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/lib/elementgraph.cc b/src/lib/elementgraph.cc index f97f726..efb938f 100644 --- a/src/lib/elementgraph.cc +++ b/src/lib/elementgraph.cc @@ -110,12 +110,13 @@ void ElementGraph::send_offload_task_to_device(OffloadTask *task) task->prepare_read_buffer(); task->prepare_write_buffer(); task->state = TASK_PREPARED; - } /* endif(!task.prepared) */ + } /* Send the offload task to device thread. */ assert(task->state == TASK_PREPARED); int ret = rte_ring_enqueue(ctx->offload_input_queues[dev_idx], (void*) task); if (ret == -ENOBUFS) { + /* The input queue is full. Delay the task. */ enqueue_offload_task(task, task->tracker.element, task->tracker.input_port); } else { /* It may return -EDQUOT, but here we ignore this HWM signal. diff --git a/src/lib/io.cc b/src/lib/io.cc index 54d50f0..53f5fa9 100644 --- a/src/lib/io.cc +++ b/src/lib/io.cc @@ -154,7 +154,8 @@ static void comp_offload_task_completion_cb(struct ev_loop *loop, struct ev_asyn ((float) task_cycles / total_batch_size - ((float) task->batches[b]->delay_time / task->batches[b]->count)); } - /* We need to rewind the state so that it gets executed by ElemGraph. */ + /* Rewind the state so that it gets "prepared" by ElemGraph. + * (e.g., update datablock list used by next element) */ task->state = TASK_INITIALIZED; ctx->elem_graph->enqueue_offload_task(task, ctx->elem_graph->get_first_next(task->elem), diff --git a/src/lib/offloadtask.cc b/src/lib/offloadtask.cc index 3851a0a..4ddaf04 100644 --- a/src/lib/offloadtask.cc +++ b/src/lib/offloadtask.cc @@ -327,13 +327,13 @@ void OffloadTask::execute() arg = {(void *) &checkbits_d, sizeof(void *), alignof(void *)}; cctx->push_kernel_arg(arg); - //offload_compute_handler &handler = elem->offload_compute_handlers[cctx->type_name]; - //handler(cctx, &res); + offload_compute_handler &handler = elem->offload_compute_handlers[cctx->type_name]; + handler(cctx, &res); /* Skip kernel execution. */ - res.num_workitems = 0; - res.num_threads_per_workgroup = 1; - res.num_workgroups = 1; - cctx->get_host_checkbits()[0] = 1; + //res.num_workitems = 0; + //res.num_threads_per_workgroup = 1; + //res.num_workgroups = 1; + //cctx->get_host_checkbits()[0] = 1; } else {