From f354563fcf92ccf4a3554e8326bb3eeacc0136f3 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:49:12 -0800 Subject: [PATCH 01/28] eature:support Continues mode and latency iterations to CXL host exerciser (#3040) -Add Continues mode test -Support command line argument latency iterations and run scenario test in loop -Support command line argument cache line count Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 869 ++++++++++++++---- samples/cxl_host_exerciser/cxl_he_cmd.h | 34 +- .../cxl_host_exerciser/cxl_host_exerciser.h | 2 +- 3 files changed, 699 insertions(+), 206 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 4e478bb678de..b9857321d678 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -51,7 +51,8 @@ class he_cache_cmd : public he_cmd { he_cache_cmd() : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(0), he_stride_(0), he_test_(0), he_test_all_(false), he_dev_instance_(0), - he_stride_cmd_(false) {} + he_stride_cmd_(false), he_cls_count_(FPGA_512CACHE_LINES), + he_latency_iterations_(0) {} virtual ~he_cache_cmd() {} @@ -115,31 +116,37 @@ class he_cache_cmd : public he_cmd { ->transform(CLI::Range(1, 256)) ->default_val("10"); - // Test all - app->add_option("--testall", he_test_all_, "Run all tests") - ->default_val("false"); + // Cache lines count + app->add_option("--clscount", he_cls_count_, "Cache lines count") + ->transform(CLI::Range(1, 512)) + ->default_val("512"); + + // Number of latency test iterations + app->add_option("--latency_iterations", he_latency_iterations_, + "Number of latency test iterations") + ->transform(CLI::Range(0, 5000)) + ->default_val("0"); + } int he_run_fpga_rd_cache_hit_test() { cout << "********** FPGA Read cache hit test start**********" << endl; /* STEPS - 1) 
Allocate DSM, Read buffer // flush - 2) set cache lines 32kb/64 - 3) set line repeat count - 4) Set RdShared (CXL) config - 5) Run test ( AFU copies cache from host memory to FPGA cache) - 6) set line repeat count - 7) Set RdShared (CXL) config - 8) Run test ( AFU read cache from FPGA cache) + 1) Allocate DSM, Read buffer + 2) Scenario setup: + 1) Set cache lines and line repeat count + 2) Set RdShared (CXL) config + 3) Run test (AFU copies cache from host/fpga memory to FPGA cache) + 3) Run test (AFU read cache from FPGA cache) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); - cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; + cout << "Read number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Numa node:" << numa_node_ << endl; @@ -180,10 +187,10 @@ class he_cache_cmd : public he_cmd { } // Start test - he_start_test(); + he_start_test(HE_PRTEST_SCENARIO); // wait for completion - if (!he_wait_test_completion(HE_PRTEST_SCENARIO)) { + if (!he_wait_test_completion()) { he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read(); @@ -201,7 +208,6 @@ class he_cache_cmd : public he_cmd { he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; @@ -216,21 +222,80 @@ class he_cache_cmd : public he_cmd { rd_table_ctl_.enable_address_stride = 1; rd_table_ctl_.stride = he_stride_; } - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - // Start test - he_start_test(); + // Continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_RD_CONFIG, 
he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(HE_CXL_RD_LATENCY); + + } else if(he_latency_iterations_ > 0) { + // Latency iterations test + double perf_data = 0; + double latency = 0; + double total_perf_data = 0; + double total_latency = 0; + + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = 1; + + host_exe_->write64(HE_RD_NUM_LINES, 1); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { + total_perf_data = total_perf_data + perf_data; + total_latency = total_latency + latency; + } + host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ + BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + } //end for loop + host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", + total_latency / he_latency_iterations_); + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga read cache hit test + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + 
return -1; + } + he_perf_counters(HE_CXL_RD_LATENCY); } - he_perf_counters(HE_CXL_RD_LATENCY); host_exe_->free_dsm(); host_exe_->free_cache_read(); @@ -245,22 +310,21 @@ class he_cache_cmd : public he_cmd { cout << "********** FPGA Write cache hit test start**********" << endl; /* STEPS - 1) Allocate DSM, Read buffer, Write buffer // flush - 2) set cache lines 32kb/64 - 3) set line repeat count - 4) Set RdShared (CXL) config - 5) Run test ( AFU copies cache from host memory to FPGA cache) - 6) set line repeat count - 7) Set WrLine_M/WrPart_M (CXL) config - 8) Run test ( AFU writes to FPGA cache) + 1) Allocate DSM, Read/Write buffer + 2) Scenario setup: + 1) Set cache lines and line repeat count + 2) Set RdShared CXL config + 3) Run test (AFU copies cache from host/fpga memory to FPGA cache) + 3) Set Write CXL config + 4) Run test (AFU writes to FPGA cache) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); - cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; + cout << "Read/write number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -302,10 +366,10 @@ class he_cache_cmd : public he_cmd { } // Start test - he_start_test(); + he_start_test(HE_PRTEST_SCENARIO); // wait for completion - if (!he_wait_test_completion(HE_PRTEST_SCENARIO)) { + if (!he_wait_test_completion()) { he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read_write(); @@ -323,7 +387,6 @@ class he_cache_cmd : public he_cmd { he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; he_wr_cfg_.opcode = WR_LINE_M; - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); // set RD_ADDR_TABLE_CTRL 
he_rd_cfg_.value = 0; @@ -344,22 +407,76 @@ class he_cache_cmd : public he_cmd { wr_table_ctl_.enable_address_stride = 1; wr_table_ctl_.stride = he_stride_; } - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); - // Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read_write(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(); + + } else if (he_latency_iterations_ > 0) { + // Latency iterations test + double perf_data = 0; + double total_perf_data = 0; + + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = 1; + host_exe_->write64(HE_WR_NUM_LINES, 1); + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, NULL)) { + total_perf_data = total_perf_data + perf_data; + } + host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", + i, perf_data); + } //end for loop + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga Write cache hit test + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, 
wr_table_ctl_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(); } - he_perf_counters(); cout << "********** AFU Write to FPGA Cache successfully ********** " << endl; @@ -376,18 +493,18 @@ class he_cache_cmd : public he_cmd { cout << "********** FPGA Read cache miss test start**********" << endl; /* STEPS - 1) Allocate DSM, Read buffer, Write buffer - 2) Write number of lines more then 32kb 2mb/64 - 3) Set RdShared (CXL) config - 4) Run test (Buffer is not present in FPGA - FPGA read Cache miss ) + 1) Allocate DSM, Read buffer + 2) Set cache lines and line repeat count + 3) Set Read CXL config + 4) Run test (Buffer is not present in FPGA - FPGA read cache miss) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); - cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; + cout << "Read number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; @@ -396,7 +513,6 @@ class he_cache_cmd : public he_cmd { he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; @@ -411,7 +527,6 @@ class he_cache_cmd : public he_cmd { rd_table_ctl_.enable_address_stride = 1; rd_table_ctl_.stride = he_stride_; } - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate DSM buffer if (!host_exe_->allocate_dsm()) { @@ -426,19 +541,79 @@ class he_cache_cmd : public he_cmd { return -1; } - 
// Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(HE_CXL_RD_LATENCY); + + } else if (he_latency_iterations_ > 0) { + + // Latency loop test + double perf_data = 0; + double latency = 0; + double total_perf_data = 0; + double total_latency = 0; + + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = 1; + host_exe_->write64(HE_RD_NUM_LINES, 1); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { + total_perf_data = total_perf_data + perf_data; + total_latency = total_latency + latency; + } + host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ + BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + } // end for loop + host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", + total_latency / he_latency_iterations_); + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga read cache hit test + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // Start test + he_start_test(); + + // wait for 
completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(HE_CXL_RD_LATENCY); } - he_perf_counters(HE_CXL_RD_LATENCY); host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -453,29 +628,28 @@ class he_cache_cmd : public he_cmd { cout << "********** FPGA write cache miss test start**********" << endl; /* STEPS - 1) Allocate DSM, Read buffer, Write buffer - 2) Write number of lines more then 32 kb 2mb/64 - 3) Set WR ItoMWr (CXL) config - 4) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) + 1) Allocate DSM, Write buffer + 2) Set cache lines and line repeat count + 3) Set Write CXL config + 4) Run test ( Buffer is not present in FPGA - FPGA write cache miss) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); - cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; + cout << "Read/write number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; - // set W_CONFIG + // set Write config he_wr_cfg_.value = 0; he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; - he_wr_cfg_.opcode = WR_LINE_M; - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + he_wr_cfg_.opcode = WR_LINE_I; // Set WR_ADDR_TABLE_CTRL wr_table_ctl_.value = 0; @@ -490,7 +664,6 @@ class he_cache_cmd : public he_cmd { wr_table_ctl_.enable_address_stride = 1; wr_table_ctl_.stride = he_stride_; } - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); // Allocate DSM buffer if (!host_exe_->allocate_dsm()) { @@ -499,26 +672,82 @@ class he_cache_cmd : public he_cmd { } 
// Allocate Read, Write buffer - if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { cerr << "allocate cache read failed" << endl; host_exe_->free_dsm(); return -1; } - // Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read_write(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(); + + } else if (he_latency_iterations_ > 0) { + // Latency loop test + double perf_data = 0; + double total_perf_data = 0; + + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = 1; + host_exe_->write64(HE_WR_NUM_LINES, 1); + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, NULL)) { + total_perf_data = total_perf_data + perf_data; + } + host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", + i, perf_data); + } //end for loop + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga Write cache hit test + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + 
host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(); } - he_perf_counters(); - host_exe_->free_cache_read_write(); + host_exe_->free_cache_write(); host_exe_->free_dsm(); cout << "********** AFU Write FPGA Cache Miss successfully ********** " @@ -529,21 +758,21 @@ class he_cache_cmd : public he_cmd { int he_run_host_rd_cache_hit_test() { - cout << "********** Host LLC Read cache hit test start**********" << endl; + cout << "********** Host LLC Read cache hit test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer - 2) create thread read buffer - 3) Set RdLine_I (CXL) config - 4) Run test ( AFU reads from host cache to FPGA cache) + 2) Create thread read buffer (move cache lines to HOST LLC) + 3) Set Read CXL config + 4) Run test (AFU reads from host cache to FPGA cache) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); - cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; + cout << "Read number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -579,25 +808,89 @@ class he_cache_cmd : public he_cmd { std::thread t1(he_cache_thread, host_exe_->get_read(), BUFFER_SIZE_32KB); sleep(1); - // Start test - he_start_test(); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - g_stop_thread = true; - t1.join(); - sleep(1); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; + // continuous mode + if 
(he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(HE_CXL_RD_LATENCY); + + } else if (he_latency_iterations_ > 0) { + // Latency loop test + double perf_data = 0; + double latency = 0; + double total_perf_data = 0; + double total_latency = 0; + + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = 1; + + host_exe_->write64(HE_RD_NUM_LINES, 1); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { + total_perf_data = total_perf_data + perf_data; + total_latency = total_latency + latency; + } + host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ + BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + } //end for loop + host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", + total_latency / he_latency_iterations_); + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga read cache hit test + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_read(); + 
host_exe_->free_dsm(); + return -1; + } + he_perf_counters(HE_CXL_RD_LATENCY); } g_stop_thread = true; t1.join(); - he_perf_counters(HE_CXL_RD_LATENCY); sleep(1); host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -611,21 +904,20 @@ class he_cache_cmd : public he_cmd { int he_run_host_wr_cache_hit_test() { cout << "********** Host LLC Write cache hit test start**********" << endl; - /* STEPS 1) Allocate DSM, Write buffer - 2) create thread read buffer - 3) Set ItoMWr (CXL) config - 4) Run test ( AFU write to host cache) + 2) Create thread read buffer (move cache lines to HOST LLC) + 3) Set Write CXL config + 4) Run test (AFU write to host cache) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Write number Lines:" << FPGA_512CACHE_LINES << endl; + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + cout << "Write number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -661,24 +953,83 @@ class he_cache_cmd : public he_cmd { std::thread t1(he_cache_thread, host_exe_->get_write(), BUFFER_SIZE_32KB); sleep(1); - // Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - g_stop_thread = true; - t1.join(); - sleep(1); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(); + + } else if 
(he_latency_iterations_ > 0) { + // Latency loop test + double perf_data = 0; + double total_perf_data = 0; + + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = 1; + host_exe_->write64(HE_WR_NUM_LINES, 1); + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, NULL)) { + total_perf_data = total_perf_data + perf_data; + } + host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", + i, perf_data); + } // end for loop + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga Write cache hit test + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(); } g_stop_thread = true; t1.join(); - he_perf_counters(); cout << "********** AFU write host cache successfully ********** " << endl; sleep(1); @@ -690,21 +1041,21 @@ class he_cache_cmd : public he_cmd { } int he_run_host_rd_cache_miss_test() { - cout << "********** Host LLC Read cache miss test start**********" << endl; + cout << "********** Host LLC Read cache miss test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer - 2) flush host read buffer cachde - 3) Set RdLine_I (CXL) config 
- 4) Run test ( AFU reads from host cache to FPGA cache) + 2) Flush host read buffer cache + 3) Set read CXL config + 4) Run test (AFU reads from host cache(cache lines are not in host LLC)) */ // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES ); - cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; + host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); + cout << "Read/write number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -736,19 +1087,79 @@ class he_cache_cmd : public he_cmd { return -1; } - // Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(HE_CXL_RD_LATENCY); + + } else if (he_latency_iterations_ > 0) { + // Latency loop test + double perf_data = 0; + double latency = 0; + double total_perf_data = 0; + double total_latency = 0; + + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = 1; + + host_exe_->write64(HE_RD_NUM_LINES, 1); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + 
host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { + total_perf_data = total_perf_data + perf_data; + total_latency = total_latency + latency; + } + host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ + BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + } //end for loop + host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", + total_latency / he_latency_iterations_); + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga read cache hit test + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(HE_CXL_RD_LATENCY); } - he_perf_counters(HE_CXL_RD_LATENCY); host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -762,20 +1173,19 @@ class he_cache_cmd : public he_cmd { int he_run_host_wr_cache_miss_test() { cout << "********** Host LLC Write cache miss test start**********" << endl; - /* STEPS 1) Allocate DSM, write buffer - 2) flush host write buffer cachde - 3) Set RdLine_I (CXL) config - 4) Run test ( AFU reads from host cache to FPGA cache) + 2) Flush host write buffer cache + 3) Set write CXL config + 4) Run test (AFU writes host memory (cache lines are not in host LLC)) */ // HE_INFO // Set write number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Write number Lines:" << FPGA_512CACHE_LINES << endl; + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + cout << "Write number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table 
size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -807,19 +1217,75 @@ class he_cache_cmd : public he_cmd { return -1; } - // Start test - he_start_test(); + // continuous mode + if (he_continuousmode_) { + he_rd_cfg_.continuous_mode_enable = 0x1; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; + // Start test + he_start_test(); + + // Continuous mode + he_continuousmode(); + + // performance + he_perf_counters(); + + } else if (he_latency_iterations_ > 0) { + // Latency loop test + double perf_data = 0; + double total_perf_data = 0; + + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = 1; + host_exe_->write64(HE_WR_NUM_LINES, 1); + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + for (uint64_t i = 0; i < he_latency_iterations_; i++) { + // Start test + he_start_test(); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + if (he_get_perf(&perf_data, NULL)) { + total_perf_data = total_perf_data + perf_data; + } + host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", + i, perf_data); + } // end for loop + host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", + total_perf_data / he_latency_iterations_); + + } else { + // fpga Write cache hit test + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); + + // Start test + he_start_test(); 
+ + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + he_perf_counters(); } - he_perf_counters(); host_exe_->free_cache_write(); host_exe_->free_dsm(); @@ -830,6 +1296,42 @@ class he_cache_cmd : public he_cmd { return 0; } + + void he_forcetestcmpl() + { + // Force stop test + he_ctl_.value = 0; + he_ctl_.ForcedTestCmpl = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + + if (!he_wait_test_completion()) + sleep(1); + + he_ctl_.value = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + usleep(1000); + } + + + bool he_continuousmode() + { + uint32_t count = 0; + if (he_continuousmode_ && he_contmodetime_ > 0) + { + host_exe_->logger_->info("continuous mode time: {0} seconds", he_contmodetime_); + host_exe_->logger_->info("Ctrl+C to stop continuous mode"); + + while (!g_he_exit) { + sleep(1); + count++; + if (count > he_contmodetime_) + break; + } + he_forcetestcmpl(); + } + return true; + } + virtual int run(test_afu *afu, CLI::App *app) { (void)app; int ret = 0; @@ -860,45 +1362,6 @@ class he_cache_cmd : public he_cmd { return -1; } - if (he_test_all_ == true) { - int retvalue = 0; - ret = he_run_fpga_rd_cache_hit_test(); - if (ret != 0) { - retvalue = ret; - } - ret = he_run_fpga_wr_cache_hit_test(); - if (ret != 0) { - retvalue = ret; - } - - ret = he_run_fpga_rd_cache_miss_test(); - if (ret != 0) { - retvalue = ret; - } - ret = he_run_fpga_wr_cache_miss_test(); - if (ret != 0) { - retvalue = ret; - } - ret = he_run_host_rd_cache_hit_test(); - if (ret != 0) { - retvalue = ret; - } - ret = he_run_host_wr_cache_hit_test(); - if (ret != 0) { - retvalue = ret; - } - - ret = he_run_host_rd_cache_miss_test(); - if (ret != 0) { - retvalue = ret; - } - ret = he_run_host_wr_cache_miss_test(); - if (ret != 0) { - retvalue = ret; - } - - return retvalue; - } if (he_test_ == HE_FPGA_RD_CACHE_HIT) { ret = he_run_fpga_rd_cache_hit_test(); @@ 
-952,6 +1415,8 @@ class he_cache_cmd : public he_cmd { bool he_test_all_; uint32_t he_dev_instance_; bool he_stride_cmd_; + uint32_t he_cls_count_; + uint64_t he_latency_iterations_; }; void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 366cffe07c09..f5264d230769 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -226,11 +226,10 @@ class he_cmd : public test_command { return 0; } - bool he_wait_test_completion(const char* str = HE_TEST_STARTED) { + bool he_wait_test_completion() { /* Wait for test completion */ uint32_t timeout = HELPBK_TEST_TIMEOUT; - cout << str << endl; volatile uint8_t *status_ptr = host_exe_->get_dsm(); while (0 == ((*status_ptr) & 0x1)) { usleep(HELPBK_TEST_SLEEP_INVL); @@ -267,12 +266,14 @@ class he_cmd : public test_command { } - void he_start_test() { + void he_start_test(const char* str = HE_TEST_STARTED) { // start test he_ctl_.Start = 0; host_exe_->write64(HE_CTL, he_ctl_.value); he_ctl_.Start = 1; host_exe_->write64(HE_CTL, he_ctl_.value); + + cout << str << endl; } bool verify_numa_node() { @@ -300,6 +301,33 @@ class he_cmd : public test_command { return true; } + + bool he_get_perf(double* perf_data, double* latency, + he_cxl_latency cxl_latency = HE_CXL_LATENCY_NONE) { + volatile he_cache_dsm_status* dsm_status = NULL; + + dsm_status = reinterpret_cast( + (uint8_t*)(host_exe_->get_dsm())); + if (!dsm_status) + return false; + + if (dsm_status->num_ticks > 0) { + *perf_data = + he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, + dsm_status->num_ticks); + + if (cxl_latency == HE_CXL_RD_LATENCY && dsm_status->num_reads > 0) { + *latency = (double)((dsm_status->num_ticks / (double)dsm_status->num_reads) + * (2.5)); + + } + return true; + } + + return false; + } + + protected: host_exerciser *host_exe_; uint32_t he_clock_mhz_; diff --git 
a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index d2eaeaac8a73..2f3a23202acf 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -216,7 +216,7 @@ union he_rd_config { uint64_t value; struct { uint64_t read_traffic_enable : 1; - uint64_t continuous_mode_Enable : 1; + uint64_t continuous_mode_enable : 1; uint64_t waitfor_completion : 1; uint64_t prewrite_sync_enable : 1; uint64_t postwrite_sync_enable : 1; From fac810a7ddc0f91e963ed22e72f77607a2f1c5e2 Mon Sep 17 00:00:00 2001 From: Sophie Mao <106994021+sophimao@users.noreply.github.com> Date: Thu, 9 Nov 2023 13:29:51 -0800 Subject: [PATCH 02/28] opae: bump revision to 2.10.0 (#3041) --- CMakeLists.txt | 2 +- packaging/opae.admin/version | 2 +- packaging/opae/version | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 225501bfcd01..352efac5ffa9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -793,7 +793,7 @@ endif() set(OPAE_VERSION_LOCAL "" CACHE STRING "OPAE local version") set(OPAE_VERSION_MAJOR 2 CACHE STRING "OPAE major version" FORCE) -set(OPAE_VERSION_MINOR 9 CACHE STRING "OPAE minor version" FORCE) +set(OPAE_VERSION_MINOR 10 CACHE STRING "OPAE minor version" FORCE) set(OPAE_VERSION_REVISION 0${OPAE_VERSION_LOCAL} CACHE STRING "OPAE revision version" FORCE) set(OPAE_VERSION ${OPAE_VERSION_MAJOR}.${OPAE_VERSION_MINOR}.${OPAE_VERSION_REVISION} CACHE STRING "OPAE version" FORCE) diff --git a/packaging/opae.admin/version b/packaging/opae.admin/version index 0d2477fc3351..b6382249652c 100755 --- a/packaging/opae.admin/version +++ b/packaging/opae.admin/version @@ -24,5 +24,5 @@ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
-PROJECT_VERSION='2.9.0' +PROJECT_VERSION='2.10.0' PROJECT_RELEASE='1' diff --git a/packaging/opae/version b/packaging/opae/version index cd40c622c719..088c8136f4a2 100755 --- a/packaging/opae/version +++ b/packaging/opae/version @@ -24,5 +24,5 @@ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -declare -r PROJECT_VERSION='2.9.0' +declare -r PROJECT_VERSION='2.10.0' declare -r PROJECT_RELEASE='1' From bfecf48abaeaf61751fb4d74e91acb456cef13e9 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:34:08 -0800 Subject: [PATCH 03/28] Fix:CXL Host exerciser calculations and output string (#3042) * Fix:CXL Host exerciser calculations and output string The CXL host exerciser read latency output string has been modified with the "Total transaction time nanoseconds". Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cmd.h | 29 ++++++++++++------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index f5264d230769..34d073c03304 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -77,24 +77,23 @@ class he_cmd : public test_command { cout << "actual data:" << dsm_status->actual_data << endl; cout << "expected data:" << dsm_status->expected_data << endl; + double latency = 0; + double perf_data = 0; // print bandwidth if (dsm_status->num_ticks > 0) { - double perf_data = - he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, - dsm_status->num_ticks); - host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); - } - - if (cxl_latency == HE_CXL_RD_LATENCY) { - if (dsm_status->num_ticks > 0 && dsm_status->num_reads > 0) { - double latency = (double)((dsm_status->num_ticks / 
(double)dsm_status->num_reads) - *( 2.5)); - - host_exe_->logger_->info("Read Latency : {0:0.2f} nanoseconds", latency); - } - else { - host_exe_->logger_->info("Read Latency: N/A"); + perf_data = he_num_xfers_to_bw(dsm_status->num_reads + + dsm_status->num_writes, dsm_status->num_ticks); + + if (cxl_latency == HE_CXL_RD_LATENCY) { + //To convert clock ticks to nanoseconds,multiply the clock ticks by 2.5 + latency = (double)(dsm_status->num_ticks * 2.5); + host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s Total transaction time: {1:0.2f} nanoseconds", + perf_data, latency); + } else { + host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); } + } else { + host_exe_->logger_->info("Read Latency: N/A"); } cout << "********* DSM Status CSR end *********" << endl; From 3a8b50e1f542ffed16dbf4a425813e53ba94ebc8 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:03:32 -0800 Subject: [PATCH 04/28] Fix: Remove latency iterations from write cache hit/miss scenario tests (#3043) - CXL host exerciser Remove latency iterations tests from write cache hit/miss scenario - Remove Bandwidth calculation and output in Read cache hit/miss scenario Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 209 +++--------------- samples/cxl_host_exerciser/cxl_he_cmd.h | 12 + .../cxl_host_exerciser/cxl_host_exerciser.h | 1 + samples/cxl_host_exerciser/he_cache_test.h | 4 +- 4 files changed, 41 insertions(+), 185 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index b9857321d678..6be9b9d7aace 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -239,10 +239,8 @@ class he_cache_cmd : public he_cmd { he_perf_counters(HE_CXL_RD_LATENCY); } else if(he_latency_iterations_ > 0) { + // Latency iterations test - double perf_data = 0; - double latency = 0; - double 
total_perf_data = 0; double total_latency = 0; rd_table_ctl_.enable_address_stride = 1; @@ -265,18 +263,14 @@ class he_cache_cmd : public he_cmd { return -1; } - if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { - total_perf_data = total_perf_data + perf_data; - total_latency = total_latency + latency; - } - host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ - BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + total_latency = total_latency + get_ticks(); + host_exe_->logger_->info("Iteration: {0} Latency: {1:0.3f} nanoseconds", + i, (double)(get_ticks() * LATENCY_FACTOR)); } //end for loop + + total_latency = total_latency * LATENCY_FACTOR; host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", total_latency / he_latency_iterations_); - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); - } else { // fpga read cache hit test host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); @@ -424,39 +418,6 @@ class he_cache_cmd : public he_cmd { // performance he_perf_counters(); - } else if (he_latency_iterations_ > 0) { - // Latency iterations test - double perf_data = 0; - double total_perf_data = 0; - - wr_table_ctl_.enable_address_stride = 1; - wr_table_ctl_.stride = 1; - host_exe_->write64(HE_WR_NUM_LINES, 1); - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - - for (uint64_t i = 0; i < he_latency_iterations_; i++) { - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read_write(); - host_exe_->free_dsm(); - return -1; - } - - if (he_get_perf(&perf_data, NULL)) { - total_perf_data = total_perf_data + perf_data; - } - host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", - i, perf_data); - } //end for loop - host_exe_->logger_->info("Average BandWidth: {0:0.3f} 
GB/s", - total_perf_data / he_latency_iterations_); - } else { // fpga Write cache hit test host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -512,7 +473,7 @@ class he_cache_cmd : public he_cmd { he_rd_cfg_.value = 0; he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; - he_rd_cfg_.opcode = RD_LINE_S; + he_rd_cfg_.opcode = RD_LINE_I; // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; @@ -559,9 +520,6 @@ class he_cache_cmd : public he_cmd { } else if (he_latency_iterations_ > 0) { // Latency loop test - double perf_data = 0; - double latency = 0; - double total_perf_data = 0; double total_latency = 0; rd_table_ctl_.enable_address_stride = 1; @@ -583,17 +541,14 @@ class he_cache_cmd : public he_cmd { return -1; } - if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { - total_perf_data = total_perf_data + perf_data; - total_latency = total_latency + latency; - } - host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ - BandWidth: {2:0.3f} GB/s", i, latency, perf_data); - } // end for loop + total_latency = total_latency + get_ticks(); + host_exe_->logger_->info("Iteration: {0} Latency: {1:0.3f} nanoseconds", + i, (double)(get_ticks() * LATENCY_FACTOR)); + } //end for loop + + total_latency = total_latency * LATENCY_FACTOR; host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", total_latency / he_latency_iterations_); - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); } else { // fpga read cache hit test @@ -694,39 +649,6 @@ class he_cache_cmd : public he_cmd { // performance he_perf_counters(); - } else if (he_latency_iterations_ > 0) { - // Latency loop test - double perf_data = 0; - double total_perf_data = 0; - - wr_table_ctl_.enable_address_stride = 1; - wr_table_ctl_.stride = 1; - host_exe_->write64(HE_WR_NUM_LINES, 1); - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, 
wr_table_ctl_.value); - - for (uint64_t i = 0; i < he_latency_iterations_; i++) { - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; - } - - if (he_get_perf(&perf_data, NULL)) { - total_perf_data = total_perf_data + perf_data; - } - host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", - i, perf_data); - } //end for loop - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); - } else { // fpga Write cache hit test host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -782,7 +704,7 @@ class he_cache_cmd : public he_cmd { he_rd_cfg_.value = 0; he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; - he_rd_cfg_.opcode = RD_LINE_I; + he_rd_cfg_.opcode = RD_LINE_S; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // set RD_ADDR_TABLE_CTRL @@ -825,10 +747,8 @@ class he_cache_cmd : public he_cmd { he_perf_counters(HE_CXL_RD_LATENCY); } else if (he_latency_iterations_ > 0) { + // Latency loop test - double perf_data = 0; - double latency = 0; - double total_perf_data = 0; double total_latency = 0; rd_table_ctl_.enable_address_stride = 1; @@ -854,17 +774,14 @@ class he_cache_cmd : public he_cmd { return -1; } - if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { - total_perf_data = total_perf_data + perf_data; - total_latency = total_latency + latency; - } - host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ - BandWidth: {2:0.3f} GB/s", i, latency, perf_data); + total_latency = total_latency + get_ticks(); + host_exe_->logger_->info("Iteration: {0} Latency: {1:0.3f} nanoseconds", + i, (double)(get_ticks() * LATENCY_FACTOR)); } //end for loop + + total_latency = total_latency * LATENCY_FACTOR; host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", total_latency / 
he_latency_iterations_); - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); } else { // fpga read cache hit test @@ -969,42 +886,6 @@ class he_cache_cmd : public he_cmd { // performance he_perf_counters(); - } else if (he_latency_iterations_ > 0) { - // Latency loop test - double perf_data = 0; - double total_perf_data = 0; - - wr_table_ctl_.enable_address_stride = 1; - wr_table_ctl_.stride = 1; - host_exe_->write64(HE_WR_NUM_LINES, 1); - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - - for (uint64_t i = 0; i < he_latency_iterations_; i++) { - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - g_stop_thread = true; - t1.join(); - sleep(1); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; - } - - if (he_get_perf(&perf_data, NULL)) { - total_perf_data = total_perf_data + perf_data; - } - host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", - i, perf_data); - } // end for loop - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); - } else { // fpga Write cache hit test host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -1103,10 +984,8 @@ class he_cache_cmd : public he_cmd { he_perf_counters(HE_CXL_RD_LATENCY); } else if (he_latency_iterations_ > 0) { + // Latency loop test - double perf_data = 0; - double latency = 0; - double total_perf_data = 0; double total_latency = 0; rd_table_ctl_.enable_address_stride = 1; @@ -1129,17 +1008,14 @@ class he_cache_cmd : public he_cmd { return -1; } - if (he_get_perf(&perf_data, &latency, HE_CXL_RD_LATENCY)) { - total_perf_data = total_perf_data + perf_data; - total_latency = total_latency + latency; - } - host_exe_->logger_->info("Iteration: {0} latency: {1:0.3f} nanoseconds \ - BandWidth: {2:0.3f} GB/s", i, 
latency, perf_data); + total_latency = total_latency + get_ticks(); + host_exe_->logger_->info("Iteration: {0} Latency: {1:0.3f} nanoseconds", + i, (double)(get_ticks() * LATENCY_FACTOR)); } //end for loop + + total_latency = total_latency * LATENCY_FACTOR; host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", total_latency / he_latency_iterations_); - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); } else { // fpga read cache hit test @@ -1233,39 +1109,6 @@ class he_cache_cmd : public he_cmd { // performance he_perf_counters(); - } else if (he_latency_iterations_ > 0) { - // Latency loop test - double perf_data = 0; - double total_perf_data = 0; - - wr_table_ctl_.enable_address_stride = 1; - wr_table_ctl_.stride = 1; - host_exe_->write64(HE_WR_NUM_LINES, 1); - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - - for (uint64_t i = 0; i < he_latency_iterations_; i++) { - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; - } - - if (he_get_perf(&perf_data, NULL)) { - total_perf_data = total_perf_data + perf_data; - } - host_exe_->logger_->info("Iteration: {0} BandWidth: {2:0.3f} GB/s", - i, perf_data); - } // end for loop - host_exe_->logger_->info("Average BandWidth: {0:0.3f} GB/s", - total_perf_data / he_latency_iterations_); - } else { // fpga Write cache hit test host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 34d073c03304..20bbdab37241 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -326,6 +326,18 @@ class he_cmd : public test_command { return false; } + uint64_t get_ticks() { + volatile 
he_cache_dsm_status* dsm_status = NULL; + + dsm_status = reinterpret_cast( + (uint8_t*)(host_exe_->get_dsm())); + if (!dsm_status) + return 0; + if (dsm_status->num_ticks > 0) + return dsm_status->num_ticks; + else + return 0; + } protected: host_exerciser *host_exe_; diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 2f3a23202acf..0ece8c32ba14 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -44,6 +44,7 @@ static const uint64_t BUFFER_SIZE_32KB = 32* KB; static const uint64_t FPGA_32KB_CACHE_LINES = (32 * KB) / 64; static const uint64_t FPGA_2MB_CACHE_LINES = (2 * MB) / 64; static const uint64_t FPGA_512CACHE_LINES = 512; +static const double LATENCY_FACTOR = 2.5; // Host execiser CSR Offset enum { diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 03d006689e59..eb853caed20e 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -772,7 +772,7 @@ class afu { goto out_free; } - logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); rd_wr_buffer_ = (uint8_t *)ptr; @@ -811,7 +811,7 @@ class afu { << endl; } - logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); buffer_release(rd_wr_buffer_, rd_wr_buf_len_); From 74584049f134dd996e67e20fc035c24426159463 Mon Sep 17 00:00:00 2001 From: Tim Whisonant Date: Fri, 17 Nov 2023 14:40:38 -0800 Subject: [PATCH 05/28] Fix: address issues pointed out by Python scanners (#3036) (#3045) ### Description Our Python scanners point out issues regarding the use of the 
subprocess module. The issues tend to be in 2 main categories: 1) The use of subprocess.Popen() directly is discouraged. Instead, the scanners recommend using run, call, or checked_call. 2) The use of any of the subprocess calls with shell=True is forbidden, because it is prone to code injection attacks. Instead, the scanners require shell=False or omitting shell altogether. ### Collateral (docs, reports, design examples, case IDs): Python scans when preparing the release. - [ ] Document Update Required? (Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI and manual testing of rtl_src_config. Signed-off-by: Tim Whisonant --- binaries/fpgadiag/opae/diag/fecmode.py | 25 +-- binaries/fpgadiag/opae/diag/fpgadiag.py | 5 +- binaries/fpgadiag/opae/diag/fpgastats.py | 14 +- platforms/platmgr/tools/rtl_src_config.py | 17 +- python/pacsign/ReadMe.txt | 10 +- python/pacsign/pacsign/common_util.py | 26 +-- python/pacsign/test.py | 254 ---------------------- 7 files changed, 42 insertions(+), 309 deletions(-) delete mode 100755 python/pacsign/test.py diff --git a/binaries/fpgadiag/opae/diag/fecmode.py b/binaries/fpgadiag/opae/diag/fecmode.py index d8840672ce0c..03a173326f99 100644 --- a/binaries/fpgadiag/opae/diag/fecmode.py +++ b/binaries/fpgadiag/opae/diag/fecmode.py @@ -1,5 +1,5 @@ #! 
/usr/bin/env python3 -# Copyright(c) 2020, Intel Corporation +# Copyright(c) 2020-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -42,8 +42,8 @@ CONF_FILE = '/etc/modprobe.d/dfl-fme.conf' OPTION_LINE = 'options dfl_n3000_nios fec_mode=' DRV_MODE = '/sys/module/dfl_n3000_nios/parameters/fec_mode' -REMOVE_MOD = 'rmmod dfl_n3000_nios' -PROBE_MOD = 'modprobe dfl_n3000_nios' +REMOVE_MOD = ['rmmod', 'dfl_n3000_nios'] +PROBE_MOD = ['modprobe', 'dfl_n3000_nios'] def get_fpga_sysfs_path(sbdf): @@ -85,14 +85,11 @@ def do_rsu(sbdf, debug): return None try: - cmd = "rsu bmcimg {}".format(sbdf) + cmd = ['rsu', 'bmcimg', sbdf] if debug: + cmd.append('-d') print(cmd) - cmd += ' -d' - rc = subprocess.call(cmd, shell=True) - if rc != 0: - print("failed to '{}'".format(cmd)) - return None + subprocess.run(cmd, check=True) except subprocess.CalledProcessError as e: print('failed call') return None @@ -145,10 +142,7 @@ def reload_driver(fec_mode, debug): try: if debug: print(REMOVE_MOD) - rc = subprocess.call(REMOVE_MOD, shell=True) - if rc != 0: - print("failed to '{}'".format(REMOVE_MOD)) - return rc + subprocess.run(REMOVE_MOD, check=True) except subprocess.CalledProcessError as e: print('failed call') return 2 @@ -158,10 +152,7 @@ def reload_driver(fec_mode, debug): try: if debug: print(PROBE_MOD) - rc = subprocess.call(PROBE_MOD, shell=True) - if rc != 0: - print("failed to '{}'".format(PROBE_MOD)) - return rc + subprocess.run(PROBE_MOD, check=True) except subprocess.CalledProcessError as e: print(e) return 2 diff --git a/binaries/fpgadiag/opae/diag/fpgadiag.py b/binaries/fpgadiag/opae/diag/fpgadiag.py index c4509d3a4581..60e70423a32e 100755 --- a/binaries/fpgadiag/opae/diag/fpgadiag.py +++ b/binaries/fpgadiag/opae/diag/fpgadiag.py @@ -1,5 +1,5 @@ #! 
/usr/bin/env python3 -# Copyright(c) 2017-2020 Intel Corporation +# Copyright(c) 2017-2023 Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -67,10 +67,9 @@ def main(): cmdline[0] = os.path.join(cwd, cmdline[0]) cmdline = cmdline + ['-t', args.target] + leftover - cmdline = ' '.join(cmdline) try: - subprocess.check_call(cmdline, shell=True) + subprocess.run(cmdline, check=True) except CalledProcessError as e: exit(e.returncode) diff --git a/binaries/fpgadiag/opae/diag/fpgastats.py b/binaries/fpgadiag/opae/diag/fpgastats.py index 82fc7c9c8d40..87815e35c311 100755 --- a/binaries/fpgadiag/opae/diag/fpgastats.py +++ b/binaries/fpgadiag/opae/diag/fpgastats.py @@ -1,5 +1,5 @@ #! /usr/bin/env python3 -# Copyright(c) 2018-2019, Intel Corporation +# Copyright(c) 2018-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -372,15 +372,13 @@ def eth_stats(self): print("Ethernet Interface Name:", eth_name[1]) print("------------------------------") try: - cmd = "ethtool {}".format(eth_name[1]) + cmd = ['ethtool', eth_name[1]] print(cmd) - rc = subprocess.call(cmd, shell=True) - cmd = "ethtool -S {}".format(eth_name[1]) + subprocess.run(cmd, check=True) + + cmd = ['ethtool', '-S', eth_name[1]] print(cmd) - rc = subprocess.call(cmd, shell=True) - if rc != 0: - print("failed to '{}'".format(cmd)) - return None + subprocess.run(cmd, check=True) except subprocess.CalledProcessError as e: print('failed call') return None diff --git a/platforms/platmgr/tools/rtl_src_config.py b/platforms/platmgr/tools/rtl_src_config.py index 1a7af4aa1c65..0289946c37c0 100755 --- a/platforms/platmgr/tools/rtl_src_config.py +++ b/platforms/platmgr/tools/rtl_src_config.py @@ -324,18 +324,16 @@ def addDefaultFpgaFamily(opts): if ('OPAE_PLATFORM_FPGA_FAMILY' not in 
os.environ): try: # Get the FPGA technology tag using afu_platform_info - cmd = 'afu_platform_info --key=fpga-family ' + cmd = ['afu_platform_info', '--key=fpga-family'] # What's the platform name? plat_class_file = os.path.join(getHWLibPath(opts), 'fme-platform-class.txt') with open(plat_class_file) as f: - cmd += f.read().strip() + cmd.append(f.read().strip()) - proc = subprocess.Popen(cmd, shell=True, - stdout=subprocess.PIPE) - for line in proc.stdout: - line = line.decode('ascii').strip() + proc = subprocess.run(cmd, check=True, capture_output=True, encoding='ascii') + for line in proc.stdout.split('\n'): os.environ['OPAE_PLATFORM_FPGA_FAMILY'] = line errcode = proc.wait() if (errcode): @@ -358,11 +356,10 @@ def getQuartusVersion(opts): 'QUARTUS_VERSION_MAJOR' not in os.environ): try: # Get the Quartus major version number - proc = subprocess.Popen('quartus_sh --version', shell=True, - stdout=subprocess.PIPE) + cmd = ['quartus_sh', '--version'] + proc = subprocess.run(cmd, check=True, capture_output=True, encoding='ascii') ok = False - for line in proc.stdout: - line = line.decode('ascii').strip() + for line in proc.stdout.split('\n'): if (line[:7] == 'Version'): ok = True diff --git a/python/pacsign/ReadMe.txt b/python/pacsign/ReadMe.txt index ca3891eff8a5..f76b7b74dc4e 100755 --- a/python/pacsign/ReadMe.txt +++ b/python/pacsign/ReadMe.txt @@ -2,7 +2,11 @@ This is standalone signing tool You need to have Python 3.5/3.6 (tested) to run the script -You can run test.py to fully execute all the available operation +You can run pacsign-tests.sh to fully execute all the available operation + + $ python3 -m virtualenv pacsign-venv + $ source ./pacsign-venv/bin/activate + $ pip3 install ./opae-sdk/python/pacsign + $ ./opae-sdk/python/pacsign/pacsign-tests.sh + $ deactivate - python test.py - \ No newline at end of file diff --git a/python/pacsign/pacsign/common_util.py b/python/pacsign/pacsign/common_util.py index 53d2896e3b9e..4114718e4dc3 100755 --- 
a/python/pacsign/pacsign/common_util.py +++ b/python/pacsign/pacsign/common_util.py @@ -96,22 +96,20 @@ def exception_handler(etype, value, tb): def run_command(command, printed_cmd=None, return_code=0, allow_error=False): - if printed_cmd is None: printed_cmd = command - p = subprocess.Popen( - command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf8", - ) - returnmsg = p.communicate()[0] - assert p.returncode == 0 or allow_error, ( - 'Fail to run command "%s", error code %d =>\n%s' - % (printed_cmd, p.returncode, returnmsg) - ) - return (p.returncode, returnmsg) + + if isinstance(command, str): + command = command.split() + + try: + p = subprocess.run(command, check=True, capture_output=True, encoding='ascii') + except subprocess.CalledProcessError: + assert allow_error, ( + 'Fail to run command "%s", error code %d =>\n%s' + % (printed_cmd, p.returncode, p.stderr) + ) + return (p.returncode, p.stdout) def assert_in_error(boolean, string, *arg): diff --git a/python/pacsign/test.py b/python/pacsign/test.py deleted file mode 100755 index 435143809be2..000000000000 --- a/python/pacsign/test.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright(c) 2019, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -import os - -assert os.system("rm -rf test") == 0 -assert os.system("mkdir test") == 0 -# print ("Generate two pairs of private/public key") -assert ( - os.system( - ( - "python -m pacsign --operation=make_private_pem " - + "--curve=secp384r1 --no_passphrase test/pri1.pem -k key_manager" - ) - ) - != 0 -) -assert ( - os.system( - ( - "python -m pacsign --operation=make_private_pem " - + "--curve=secp384r1 --no_passphrase test/pri2.pem -k key_manager" - ) - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_private_pem " - + "--curve=secp256r1 --no_passphrase test/pri3.pem -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_private_pem " - + "--curve=secp384r1 --no_passphrase test/pri4.pem -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_public_pem " - + "test/pri1.pem test/pub1.pem -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_public_pem " - + "test/pri2.pem test/pub2.pem -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_public_pem " - + "test/pri3.pem test/pub3.pem -k key_manager" - ) - == 0 -) -assert 
( - os.system( - "python -m pacsign --operation=make_public_pem " - + "test/pri4.pem test/pub4.pem -k key_manager" - ) - == 0 -) -print("Generate root keychain") -assert ( - os.system( - "python -m pacsign --operation=make_root test/pub1.pem " - + "test/root.qky -k key_manager" - ) - == 0 -) -print("Append new key to root keychain to generate new keychain") -assert ( - os.system( - "python -m pacsign --operation=append_key --permission=-1 " + - "--cancel=0 --previous_qky=test/root.qky --previous_pem=test/pri1.pem " + - "test/pub2.pem test/key.qky -k key_manager") == 0) -print( - "Append new key to root keychain to generate new keychain " + - "(negative test)") -assert ( - os.system( - "python -m pacsign --operation=append_key --permission=-1 " + - "--cancel=0 --previous_qky=test/key.qky --previous_pem=test/pri2.pem " + - "test/pub4.pem test/negative.qky -k key_manager") != 0) -assert ( - os.system( - "python -m pacsign --operation=append_key --permission=-1 " + - "--cancel=0 --previous_qky=test/root.qky --previous_pem=test/pri1.pem " + - "test/pub3.pem test/negative.qky -k key_manager") != 0) -print("Insert Block0/Block1 into raw data and sign") -assert ( - os.system( - "python -m pacsign FIM -t update -H openssl_manager --yes " - + "-i hello_mem_afu.gbs -o s_PACSign.py " - + "-r d:/keys/darby/darby_dev_fim_root_public_256.pem " - + "-k d:/keys/darby/darby_dev_fim_csk0_public_256.pem -vv" - ) - == 0 -) -print("************************Insert Block0/Block1 into raw data and sign") -assert ( - os.system( - "python -m pacsign FIM -t update -H openssl_manager -i PACSign.py " - + "-o us_PACSign.py -vv -y" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign FIM -t update -H pkcs11_manager -k csk0 -r root_key " - + "-i PACSign.py -o us_pkcs11_PACSign.py -C PKCS11_config.json -y -vvv" - ) - == 0 -) -print("Insert Block0/Block1 into raw data and sign (negative test)") -assert ( - os.system( - "python -m pacsign --operation=insert_data_and_sign --type=FIM " - + 
"--qky=test/root.qky --pem=test/pri1.pem data.bin " - + "test/negative.bin -k key_manager -x update" - ) - != 0 -) -print("Insert Block0/Block1 into raw data, the output is unsigned data") -assert ( - os.system( - "python -m pacsign --operation=insert_data --type=BMC_FW data.bin " - + "test/unsigned_data.bin -k key_manager -x update" - ) - == 0 -) -print("Sign the unsigned data") -assert ( - os.system( - "python -m pacsign --operation=sign --qky=test/key.qky " - + "--pem=test/pri2.pem test/unsigned_data.bin test/data2sign.bin " - + "-k key_manager -x update" - ) - == 0 -) -print("Read Root Key Hash") -assert ( - os.system( - "python -m pacsign --operation=root_key_hash test/root.qky " - + "test/root.txt -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=root_key_hash test/key.qky " - + "test/key.txt -k key_manager" - ) - == 0 -) -print("Check file integrity") -assert ( - os.system( - "python -m pacsign --operation=check_integrity test/unsigned_data.bin" - + " > test/unsigned_data.bin.txt -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=check_integrity test/data1sign.bin " - + "> test/data1sign.bin.txt -k key_manager" - ) - == 0 -) -assert ( - os.system( - "python -m pacsign --operation=check_integrity test/data2sign.bin " - + "> test/data2sign.bin.txt -k key_manager" - ) - == 0 -) -print("Make and sign cancellation cert") -assert ( - os.system( - "python -m pacsign --operation=make_and_sign_cancellation_cert " - + "--type=FIM --qky=test/root.qky --pem=test/pri1.pem " - + "--cancel=1 test/cancel.cert -k key_manager" - ) - == 0 -) -print("Make and sign cancellation cert (negative test)") -assert ( - os.system( - "python -m pacsign --operation=make_and_sign_cancellation_cert " - + "--type=FIM --qky=test/root.qky --pem=test/pri1.pem " - + "--cancel=189 test/cancel.cert -k key_manager" - ) - != 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_and_sign_cancellation_cert " - + 
"--type=FIM --qky=test/key.qky --pem=test/pri2.pem " - + "--cancel=1 test/negative.cert -k key_manager" - ) - != 0 -) -assert ( - os.system( - "python -m pacsign --operation=make_and_sign_cancellation_cert " - + "--type=FIM --qky=test/key.qky --pem=test/pri2.pem " - + "--cancel=1 test/negative.bin -k key_manager" - ) - != 0 -) -print("Check cancellation cert integrity") -assert ( - os.system( - "python -m pacsign --operation=check_integrity test/cancel.cert " - + "> test/cancel.cert.txt -k key_manager" - ) - == 0 -) -assert not os.path.exists("test/negative.qky") -assert not os.path.exists("test/negative.bin") -assert not os.path.exists("test/negative.cert") -print("Misc") -assert os.system("python -m pacsign --help > test/help.txt") == 0 -assert os.system( - "python -m pacsign --help --operation=sign > test/ohelp.txt") == 0 From d63ca2cdd47c225557c4a63cd14eb2a175d39993 Mon Sep 17 00:00:00 2001 From: Tim Whisonant Date: Fri, 17 Nov 2023 15:06:04 -0800 Subject: [PATCH 06/28] (cherry-pick) Fix: correct issue pointed out by static analysis (#3044) (#3046) * Fix: correct issue pointed out by static analysis (#3044) ### Description Static analysis complained that although a destructor had been defined in class vfio_device, there was not a corresponding copy ctor nor assignment operator. The variable is treated as a Singleton in the application, so no copying occurs. Implementing the constructor and assignment operator as deleted solves the issue. ### Collateral (docs, reports, design examples, case IDs): - [ ] Document Update Required? 
(Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI Signed-off-by: Tim Whisonant --- binaries/opae.io/main.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/binaries/opae.io/main.h b/binaries/opae.io/main.h index 45dca05060e4..2ef003320079 100644 --- a/binaries/opae.io/main.h +++ b/binaries/opae.io/main.h @@ -121,6 +121,9 @@ struct vfio_device { close(); } + vfio_device(const vfio_device &) = delete; + vfio_device &operator=(const vfio_device &) = delete; + static vfio_device* open(const char *pci_addr) { opae_vfio *v = new opae_vfio(); @@ -243,6 +246,5 @@ struct vfio_device { opae_vfio *v_; vfio_device(opae_vfio *v) : v_(v){} - vfio_device(const vfio_device &); }; From 7dcaef5c716f3fdf6e259ed5b141a774b44c6dd3 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Tue, 28 Nov 2023 14:24:07 -0800 Subject: [PATCH 07/28] Feature:add loop count command line input to CXL host exerciser (#3051) - add loop count command line input and set read /write config - set FPGA / host read and write cache miss opcode to RD_LINE_I /WR_LINE_I Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 30 ++++++++++++++++--- .../cxl_host_exerciser/cxl_host_exerciser.h | 13 +++++--- samples/cxl_host_exerciser/he_cache_test.h | 4 ++- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 6be9b9d7aace..e927a4ef4f4d 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -49,10 +49,10 @@ void he_cache_thread(uint8_t *buf_ptr, uint64_t len); class he_cache_cmd : public he_cmd { public: he_cache_cmd() - : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(0), + : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(1), he_stride_(0), he_test_(0), he_test_all_(false), he_dev_instance_(0), 
he_stride_cmd_(false), he_cls_count_(FPGA_512CACHE_LINES), - he_latency_iterations_(0) {} + he_latency_iterations_(0), he_loop_count_(1) {} virtual ~he_cache_cmd() {} @@ -114,13 +114,18 @@ class he_cache_cmd : public he_cmd { // Line repeat count app->add_option("--linerepcount", he_linerep_count_, "Line repeat count") ->transform(CLI::Range(1, 256)) - ->default_val("10"); + ->default_val("1"); // Cache lines count app->add_option("--clscount", he_cls_count_, "Cache lines count") ->transform(CLI::Range(1, 512)) ->default_val("512"); + // Iteration loop count + app->add_option("--loops", he_loop_count_, "Number of loops") + ->transform(CLI::Range(1, 65535)) + ->default_val("1"); + // Number of latency test iterations app->add_option("--latency_iterations", he_latency_iterations_, "Number of latency test iterations") @@ -150,6 +155,7 @@ class he_cache_cmd : public he_cmd { cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Numa node:" << numa_node_ << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -274,6 +280,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga read cache hit test host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + he_rd_cfg_.repeat_read_fsm = he_loop_count_; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // Start test @@ -323,6 +330,7 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -420,6 +428,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga Write cache hit test + he_wr_cfg_.repeat_write_fsm = he_loop_count_; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, 
wr_table_ctl_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); @@ -468,6 +477,7 @@ class he_cache_cmd : public he_cmd { cout << "Read number Lines:" << he_cls_count_ << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -553,6 +563,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga read cache hit test host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + he_rd_cfg_.repeat_read_fsm = he_loop_count_; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // Start test @@ -599,6 +610,7 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set Write config he_wr_cfg_.value = 0; @@ -651,6 +663,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga Write cache hit test + he_wr_cfg_.repeat_write_fsm = he_loop_count_; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); @@ -699,6 +712,7 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -786,6 +800,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga read cache hit test host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + he_rd_cfg_.repeat_read_fsm = he_loop_count_; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // Start test @@ -839,6 +854,7 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << 
he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG he_wr_cfg_.value = 0; @@ -888,6 +904,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga Write cache hit test + he_wr_cfg_.repeat_write_fsm = he_loop_count_; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); @@ -941,6 +958,7 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG he_rd_cfg_.value = 0; @@ -1020,6 +1038,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga read cache hit test host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + he_rd_cfg_.repeat_read_fsm = he_loop_count_; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // Start test @@ -1066,12 +1085,13 @@ class he_cache_cmd : public he_cmd { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size << endl; + cout << "loop count:" << he_loop_count_ << endl; // set RD_CONFIG he_wr_cfg_.value = 0; he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; - he_wr_cfg_.opcode = WR_PUSH_I; + he_wr_cfg_.opcode = WR_LINE_I; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); // set RD_ADDR_TABLE_CTR @@ -1111,6 +1131,7 @@ class he_cache_cmd : public he_cmd { } else { // fpga Write cache hit test + he_wr_cfg_.repeat_write_fsm = he_loop_count_; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); @@ -1260,6 +1281,7 @@ class he_cache_cmd : public 
he_cmd { bool he_stride_cmd_; uint32_t he_cls_count_; uint64_t he_latency_iterations_; + uint32_t he_loop_count_; }; void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 0ece8c32ba14..1faa8306d5b1 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -169,14 +169,17 @@ union he_wr_config { struct { uint64_t write_traffic_enable : 1; uint64_t continuous_mode_enable : 1; - uint64_t waitfor_completion : 1; + uint64_t barrier : 1; uint64_t preread_sync_enable : 1; uint64_t postread_sync_enable : 1; uint64_t data_pattern : 2; uint64_t cl_evict_enable : 1; uint64_t opcode : 4; uint64_t line_repeat_count : 8; - uint64_t reserved : 44; + uint64_t rsvd_31_20 : 12; + uint64_t repeat_write_fsm : 16; + uint64_t disable_waitfor_completion : 1; + uint64_t rsvd_63_48 : 15; }; }; @@ -222,10 +225,12 @@ union he_rd_config { uint64_t prewrite_sync_enable : 1; uint64_t postwrite_sync_enable : 1; uint64_t data_pattern : 2; - uint64_t cl_evict_enable : 1; + uint64_t data_check_enable : 1; uint64_t opcode : 4; uint64_t line_repeat_count : 8; - uint64_t reserved : 44; + uint64_t rsvd_31_20 : 12; + uint64_t repeat_read_fsm : 16; + uint64_t rsvd_63_40 : 16; }; }; diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index eb853caed20e..e1898fedff3b 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -130,7 +130,7 @@ bool buffer_allocate(void** addr, uint64_t len, uint32_t numa_node) return false; } - if (addr_local == NULL) { + if (addr_local == NULL) { cerr << "Unable to mmap" << endl; return false; } @@ -142,6 +142,8 @@ bool buffer_allocate(void** addr, uint64_t len, uint32_t numa_node) return false; } + memset(addr_local, 0, len); + *addr = addr_local; return true; } From 
3af8a083418bda323ed1513c81d1aab946c77ecb Mon Sep 17 00:00:00 2001 From: Tim Whisonant Date: Thu, 30 Nov 2023 13:43:18 -0800 Subject: [PATCH 08/28] Bug: fpgabist uninitialized variable errors (#3053) ### Description Uninitialized variable errors were encountered on RHEL 8.6 with g++ 8.5.0. ### Collateral (docs, reports, design examples, case IDs): - [ ] Document Update Required? (Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI Signed-off-by: Tim Whisonant --- binaries/fpgabist/dma/fpga_dma.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/binaries/fpgabist/dma/fpga_dma.cpp b/binaries/fpgabist/dma/fpga_dma.cpp index 40b388a1c57b..f1ee262a6332 100644 --- a/binaries/fpgabist/dma/fpga_dma.cpp +++ b/binaries/fpgabist/dma/fpga_dma.cpp @@ -1,4 +1,4 @@ -// Copyright(c) 2018-2020, Intel Corporation +// Copyright(c) 2018-2023, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: @@ -438,7 +438,7 @@ static void *dispatcherWorker(void* dma_handle) { uint64_t desc_count = 1; msgdma_sw_desc_t *sw_desc[FPGA_DMA_BLOCK_SIZE+1]; msgdma_sw_desc_t *first_sw_desc; - msgdma_hw_descp_t *hw_descp; + msgdma_hw_descp_t *hw_descp = nullptr; bool is_owned_by_hw; uint8_t block_size = 0; uint8_t format; @@ -516,7 +516,7 @@ static void *dispatcherWorker(void* dma_handle) { // Skip invalid descriptors for(k=1; k<= (FPGA_DMA_BLOCK_SIZE-desc_count); k++) { - msgdma_hw_descp_t *unused_hw_descp; + msgdma_hw_descp_t *unused_hw_descp = nullptr; while(dma_h->free_desc.empty()); dma_h->free_desc.try_pop(unused_hw_descp); dump_hw_desc_log(0, unused_hw_descp->hw_desc, disp_log); @@ -544,7 +544,7 @@ static void *completionWorker(void* dma_handle) { FPGA_DMA_ERR("Invalid DMA handle\n"); return NULL; } - msgdma_sw_desc_t *sw_desc; + msgdma_sw_desc_t *sw_desc = nullptr; debug_print("started completion worker\n"); while (1) { @@ -560,7 +560,7 @@ static 
void *completionWorker(void* dma_handle) { if(sw_desc->last == 1 && (sw_desc->hw_descp->hw_desc_id < (FPGA_DMA_BLOCK_SIZE - 1))) { for(i = (sw_desc->hw_descp->hw_desc_id + 1) ; i < FPGA_DMA_BLOCK_SIZE ; i++) { - msgdma_hw_descp_t *unused_hw_descp; + msgdma_hw_descp_t *unused_hw_descp = nullptr; dma_h->invalid_desc_queue.try_pop(unused_hw_descp); dma_h->free_desc.push(unused_hw_descp); } From 09a0e22947668b36dec4c1b405b9c70d4b13463b Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:33:43 -0800 Subject: [PATCH 09/28] =?UTF-8?q?fix:CXL=20traffic=20generator=20read/writ?= =?UTF-8?q?e=20bandwidth=20and=20input=20arguments=20bou=E2=80=A6=20(#3054?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix:CXL traffic generator read/write bandwidth and input arguments boundary check fix #1 For the correct value of loop count, the following condition must be satisfied. max(READ,WRITE) * loop_count <= 268,435,456 fix #2 cannot have READs only (without any previous WRITEs) fix #3 READs cannot exceed #WRITEs Fix #4 Read HDM size from CSR. 
Signed-off-by: anandaravuri * fix code review comments Signed-off-by: anandaravuri --------- Signed-off-by: anandaravuri --- samples/cxl_mem_tg/cxl_mem_tg.h | 22 ++++++++++++++++- samples/cxl_mem_tg/cxl_tg_test.h | 41 ++++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/samples/cxl_mem_tg/cxl_mem_tg.h b/samples/cxl_mem_tg/cxl_mem_tg.h index c57bc9349b10..d6fa2ee84730 100644 --- a/samples/cxl_mem_tg/cxl_mem_tg.h +++ b/samples/cxl_mem_tg/cxl_mem_tg.h @@ -36,6 +36,11 @@ namespace cxl_mem_tg { using opae::fpga::types::token; const char *AFU_ID = "4DADEA34-2C78-48CB-A3DC-5B831F5CECBB"; +static const uint64_t CL = 64; +static const uint64_t KB = 1024; +static const uint64_t MB = KB * 1024; +static const uint64_t GB = MB * 1024; +static const uint64_t FPGA_32GB_CACHE_LINES = (32 * GB) / 64; static const uint64_t MEM_TG_TEST_TIMEOUT = 3000000; static const uint64_t TEST_SLEEP_INVL = 100; static const uint64_t TG_CTRL_CLEAR = 0x8000000000000000; @@ -82,6 +87,8 @@ enum { MEM_TG_CLK_COUNT = 0x0050, MEM_TG_WR_COUNT = 0x0058, MEM_TG_CLK_FREQ = 0x0060, + MEM_SIZE = 0x0068, + }; const int MEM_TG_CFG_OFFSET = 0x1000; @@ -179,6 +186,16 @@ union mem_tg1_count { }; }; +// Memory Size +union tg_mem_size { + enum { offset = MEM_SIZE }; + uint64_t value; + struct { + uint64_t total_mem_size : 32; + uint64_t hdm_mem_size : 32; + }; +}; + using test_afu = opae::afu_test::afu; using test_command = opae::afu_test::command; @@ -193,7 +210,9 @@ class cxl_mem_tg : public test_afu { rcnt_(1), bcnt_(1), stride_(1), - mem_speed_(TG_FREQ) { + mem_speed_(TG_FREQ), + hdm_size_(FPGA_32GB_CACHE_LINES) + { // iterations app_.add_option("--count", count_, "Number of iterations to run") ->default_val("1"); @@ -259,6 +278,7 @@ class cxl_mem_tg : public test_afu { uint64_t mem_speed_; uint32_t status_; uint64_t tg_offset_; + uint64_t hdm_size_; std::map limits_; diff --git a/samples/cxl_mem_tg/cxl_tg_test.h b/samples/cxl_mem_tg/cxl_tg_test.h index 
382186bb9467..c5d060afd38e 100644 --- a/samples/cxl_mem_tg/cxl_tg_test.h +++ b/samples/cxl_mem_tg/cxl_tg_test.h @@ -43,6 +43,8 @@ using opae::fpga::types::token; #define CXL_TG_BW_FACTOR 0.931323 +#define MAX(x, y) ((x) > (y) ? (x) : (y)) + /* 1) Write to TG_CLEAR with data=0xF to clear all the failure status registers. 2) Configure the registers with the value specified in table 1 below. @@ -96,6 +98,8 @@ class cxl_tg_test : public test_command { tg_exe_->logger_->debug("GUIDH:0x{:x}", tg_exe_->read64(AFU_ID_H)); tg_exe_->logger_->debug("TG Contol:0x{:x}", tg_exe_->read64(MEM_TG_CTRL)); tg_exe_->logger_->debug("TG Status:0x{:x}", tg_exe_->read64(MEM_TG_STAT)); + tg_exe_->logger_->debug("Memory Size:0x{:x}", + tg_exe_->read64(MEM_SIZE)); tg_exe_->logger_->debug("TG Total clock count:0x{:x}", tg_exe_->read64(MEM_TG_CLK_COUNT)); tg_exe_->logger_->debug("TG Write Clock Count:0x{:x}", @@ -271,17 +275,50 @@ class cxl_tg_test : public test_command { // Configure the registers with the value int config_input_options() { - mem_tg_ctl tg_ctl; tg_ctl.value = tg_exe_->read64(MEM_TG_CTRL); tg_exe_->logger_->debug("tg configure input options..."); tg_exe_->logger_->debug("mem tg ctl:{0:x}", tg_ctl.value); + tg_mem_size mem_size; + mem_size.value = tg_exe_->read64(MEM_SIZE); + + uint64_t value = mem_size.total_mem_size; + tg_exe_->logger_->debug("Total hardware memory size:{}", value); + value = mem_size.hdm_mem_size; + tg_exe_->logger_->debug("HDM memory size:{0:d}", value); + + if (mem_size.hdm_mem_size != 0) + tg_exe_->hdm_size_ = mem_size.hdm_mem_size; + + cout << "HDM memory cache line size:" << dec << tg_exe_->hdm_size_ << endl; + if (tg_ctl.tg_capability != 0x1) { - std::cerr << "No traffic generator for mem" << std::endl; + cerr << "No traffic generator for memory" << endl; return -1; } + if (tg_exe_->wcnt_ == 0 && tg_exe_->rcnt_ == 0) { + cerr << "Invalid Read and Write input arguments" << endl; + return -1; + } + + if (tg_exe_->rcnt_ > tg_exe_->wcnt_) { + cerr << "Read 
count exceeds Write count" << endl; + return -1; + } + + if ( tg_exe_->wcnt_ == 0) { + cerr << " Write count is zero" << endl; + return -1; + } + + if ( (MAX(tg_exe_->wcnt_, tg_exe_->rcnt_) * tg_exe_->loop_) >= + tg_exe_->hdm_size_) { + cerr << "Read,Write and loop count exceeds HDM memory size" << endl; + return -1; + } + tg_exe_->mem_speed_ = tg_exe_->read64(MEM_TG_CLK_FREQ); std::cout << "Memory clock frequency (kHz) : " << tg_exe_->mem_speed_ << std::endl; if (0 == tg_exe_->mem_speed_) { From 068f85217a499a5ef0bacedabb789f8e085cfe8f Mon Sep 17 00:00:00 2001 From: anandhv Date: Mon, 4 Dec 2023 07:38:23 -0500 Subject: [PATCH 10/28] [Bug] Disallow factory image update if boot page is also factory (#3049) * disallow factory image update if boot page is also factory * Flipped to log.error * comments * cleanup and comments * remove dependency on pacsign. reproduce factory constant locally --- python/opae.admin/opae/admin/fpga.py | 6 ++++ .../opae/admin/tools/fpgasupdate.py | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/python/opae.admin/opae/admin/fpga.py b/python/opae.admin/opae/admin/fpga.py index 4b064d0c0b91..36d5c0537835 100644 --- a/python/opae.admin/opae/admin/fpga.py +++ b/python/opae.admin/opae/admin/fpga.py @@ -230,6 +230,12 @@ def pmci_bus(self): return pmci return self.find_one('*-sec*.*.auto') + @property + def boot_page(self): + pmci = self.pmci_bus + if pmci: + return pmci.find_one('**/fpga_boot_image') + @property def altr_asmip(self): return self.find_one('altr-asmip*.*.auto') diff --git a/python/opae.admin/opae/admin/tools/fpgasupdate.py b/python/opae.admin/opae/admin/tools/fpgasupdate.py index 1b0f44295017..8f10b17560e9 100755 --- a/python/opae.admin/opae/admin/tools/fpgasupdate.py +++ b/python/opae.admin/opae/admin/tools/fpgasupdate.py @@ -52,6 +52,7 @@ from opae.admin.fpga import fpga from opae.admin.utils.progress import progress from opae.admin.version import pretty_version +from opae.admin.sysfs import sysfs_device, 
sysfs_node if sys.version_info[0] == 2: input = raw_input # noqa pylint: disable=E0602 @@ -99,6 +100,9 @@ def uuid_from_bytes(blob): FPGA_ERR_WEAROUT = 7 FPGA_ERR_MAX = 8 +# Values for ConType field in Block0 of output binary from PACSign utility. +CONTENT_FACTORY = 3 + # bytes/sec when staging is flash FLASH_COPY_BPS = 43000.0 # bytes/sec when staging area is dram @@ -811,6 +815,34 @@ def main(): pac.upload_dev.find_one('loading')): use_ioctl = False + # The BMC disallows updating the factory image if the current boot-page is also 'factory'. + # The idea is to always have at least one known-good image in the flash so that + # you can recover from subsequent bad images. + # The BMC checks for this condition but does not, at the moment, report any useful error details. + # We simply get a generic error back and can't report any detail to the user. + # So we explicitly check for this condition and disallow it here with a descriptive message. + + # The bootpage is read from the fpga_boot_image sysfs entry. The 'fme' object has many sysfs_nodes + # for various items, including the boot_page, so we use that here. It simply returns a string + # indicating the boot page: fpga_factory, fpga_user1, or fpga_user2 + + boot_page = pac.fme.boot_page + if boot_page is None: + LOG.error('Secure update failed. Could not find **/fpga_boot_image sysfs entry.') + sys.exit(1) + + LOG.debug ("Boot page sysfs path: %s\n", boot_page.sysfs_path) + LOG.debug ("Boot page value: %s\n", boot_page.value) + LOG.debug ('Block0 ConType: %s\n', blk0['ConType']) + + # The binary is produced by the PACSign utility. + # CONTENT_FACTORY is the enum that PACSign inserts into the block0 region of + # the binary to indicate that the factory image is targeted. ConType refers to 'content type' + # and indicates if the binary is factoryPR, static region, BMC-related etc. + if ((boot_page.value == 'fpga_factory') and (blk0['ConType'] == CONTENT_FACTORY)): + LOG.error('Secure update failed. 
Cannot update factory image when current boot-page is also factory.') + sys.exit(1) + LOG.warning('Update starting. Please do not interrupt.') start = datetime.now() From 4973e051baba2915620073a34850c288a7aad1ed Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Thu, 7 Dec 2023 09:25:17 -0800 Subject: [PATCH 11/28] feature: add running pointer and ping-pong tests to CXL host exerciser (#3056) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feature: add running pointer and ping-pong tests to CXL host exerciser -Running pointer test: Create linked list in Host or fpga memory, write linked list head pointer to AFU read address table, AFU test execution walks linked list, updates buffer’s data to 1’s complements - Ping-pong test: Allocate Host or fpga memory, write ping pong buffer physical address to AFU read address table, AFU test execution increments buffer value to odd number, SW updates to even number until maximum count. Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 307 +++++++++++++++++- samples/cxl_host_exerciser/cxl_he_cmd.h | 270 ++++++++++++++- .../cxl_host_exerciser/cxl_host_exerciser.h | 47 ++- samples/cxl_host_exerciser/he_cache_test.h | 71 +++- 4 files changed, 683 insertions(+), 12 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index e927a4ef4f4d..7deacd09c06d 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -25,6 +25,7 @@ // POSSIBILITY OF SUCH DAMAGE. 
#pragma once +#include #include "cxl_he_cmd.h" #include "cxl_host_exerciser.h" #include "he_cache_test.h" @@ -229,6 +230,8 @@ class he_cache_cmd : public he_cmd { rd_table_ctl_.stride = he_stride_; } + host_exe_->reset_dsm(); + // Continuous mode if (he_continuousmode_) { he_rd_cfg_.continuous_mode_enable = 0x1; @@ -410,6 +413,8 @@ class he_cache_cmd : public he_cmd { wr_table_ctl_.stride = he_stride_; } + host_exe_->reset_dsm(); + // continuous mode if (he_continuousmode_) { he_rd_cfg_.continuous_mode_enable = 0x1; @@ -1160,7 +1165,6 @@ class he_cache_cmd : public he_cmd { return 0; } - void he_forcetestcmpl() { // Force stop test @@ -1176,6 +1180,271 @@ class he_cache_cmd : public he_cmd { usleep(1000); } + int he_run_running_pointer_test() { + + cout << "********** Running pointer test start**********" << endl; + + uint64_t *host_virt_ptr = NULL; + uint64_t *fpga_virt_ptr = NULL; + uint64_t host_phy_ptr = 0; + uint64_t fpga_phy_ptr = 0; + uint64_t data = RUNNIG_PTR_DATA_PATTERN; + uint32_t node_count = 0; + uint64_t phy_ptr = 0; + int retval = 0; + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate running pointer buffers on HOST + if (he_target_ == HE_TARGET_HOST ) { + + host_exe_->logger_->debug("Running pointer test target host"); + // Allocate Pinned HOST buffer + if (!host_exe_->allocate_pinned_buffer(&host_virt_ptr,BUFFER_SIZE_2MB, numa_node_)) { + cerr << "Allocate pinned buffer failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + //virtual address to physical address + host_phy_ptr = __mem_virt2phys(host_virt_ptr); + cout << "Host physical address:" << std::hex << host_phy_ptr << endl; + + node_count = FPGA_2MB_CACHE_LINES - 10; + cout << " linked list Node count:" << std::dec << node_count << endl; + + // create linked list + if (!create_linked_list(host_virt_ptr, host_phy_ptr, data, + node_count,HOSTMEM_BIAS)) { + cerr << "Failed to create linked list" << 
endl; + host_exe_->free_pinned_buffer(host_virt_ptr); + host_exe_->free_dsm(); + return -1; + } + + phy_ptr = host_phy_ptr; + + } else if (he_target_ == HE_TARGET_FPGA) { + + // Allocate running pointer buffers on FPGA + host_exe_->logger_->debug("Running pointer test target fpga"); + if (!host_exe_->allocate_pinned_buffer(&fpga_virt_ptr, BUFFER_SIZE_2MB, 2)) { + cerr << "Allocate pinned buffer failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + //virtual address to physical address + fpga_phy_ptr = __mem_virt2phys(fpga_virt_ptr); + cout << "FPGA physical address:" << std::hex << fpga_phy_ptr << endl; + + node_count = FPGA_2MB_CACHE_LINES - 10; + cout << " linked list Node count:" << std::dec << node_count << endl; + + // create linked list + if (!create_linked_list(fpga_virt_ptr, fpga_phy_ptr, data, + node_count, FPGAMEM_HOST_BIAS)) { + cerr << "Failed to create linked list" << endl; + host_exe_->free_dsm(); + host_exe_->free_pinned_buffer(fpga_virt_ptr); + return -1; + } + he_ctl_.bias_support = FPGAMEM_HOST_BIAS; + phy_ptr = fpga_phy_ptr; + + } else { + + int numa_node = numa_node_of_cpu(sched_getcpu()); + + // Allocate running pointer buffers on host and FPGA + host_exe_->logger_->debug("Running pointer test target Host and FPGA"); + // Allocate Pinned Host buffer + if (!host_exe_->allocate_pinned_buffer(&host_virt_ptr, BUFFER_SIZE_2MB, numa_node)) { + cerr << "Allocate pinned buffer failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + //virtual address to physical address + host_phy_ptr = __mem_virt2phys(host_virt_ptr); + cout << "Host physical address:" << std::hex << host_phy_ptr << endl; + + // Allocate Pinned FPGA buffer + if (!host_exe_->allocate_pinned_buffer(&fpga_virt_ptr, BUFFER_SIZE_2MB, 2)) { + cerr << "allocate pinned buffer failed" << endl; + host_exe_->free_dsm(); + host_exe_->free_pinned_buffer(host_virt_ptr); + return -1; + } + + //virtual address to physical address + fpga_phy_ptr = __mem_virt2phys(fpga_virt_ptr); + cout 
<< "FPGA physical address:" << std::hex << host_phy_ptr << endl; + + node_count = 2*(FPGA_2MB_CACHE_LINES - 10); + cout << " linked list Node count:" << std::dec << node_count << endl; + + // create linked list + if (!create_linked_list(host_virt_ptr, host_phy_ptr, data, + node_count, HOSTMEM_BIAS, fpga_virt_ptr, + fpga_phy_ptr, FPGAMEM_HOST_BIAS)) { + cerr << "Failed to create linked list" << endl; + host_exe_->free_dsm(); + host_exe_->free_pinned_buffer(fpga_virt_ptr); + host_exe_->free_pinned_buffer(host_virt_ptr); + return -1; + } + + he_ctl_.bias_support = HOSTMEM_BIAS; + phy_ptr = host_phy_ptr; + + } // end + + + // Write linked list count + he_rd_num_lines_.value = host_exe_->read64(HE_RD_NUM_LINES); + he_rd_num_lines_.max_count = node_count; + host_exe_->write64(HE_RD_NUM_LINES, he_rd_num_lines_.value); + + cout << "Linked list cout:" << std::dec << he_rd_num_lines_.max_count << endl; + cout << "physical address:" << std::hex << phy_ptr << endl; + + // Write PHY addrees to Read table CSR + host_exe_->write64(HE_RD_ADDR_TABLE_DATA, phy_ptr); + + // start test + he_start_test(HE_PING_PONG,RUNNING_POINTER); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_dsm(); + host_exe_->free_pinned_buffer(fpga_virt_ptr); + host_exe_->free_pinned_buffer(host_virt_ptr); + return -1; + } + + // verify linked list data + if (he_target_ == HE_TARGET_HOST) { + + if (!verify_linked_list(host_virt_ptr, host_phy_ptr, data, + node_count)) { + cerr << "Failed to verify linked list" << endl; + retval = -1; + } + } else if (he_target_ == HE_TARGET_FPGA) { + + if (!verify_linked_list(fpga_virt_ptr, fpga_phy_ptr, data, + node_count)) { + cerr << "Failed to verify linked list" << endl; + retval = -1; + } + } else { + if (!verify_linked_list(host_virt_ptr, host_phy_ptr, data, + node_count)) { + cerr << "Failed to verify linked list" << endl; + retval = -1; + } + } // end + + he_perf_counters(); + 
host_exe_->free_dsm(); + host_exe_->free_pinned_buffer(fpga_virt_ptr); + host_exe_->free_pinned_buffer(host_virt_ptr); + + + cout << "********** Ran Running pointer test successfully" + " **********" << endl; + cout << "********** Running pointer test start end**********" << endl; + return retval; + } + + int he_run_ping_pong_test() { + + cout << "********** Ping pong test start**********" << endl; + + uint32_t timeout = HE_CACHE_TEST_TIMEOUT; + volatile uint64_t* virt_ptr = NULL; + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache write failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + he_rd_num_lines_.value = host_exe_->read64(HE_RD_NUM_LINES); + he_rd_num_lines_.max_count = INT_MAX/10000; + host_exe_->write64(HE_RD_NUM_LINES, he_rd_num_lines_.value); + + cout << "HE_RD_NUM_LINES:" << std::hex << he_rd_num_lines_.value << endl; + cout << "Max ping poing count:" << std::dec << he_rd_num_lines_.max_count << endl; + + // get ping pong test buffer pointer set value 0 + virt_ptr = (uint64_t*)host_exe_->get_read(); + *virt_ptr = 0x0; + + // Start ping pong test + he_start_test(HE_PING_PONG, PING_PONG); + + while (true) { + + if (*virt_ptr >= he_rd_num_lines_.max_count) { + cout << "ping pong test completed successfully" << endl; + host_exe_->logger_->debug("reached ping pong maximum value:{} ", *virt_ptr); + break; + } + + // AFU increments numbers to odd numbers + if ((*virt_ptr) % 2 == 1) { + + *virt_ptr = *virt_ptr + 1; + timeout = HE_CACHE_TEST_TIMEOUT; + } else { + + usleep(HE_CACHE_TEST_SLEEP_INVL); + if (--timeout == 0) { + he_forcetestcmpl(); + cerr << "HE cache ping pong test time out error" << endl; + host_exerciser_errors(); + he_perf_counters(); + host_exe_->free_dsm(); + host_exe_->free_cache_read(); + return -1; + } + } + }; // end of while + + // wait 
for completion + if (!he_wait_test_completion()) { + he_forcetestcmpl(); + cerr << "HE Cache ping pong test time out error" << endl; + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_dsm(); + host_exe_->free_cache_read(); + return -1; + } + + he_perf_counters(); + host_exe_->free_dsm(); + host_exe_->free_cache_read(); + + cout << "********** Ping pong test end**********" << endl; + return 0; + } + bool he_continuousmode() { @@ -1196,12 +1465,38 @@ class he_cache_cmd : public he_cmd { return true; } + + bool verify_input_options() { + + if ( ( (he_test_ == HE_FPGA_RD_CACHE_HIT) || + (he_test_ == HE_FPGA_WR_CACHE_HIT) || + (he_test_ == HE_FPGA_RD_CACHE_MISS) || + (he_test_ == HE_FPGA_WR_CACHE_MISS) || + (he_test_ == HE_HOST_RD_CACHE_HIT) || + (he_test_ == HE_HOST_WR_CACHE_HIT) || + (he_test_ == HE_HOST_RD_CACHE_MISS) || + (he_test_ == HE_HOST_WR_CACHE_MISS) || + (he_test_ == HE_CACHE_PING_PONG)) && + he_target_ == HE_TARGET_BOTH) { + + cerr << "Wrong input configuration FPGA/Host Read/Write cache\ + hit/miss and target memory both fpga and host" << endl; + return false; + } + + return true; + } + virtual int run(test_afu *afu, CLI::App *app) { (void)app; int ret = 0; host_exe_ = dynamic_cast(afu); + if (!verify_input_options()) { + return -1; + } + if (!verify_numa_node()) { numa_node_ = 0; cout << "numa nodes are available set numa node to 0" << endl; @@ -1267,6 +1562,16 @@ class he_cache_cmd : public he_cmd { return ret; } + if (he_test_ == HE_CACHE_RUNNING_POINTER) { + ret = he_run_running_pointer_test(); + return ret; + } + + if (he_test_ == HE_CACHE_PING_PONG) { + ret = he_run_ping_pong_test(); + return ret; + } + return 0; } diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 20bbdab37241..334e5cdfc24e 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -34,6 +34,10 @@ #define HE_TEST_STARTED "Test started ......" 
#define HE_PRTEST_SCENARIO "Pretest scenario started ......" +#define HE_PING_PONG "Ping pong test started ......" +#define HE_RUNNING_POINTER "Running pointer test started ......" + +#define PFN_MASK_SIZE 8 namespace host_exerciser { @@ -48,6 +52,7 @@ class he_cmd : public test_command { he_wr_cfg_.value = 0; rd_table_ctl_.value = 0; wr_table_ctl_.value = 0; + he_rd_num_lines_.value = 0; } virtual ~he_cmd() {} @@ -69,7 +74,7 @@ class he_cmd : public test_command { cout << "test completed :" << dsm_status->test_completed << endl; cout << "dsm number:" << dsm_status->dsm_number << endl; cout << "error vector:" << dsm_status->err_vector << endl; - cout << "num ticks:" << dsm_status->num_ticks << endl; + cout << "num ticks:" << std::dec << dsm_status->num_ticks << endl; cout << "num reads:" << dsm_status->num_reads << endl; cout << "num writes:" << dsm_status->num_writes << endl; cout << "penalty start:" << dsm_status->penalty_start << endl; @@ -227,11 +232,11 @@ class he_cmd : public test_command { bool he_wait_test_completion() { /* Wait for test completion */ - uint32_t timeout = HELPBK_TEST_TIMEOUT; + uint32_t timeout = HE_CACHE_TEST_TIMEOUT; volatile uint8_t *status_ptr = host_exe_->get_dsm(); while (0 == ((*status_ptr) & 0x1)) { - usleep(HELPBK_TEST_SLEEP_INVL); + usleep(HE_CACHE_TEST_SLEEP_INVL); if (--timeout == 0) { cout << "HE Cache time out error" << endl; return false; @@ -265,14 +270,21 @@ class he_cmd : public test_command { } - void he_start_test(const char* str = HE_TEST_STARTED) { + void he_start_test(const char* str = HE_TEST_STARTED, uint8_t test_type = RD_WR_TEST) { // start test + he_ctl_.test_type = test_type; he_ctl_.Start = 0; host_exe_->write64(HE_CTL, he_ctl_.value); he_ctl_.Start = 1; host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + cout << str << endl; + + host_exe_->logger_->debug("Test type :0x{:x} ", test_type); + host_exe_->logger_->debug("HE_CTL:0x{:x}", 
host_exe_->read64(HE_CTL)); } bool verify_numa_node() { @@ -339,6 +351,255 @@ class he_cmd : public test_command { return 0; } + int get_mtime(const char* file_name, struct timespec* mtime) + { + struct stat s; + + if (!lstat(file_name, &s)) { + if (S_ISLNK(s.st_mode)) { + mtime->tv_sec = mtime->tv_nsec = 0; + return -1; + } + mtime->tv_sec = s.st_mtime; + mtime->tv_nsec = s.st_mtim.tv_nsec; + return 0; + } + else { + mtime->tv_sec = mtime->tv_nsec = 0; + return errno; + } + } + + int file_open(const char* file_name, int mode) + { + struct stat before, after; + int fd; + int ret; + + ret = lstat(file_name, &before); + if (ret == 0) { + if (S_ISLNK(before.st_mode)) + return -1; + } + + fd = open(file_name, mode, 0x666); + if (fd < 0) + return -1; + + if (ret == 0) { + if (fstat(fd, &after) == 0) { + if (before.st_ino != after.st_ino) { + close(fd); + return -1; + } + } + else { + close(fd); + return -1; + } + } + return fd; + } + + uint64_t __mem_virt2phys(const void* virtaddr) + { + int page_size = getpagesize(); + struct timespec mtime1, mtime2; + const char* fname = "/proc/self/pagemap"; + unsigned long pfn; + uint64_t page; + off_t offset; + int fd, retval; + + retval = get_mtime(fname, &mtime1); + if (retval) { + cerr << "stat failed\n"; + return -1; + } + + fd = file_open("/proc/self/pagemap", O_RDONLY | O_EXCL); + if (fd < 0) + return -1; + + pfn = (unsigned long)virtaddr / page_size; + offset = pfn * sizeof(uint64_t); + if (lseek(fd, offset, SEEK_SET) == (off_t)-1) { + cerr << "seek error\n"; + close(fd); + return -1; + } + + retval = get_mtime(fname, &mtime2); + if (retval) { + cerr << "stat failed\n"; + close(fd); + return -1; + } + + if (mtime1.tv_sec != mtime2.tv_sec + || mtime1.tv_nsec != mtime2.tv_nsec) { + cerr << "file got modified after open \n"; + close(fd); + return -1; + } + + retval = read(fd, &page, PFN_MASK_SIZE); + close(fd); + if (retval != PFN_MASK_SIZE) + return -1; + if ((page & 0x7fffffffffffffULL) == 0) + return -1; + + return ((page & 
0x7fffffffffffffULL) * page_size) + + ((unsigned long)virtaddr & (page_size - 1)); + } + + bool create_linked_list(uint64_t *virt_ptr_a, uint64_t phy_ptr_a, + uint64_t data, uint64_t max_size, he_bias_support bias_a = HOSTMEM_BIAS, + uint64_t *virt_ptr_b = NULL, uint64_t phy_ptr_b = 0, + he_bias_support bias_b = FPGAMEM_HOST_BIAS) { + + uint64_t *temp_virt_ptr_a = virt_ptr_a; + uint64_t temp_phy_ptr_a = phy_ptr_a; + uint64_t *temp_virt_ptr_b = virt_ptr_b; + uint64_t temp_phy_ptr_b = phy_ptr_b; + uint64_t i = 0; + struct he_cache_running_ptr *temp_node = NULL; + + host_exe_->logger_->debug("virt_ptr_a:{:p}", fmt::ptr(virt_ptr_a)); + host_exe_->logger_->debug("phy_ptr_a:0x{:x}", phy_ptr_a); + host_exe_->logger_->debug("virt_ptr_b:{:p}", fmt::ptr(virt_ptr_b)); + host_exe_->logger_->debug("phy_ptr_b:0x{:x}", phy_ptr_b); + host_exe_->logger_->debug("max_size:{0}", max_size); + host_exe_->logger_->debug("data:{:x}", data); + + if (virt_ptr_a == NULL || phy_ptr_a == 0) { + cerr << "Invalid input arguments" << endl; + return false; + } + + // Linked list on host or fpga memory + if (virt_ptr_a != NULL && phy_ptr_a != 0 && + virt_ptr_b == NULL && phy_ptr_b == 0) { + + temp_node = (struct he_cache_running_ptr*)(temp_virt_ptr_a); + + for (i = 0; i < max_size; ++i) { + temp_node->phy_next_ptr = temp_phy_ptr_a + 64; + temp_node->data = data; + temp_node->virt_next_ptr = (struct he_cache_running_ptr*)(temp_virt_ptr_a + 8); + temp_node->biasmode = bias_a; + + temp_node++; + temp_phy_ptr_a = temp_phy_ptr_a + 64; + temp_virt_ptr_a = temp_virt_ptr_a + 8; + } + + temp_node->phy_next_ptr = 0; + temp_node->virt_next_ptr = NULL; + temp_node->data = 0; + } + + // Linked list on host and fpga memory + if (virt_ptr_a != NULL && phy_ptr_a != 0 && + virt_ptr_b != NULL && phy_ptr_b != 0) { + + struct he_cache_running_ptr* temp_node_a = + (struct he_cache_running_ptr*)(temp_virt_ptr_a); + + struct he_cache_running_ptr* temp_node_b = + (struct he_cache_running_ptr*)(temp_virt_ptr_b); + + int 
which = 0; + for (i = 0; i < max_size; ++i) { + + if (which == 0) { + temp_node_a->phy_next_ptr = temp_phy_ptr_b; + temp_node_a->data = data; + temp_node_a->biasmode = bias_b; + temp_node_a->virt_next_ptr = temp_node_b; + ++temp_node_a; + temp_phy_ptr_a += 64; + + } else { + temp_node_b->phy_next_ptr = temp_phy_ptr_a; + temp_node_b->data = data; + temp_node_b->biasmode = bias_a; + temp_node_b->virt_next_ptr = temp_node_a; + ++temp_node_b; + temp_phy_ptr_b += 64; + } + + which = 1 - which; + } + + temp_node_a->phy_next_ptr = 0; + temp_node_a->virt_next_ptr = nullptr; + + temp_node_b->phy_next_ptr = 0; + temp_node_b->virt_next_ptr = nullptr; + } + + return true; + } + + bool verify_linked_list(uint64_t *virt_ptr, uint64_t phy_ptr, + uint64_t data, uint64_t max_size) { + + bool retval = true; + struct he_cache_running_ptr *temp = NULL; + uint64_t i = 0; + + host_exe_->logger_->debug("virt_ptr:{:p}", fmt::ptr(virt_ptr)); + host_exe_->logger_->debug("phy_ptr:0x{:x}", phy_ptr); + host_exe_->logger_->debug("max_size:{0}", max_size); + host_exe_->logger_->debug("data:{:x}", data); + + temp = (struct he_cache_running_ptr*)(virt_ptr); + for (i = 0; i < max_size; i++) { + + if (temp == NULL) { + retval = false; + break; + } + // 1's complement of data + if (temp->data != ~data) { + cerr << "Failed to convert data to 1's complement at index:" + << i << endl; + retval = false; + break; + } + temp = temp->virt_next_ptr; + } + return retval; + } + + bool print_linked_list(uint64_t *virt_ptr, uint64_t phy_ptr, + uint64_t data, uint64_t max_size) { + + bool retval = true; + volatile struct he_cache_running_ptr *temp = NULL; + uint64_t i = 0; + + host_exe_->logger_->debug("virt_ptr:{:p}", fmt::ptr(virt_ptr)); + host_exe_->logger_->debug("phy_ptr:0x{:x}", phy_ptr); + host_exe_->logger_->debug("max_size:{0}", max_size); + host_exe_->logger_->debug("data:{:x}", data); + + temp = (struct he_cache_running_ptr*)(virt_ptr); + for (i = 0; i < max_size; i++) { + + if (temp == NULL) { + 
retval = false; + break; + } + cout << "data:" << std::hex << temp->data << endl; + temp = temp->virt_next_ptr; + cout << "temp->virt_next_ptr:" << temp->virt_next_ptr << endl; + } + return retval; + } + protected: host_exerciser *host_exe_; uint32_t he_clock_mhz_; @@ -352,5 +613,6 @@ class he_cmd : public test_command { he_wr_config he_wr_cfg_; he_rd_addr_table_ctrl rd_table_ctl_; he_wr_addr_table_ctrl wr_table_ctl_; + he_rd_num_lines he_rd_num_lines_; }; } // end of namespace host_exerciser diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 1faa8306d5b1..303e785b6605 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -32,10 +32,12 @@ #define MEM_TG_FEATURE_GUIDH 0x0118e06b1fa349b9 const char *HE_CACHE_AFU_ID = "0118E06B-1FA3-49B9-8159-9b5C2EBD4b23"; +#define RUNNIG_PTR_DATA_PATTERN 0x123456 + namespace host_exerciser { -static const uint64_t HELPBK_TEST_TIMEOUT = 30000; -static const uint64_t HELPBK_TEST_SLEEP_INVL = 100; +static const uint64_t HE_CACHE_TEST_TIMEOUT = 30000; +static const uint64_t HE_CACHE_TEST_SLEEP_INVL = 100; static const uint64_t CL = 64; static const uint64_t KB = 1024; static const uint64_t MB = KB * 1024; @@ -96,6 +98,14 @@ typedef enum { WR_FLUSH_CL_DCOH = 0x6, } he_wr_opcode; + +// HE_CTL test type +typedef enum { + RD_WR_TEST = 0x0, + RUNNING_POINTER = 0x10, + PING_PONG = 0x20, + } he_test_type; + // DFH Header union he_dfh { enum { offset = HE_DFH }; @@ -128,7 +138,9 @@ union he_ctl { uint64_t Start : 1; uint64_t ForcedTestCmpl : 1; uint64_t bias_support : 2; - uint64_t Reserved : 59; + uint64_t Reserved : 3; + uint64_t test_type : 8; + uint64_t Reserved1 :48; }; }; @@ -210,7 +222,8 @@ union he_rd_num_lines { uint64_t value; struct { uint64_t read_num_lines : 16; - uint64_t reserved : 48; + uint64_t reserved : 16; + uint64_t max_count : 32; }; }; @@ -297,12 +310,17 @@ typedef enum { 
HE_HOST_RD_CACHE_MISS = 0x6, HE_HOST_WR_CACHE_MISS = 0x7, + HE_CACHE_PING_PONG = 0x8, + HE_CACHE_RUNNING_POINTER= 0x9, + + } he_test_mode; // configures traget typedef enum { HE_TARGET_HOST = 0x0, HE_TARGET_FPGA = 0x1, + HE_TARGET_BOTH = 0x2, } he_target; @@ -323,6 +341,8 @@ const std::map he_test_modes = { {"hostwrcachehit", HE_HOST_WR_CACHE_HIT}, {"hostrdcachemiss", HE_HOST_RD_CACHE_MISS}, {"hostwrcachemiss", HE_HOST_WR_CACHE_MISS}, + {"pingpong", HE_CACHE_PING_PONG}, + {"runningpointer", HE_CACHE_RUNNING_POINTER}, }; // Bias Support @@ -331,11 +351,12 @@ typedef enum { HOST_BIAS_NA = 0x1, FPGAMEM_HOST_BIAS = 0x2, FPGAMEM_DEVICE_BIAS = 0x3, -} he_bisa_support; +} he_bias_support; const std::map he_targets = { {"host", HE_TARGET_HOST}, {"fpga", HE_TARGET_FPGA}, + {"both", HE_TARGET_BOTH}, }; // Bias support @@ -392,6 +413,22 @@ std::map addrtable_size = { }; +// HE Cache Running pointer +struct he_cache_running_ptr { + uint64_t phy_next_ptr; + uint64_t data; + he_cache_running_ptr *virt_next_ptr; + uint64_t rsvd[4]; + union { + uint64_t mode; + struct { + uint64_t rsvd_0_62 : 62; + uint64_t biasmode :2; + }; + }; +}; + + using test_afu = opae::afu_test::afu; using test_command = opae::afu_test::command; diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index e1898fedff3b..0f1516d7f865 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -599,8 +599,9 @@ class afu { return true; } - void reset_dsm() { - memset(dsm_buffer_, 0, dsm_buf_len_); + void reset_dsm() { + if(dsm_buffer_) + memset(dsm_buffer_, 0, dsm_buf_len_); } bool allocate_cache_read(size_t len = MiB(2), uint32_t numa_node = 0) { @@ -821,6 +822,72 @@ class afu { return true; } + + bool allocate_pinned_buffer(uint64_t **buf_ptr, size_t len = MiB(2), uint32_t numa_node = 0 ) { + + int res = 0; + void* ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + + 
if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Failed to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + cout << "Pinned buffer numa node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.csr_array[0] = 0; + + logger_->debug("Allocate pinned buffer user addr 0x:{0:x} length :" + "{1:d}", dma_map.user_addr, dma_map.length); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + *buf_ptr = (uint64_t*)ptr; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_pinned_buffer(uint64_t *buf_ptr, size_t len = MiB(2)) { + + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + if (buf_ptr == NULL) + return false; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)buf_ptr; + dma_unmap.length = len; + dma_unmap.csr_array[0] = 0; + + logger_->debug("free pinned user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + buffer_release(buf_ptr, len); + return true; + } + + uint8_t *get_dsm() const { return dsm_buffer_; } uint8_t *get_read() const { return rd_buffer_; } From fefb2309c8b6cf8f4ff03968e8e4926f630997eb Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Fri, 8 Dec 2023 10:49:29 -0800 Subject: [PATCH 12/28] fix: set FPGA buffer read only for bias mode device bias. 
(#3057) - Set Host and FPGA buffer map Writable for Host BIAS mode targeting Host address and Host BIAS mode targeting HDM (Device) address - Set FPGA buffer map read-only for device BIAS mode targeting HDM (Device) address The CXL driver buffer-map IOCTL maps buffers read-only by default (flags value 0); set the buffer map flag to DFL_CXL_BUFFER_MAP_WRITABLE for read/write buffers struct dfl_cxl_cache_buffer_map { __u32 argsz; #define DFL_CXL_BUFFER_MAP_WRITABLE 1 __u32 flags; __u64 user_addr; __u64 length; } Signed-off-by: anandaravuri --- libraries/plugins/xfpga/fpga-dfl.h | 1 + samples/cxl_host_exerciser/cxl_he_cmd.h | 1 + samples/cxl_host_exerciser/he_cache_test.h | 20 +++++++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index 43ffaee92cb2..ea06c146a1ae 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -377,6 +377,7 @@ struct dfl_cxl_cache_region_info { */ struct dfl_cxl_cache_buffer_map { __u32 argsz; +#define DFL_CXL_BUFFER_MAP_WRITABLE 1 __u32 flags; __u64 user_addr; __u64 length; diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 334e5cdfc24e..2da1f1d8ab07 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -263,6 +263,7 @@ class he_cmd : public test_command { he_ctl_.bias_support = FPGAMEM_HOST_BIAS; } else { he_ctl_.bias_support = FPGAMEM_DEVICE_BIAS; + host_exe_->set_mmap_access(HE_CACHE_DMA_MMAP_R); } } diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 0f1516d7f865..34177288fd3c 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -94,6 +94,11 @@ enum { MATCHES_SIZE = 6 }; #define DFL_CXL_CACHE_WR_ADDR_TABLE_DATA 0x068 #define DFL_CXL_CACHE_RD_ADDR_TABLE_DATA 0x088 +// buffer access type +typedef enum { + HE_CACHE_DMA_MMAP_RW = 0x0, 
+ HE_CACHE_DMA_MMAP_R = 0x1, +} he_mmap_access; bool buffer_allocate(void** addr, uint64_t len, uint32_t numa_node) { @@ -284,7 +289,7 @@ class afu { const char *log_level = nullptr) : name_(name), afu_id_(afu_id ? afu_id : ""), app_(name_), pci_addr_(""), log_level_(log_level ? log_level : "info"), timeout_msec_(60000), - current_command_(nullptr) { + current_command_(nullptr), dma_mmap_access_(HE_CACHE_DMA_MMAP_RW) { if (!afu_id_.empty()) app_.add_option("-g,--guid", afu_id_, "GUID")->default_str(afu_id_); app_.add_option("-p,--pci-address", pci_addr_, @@ -544,6 +549,7 @@ class afu { cout << "DSM buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; @@ -619,6 +625,8 @@ class afu { cout << "Read buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; @@ -687,6 +695,8 @@ class afu { cout << "Write buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; @@ -755,6 +765,8 @@ class afu { cout << "Read/Write buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; @@ -838,6 +850,7 @@ class afu { cout << "Pinned buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; 
dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.csr_array[0] = 0; @@ -896,6 +909,9 @@ class afu { uint8_t *get_read_write() const { return rd_wr_buffer_; } + void set_mmap_access(he_mmap_access access = HE_CACHE_DMA_MMAP_RW) + { dma_mmap_access_ = access; } + protected: std::string name_; std::string afu_id_; @@ -927,6 +943,8 @@ class afu { command::ptr_t current_command_; std::map commands_; + he_mmap_access dma_mmap_access_; + public: std::shared_ptr logger_; }; From 9637c6b59179a2216810cd21d91ac1f8a44883a0 Mon Sep 17 00:00:00 2001 From: Tim Whisonant Date: Mon, 11 Dec 2023 09:54:59 -0800 Subject: [PATCH 13/28] Feature: add opae-mem (#3055) ### Description Adding opae-mem tool and updates to the OPAE Python bindings that support it. opae-mem is intended to be a replacement for ofs.uio. ### Collateral (docs, reports, design examples, case IDs): - [x] Document Update Required? (Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI Signed-off-by: Tim Whisonant --- libraries/plugins/uio/dfl.c | 7 + libraries/plugins/vfio/dfl.c | 7 + libraries/plugins/xfpga/mmio.c | 4 +- libraries/pyopae/CMakeLists.txt | 33 +- libraries/pyopae/opae/fpga/dfh.py | 75 ++ libraries/pyopae/opae/fpga/feature.py | 70 ++ libraries/pyopae/opae/fpga/hexview.py | 202 ++++ libraries/pyopae/opae/fpga/mailbox.py | 221 +++++ libraries/pyopae/opae/fpga/pcie/__init__.py | 25 + libraries/pyopae/opae/fpga/pcie/address.py | 293 ++++++ libraries/pyopae/opae/fpga/tools/__init__.py | 25 + libraries/pyopae/opae/fpga/tools/opae_mem.py | 945 +++++++++++++++++++ libraries/pyopae/pyproject.toml | 11 + libraries/pyopae/setup.py | 9 +- opae.spec.fedora | 1 + packaging/opae/deb/opae-devel.install | 1 + 16 files changed, 1916 insertions(+), 13 deletions(-) create mode 100644 libraries/pyopae/opae/fpga/dfh.py create mode 100644 libraries/pyopae/opae/fpga/feature.py create mode 100644 libraries/pyopae/opae/fpga/hexview.py create mode 100644 libraries/pyopae/opae/fpga/mailbox.py create mode 100644 
libraries/pyopae/opae/fpga/pcie/__init__.py create mode 100644 libraries/pyopae/opae/fpga/pcie/address.py create mode 100644 libraries/pyopae/opae/fpga/tools/__init__.py create mode 100644 libraries/pyopae/opae/fpga/tools/opae_mem.py diff --git a/libraries/plugins/uio/dfl.c b/libraries/plugins/uio/dfl.c index 81dc79c69a9e..985938500cb8 100644 --- a/libraries/plugins/uio/dfl.c +++ b/libraries/plugins/uio/dfl.c @@ -29,6 +29,7 @@ #endif // HAVE_CONFIG_H #include +#include #include "opae_int.h" #include "opae_uio.h" @@ -180,6 +181,12 @@ int walk_fme(uio_pci_device_t *dev, struct opae_uio *u, continue; bar = port_offset_reg.bits.bar; + if (bar >= PCI_STD_NUM_BARS) { + OPAE_DBG("ignoring invalid BAR %d at offset 0x%x", + bar, fme_ports[i]); + continue; + } + if (opae_uio_region_get(u, bar, &port_mmio, &size)) { OPAE_ERR("failed to get Port BAR %d", bar); continue; diff --git a/libraries/plugins/vfio/dfl.c b/libraries/plugins/vfio/dfl.c index 35d4944e2a27..e0b4a951cf33 100644 --- a/libraries/plugins/vfio/dfl.c +++ b/libraries/plugins/vfio/dfl.c @@ -29,6 +29,7 @@ #endif // HAVE_CONFIG_H #include +#include #include "opae_int.h" #include "opae_vfio.h" @@ -180,6 +181,12 @@ int walk_fme(vfio_pci_device_t *dev, struct opae_vfio *v, continue; bar = port_offset_reg.bits.bar; + if (bar >= PCI_STD_NUM_BARS) { + OPAE_DBG("ignoring invalid BAR %d at offset 0x%x", + bar, fme_ports[i]); + continue; + } + if (opae_vfio_region_get(v, bar, &port_mmio, &size)) { OPAE_ERR("failed to get Port BAR %d", bar); continue; diff --git a/libraries/plugins/xfpga/mmio.c b/libraries/plugins/xfpga/mmio.c index 68ea9803d726..09722bf0b120 100644 --- a/libraries/plugins/xfpga/mmio.c +++ b/libraries/plugins/xfpga/mmio.c @@ -1,4 +1,4 @@ -// Copyright(c) 2017-2022, Intel Corporation +// Copyright(c) 2017-2023, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: @@ -148,7 +148,7 @@ STATIC 
fpga_result find_or_map_wm(fpga_handle handle, uint32_t mmio_num, if (!wm) { result = map_mmio_region(handle, mmio_num); if (result != FPGA_OK) { - OPAE_ERR("failed to map mmio region %d", mmio_num); + OPAE_DBG("failed to map mmio region %d", mmio_num); return result; } wm = wsid_find_by_index(_handle->mmio_root, mmio_num); diff --git a/libraries/pyopae/CMakeLists.txt b/libraries/pyopae/CMakeLists.txt index 83232f455863..f9a908653365 100644 --- a/libraries/pyopae/CMakeLists.txt +++ b/libraries/pyopae/CMakeLists.txt @@ -44,8 +44,6 @@ set(PYOPAE_SRC pysysobject.cpp ) - - opae_add_module_library(TARGET _opae SOURCE ${PYOPAE_SRC} LIBS @@ -65,12 +63,29 @@ set_target_properties(_opae ${LIBRARY_OUTPUT_PATH}/python${OPAE_PYTHON_VERSION}/opae/fpga ) -add_custom_command(TARGET _opae - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/opae/fpga/__init__.py - ${LIBRARY_OUTPUT_PATH}/python${OPAE_PYTHON_VERSION}/opae/fpga - COMMENT "Copying namespace package files") +set(PYPKGFILES + opae/fpga/__init__.py + opae/fpga/dfh.py + opae/fpga/feature.py + opae/fpga/mailbox.py + opae/fpga/hexview.py + opae/fpga/pcie/__init__.py + opae/fpga/pcie/address.py + opae/fpga/tools/__init__.py + opae/fpga/tools/opae_mem.py +) + +foreach(pypkgfile ${PYPKGFILES}) + get_filename_component(pypkgdir + ${LIBRARY_OUTPUT_PATH}/python${OPAE_PYTHON_VERSION}/${pypkgfile} DIRECTORY) + add_custom_command(TARGET _opae + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${pypkgdir} + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/${pypkgfile} + ${LIBRARY_OUTPUT_PATH}/python${OPAE_PYTHON_VERSION}/opae/fpga + COMMENT "Copying namespace package files") +endforeach(pypkgfile ${PYPKGFILES}) add_custom_command(TARGET _opae POST_BUILD @@ -84,7 +99,7 @@ if (OPAE_BUILD_PYTHON_DIST) set(PYFILES pyproject.toml setup.py - opae/fpga/__init__.py + ${PYPKGFILES} test_pyopae.py ) diff --git a/libraries/pyopae/opae/fpga/dfh.py b/libraries/pyopae/opae/fpga/dfh.py new file mode 
100644 index 000000000000..c652e7f18198 --- /dev/null +++ b/libraries/pyopae/opae/fpga/dfh.py @@ -0,0 +1,75 @@ +# Copyright(c) 2023, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +"""Define Configuration and Status Register bit layouts for Device Feature +Header version 0 and version 1.""" + +from ctypes import Union, LittleEndianStructure, c_uint64 + + +class CSR(Union): + """Configuration and Status Register""" + def __init__(self, value): + self.value = value + + +class DFH0_BITS(LittleEndianStructure): + """Bit fields for DFH v0.""" + _fields_ = [ + ('id', c_uint64, 12), # Feature ID + ('rev', c_uint64, 4), # Revision of feature + ('next', c_uint64, 24), + ('eol', c_uint64, 1), + ('reserved', c_uint64, 19), + ('feature_type', c_uint64, 4), # 1=AFU,3=private,4=FIU,5=interface + ] + + +class dfh0(CSR): + """Device Feature Header version 0""" + _fields_ = [('bits', DFH0_BITS), + ('value', c_uint64)] + width = 64 + + +class DFH1_BITS(LittleEndianStructure): + """Bit fields for DFH v1.""" + _fields_ = [ + ('id', c_uint64, 12), # Feature ID + ('rev', c_uint64, 4), # Revision of feature + ('next', c_uint64, 24), + ('eol', c_uint64, 1), + ('reserved', c_uint64, 11), + ('dfh_version', c_uint64, 8), # DFH version (1) + ('feature_type', c_uint64, 4), # 1=AFU,3=private,4=FIU,5=interface + ] + + +class dfh1(CSR): + """Device Feature Header version 1""" + _fields_ = [('bits', DFH1_BITS), + ('value', c_uint64)] + width = 64 diff --git a/libraries/pyopae/opae/fpga/feature.py b/libraries/pyopae/opae/fpga/feature.py new file mode 100644 index 000000000000..0badc49a14a3 --- /dev/null +++ b/libraries/pyopae/opae/fpga/feature.py @@ -0,0 +1,70 @@ +# Copyright(c) 2023, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides a means to enumerate FPGA devices by examining their
+feature ID."""
+
+from opae import fpga
+from opae.fpga import dfh
+import opae.fpga.pcie.address as addr
+
+
+def enumerate(access_width: int, region: int, **kwargs):
+    """Enumerate FPGA tokens; kwargs are forwarded to opae.fpga.enumerate().
+       access_width (32 or 64) selects how the DFH is read and region is
+       the MMIO region it is read from. One extra keyword, 'feature_id',
+       is consumed here: when given, only tokens whose DFH id field (a v0
+       DFH read at offset 0 of region) equals int(feature_id) are returned.
+    """
+    feature_id = None
+    if 'feature_id' in kwargs:
+        feature_id = kwargs.get('feature_id')
+        del kwargs['feature_id']  # not a property fpga.enumerate() is given
+
+    tokens = fpga.enumerate(**kwargs)
+
+    access = addr.memory_access(access_width)  # built (and validated) even when no filter is requested
+
+    if feature_id is None:
+        return tokens
+
+    result = []
+    while tokens:
+        tok = tokens.pop()  # taken from the end, so result is in reverse enumeration order
+        remove = True
+        try:
+            with fpga.open(tok, fpga.OPEN_SHARED) as hndl:
+                access.hndl = hndl
+                d = dfh.dfh0(access.read(0, dfh.dfh0.width, region))
+                if d.bits.id == int(feature_id):
+                    result.append(tok)
+                    remove = False
+        except RuntimeError:  # a RuntimeError while opening/reading is treated as "no match"
+            pass
+        if remove:
+            del tok  # only unbinds the local name
+
+    return result
diff --git a/libraries/pyopae/opae/fpga/hexview.py b/libraries/pyopae/opae/fpga/hexview.py
new file mode 100644
index 000000000000..8b33ce3a65e5
--- /dev/null
+++ b/libraries/pyopae/opae/fpga/hexview.py
@@ -0,0 +1,202 @@
+# Copyright(c) 2023, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides a generic means to examine FPGA device MMIO and present it as
+   a hexadecimal dump."""
+
+import struct
+
+
+#           1111111111222222222233333333334444444444555555555566666666667777777777
+# 01234567890123456789012345678901234567890123456789012345678901234567890123456789
+#
+# 32
+# ADDR      COL0     COL1     COL2     COL3      DECODE
+# 00000000: 00000001 00000002 00000003 00000004 |0000000000000000|
+#
+# 64
+# ADDR      COL0             COL1              DECODE
+# 00000000: 0000000200000001 0000000400000003 |0000000000000000|
+
+ADDR = 0
+COL0 = 1
+COL1 = 2
+COL2 = 3
+COL3 = 4
+DECODE = 5
+END = 6
+
+
+class hex_view():
+    """Given a memory_access or a mailbox_access object and an access
+       width (32 or 64), produce a decoded hexadecimal dump for a given
+       number of bytes."""
+    def __init__(self, access, width):
+        self.access = access
+        self.width = width
+
+    @staticmethod
+    def decode(integer, length, order):
+        """Decode an integer of length bytes (byte order given by order)
+           into its printable character representation. Each byte that
+           decodes to a printable character contributes that character;
+           a byte that is non-printable, or that cannot be decoded,
+           contributes a '.' character instead."""
+        to_bytes = integer.to_bytes(length=length, byteorder=order)
+        decoded = ''
+        for b in struct.unpack(str(length) + 'c', to_bytes):  # yields length one-byte bytes objects
+            try:
+                b = b.decode()
+                decoded += b if b.isprintable() else '.'
+            except UnicodeDecodeError:
+                decoded += '.'
+        return decoded
+
+    def render32(self, start_addr, byte_count, region, fp):
+        """Render a 32-bit memory dump to file fp. Rows are 16 bytes and
+           begin at start_addr rounded down to a row boundary; cells outside
+           [start_addr, start_addr + byte_count) are left blank and not
+           read."""
+        length = 4
+        order = 'little'
+        bytes_per_row = 16
+        addr_to_display = start_addr & ~(bytes_per_row - 1)
+        addr = addr_to_display
+        state = ADDR
+        remaining = byte_count  # NOTE(review): decremented below but never read
+        end = start_addr + byte_count
+        decode = ''
+
+        while state != END:  # one state per printed field: ADDR, COL0..COL3, DECODE
+            if state == ADDR:
+                print(f'{addr:08x}:', end='', file=fp)
+                state = COL0
+            elif state == COL0:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col0 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col0, length, order)
+                    print(f' {col0:08x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 10, end='', file=fp)  # blank cell; NOTE(review): 10 here vs 9 for COL1..COL3 -- confirm
+                addr += length
+                state = COL1
+            elif state == COL1:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col1 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col1, length, order)
+                    print(f' {col1:08x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 9, end='', file=fp)
+                addr += length
+                state = COL2
+            elif state == COL2:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col2 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col2, length, order)
+                    print(f' {col2:08x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 9, end='', file=fp)
+                addr += length
+                state = COL3
+            elif state == COL3:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col3 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col3, length, order)
+                    print(f' {col3:08x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 9, end='', file=fp)
+                addr += length
+                state = DECODE
+            elif state == DECODE:
+                print(f' |{decode}|', file=fp)
+                decode = ''
+                state = END if addr - start_addr >= byte_count else ADDR  # stop once the row covers the last requested byte
+
+    def render64(self, start_addr, byte_count, region, fp):
+        """Render a 64-bit memory dump to file fp. Rows are 16 bytes and
+           begin at start_addr rounded down to a row boundary; cells outside
+           [start_addr, start_addr + byte_count) are left blank and not
+           read."""
+        length = 8
+        order = 'little'
+        bytes_per_row = 16
+        addr_to_display = start_addr & ~(bytes_per_row - 1)
+        addr = addr_to_display
+        state = ADDR
+        remaining = byte_count  # NOTE(review): decremented below but never read
+        end = start_addr + byte_count
+        decode = ''
+
+        while state != END:  # one state per printed field: ADDR, COL0, COL1, DECODE
+            if state == ADDR:
+                print(f'{addr:08x}:', end='', file=fp)
+                state = COL0
+            elif state == COL0:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col0 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col0, length, order)
+                    print(f' {col0:016x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 18, end='', file=fp)  # blank cell in place of COL0
+                addr += length
+                state = COL1
+            elif state == COL1:
+                printing = (addr >= start_addr) and (addr < end)
+                if printing:
+                    col1 = self.access.read(addr, self.width, region)
+                    decode += self.decode(col1, length, order)
+                    print(f' {col1:016x}', end='', file=fp)
+                    remaining -= length
+                else:
+                    decode += ' ' * length
+                    print(' ' * 19, end='', file=fp)  # NOTE(review): 19 here vs 18 for COL0 -- confirm against the printed cell width
+                addr += length
+                state = DECODE
+            elif state == DECODE:
+                print(f' |{decode}|', file=fp)
+                decode = ''
+                state = END if addr - start_addr >= byte_count else ADDR  # stop once the row covers the last requested byte
+
+    def render(self, start_addr, byte_count, region, fp):
+        """Render the hex_view to file fp: render32() when the width given
+           during construction is 32, render64() for any other width."""
+        if self.width == 32:
+            self.render32(start_addr, byte_count, region, fp)
+        else:
+            self.render64(start_addr, byte_count, region, fp)
diff --git a/libraries/pyopae/opae/fpga/mailbox.py b/libraries/pyopae/opae/fpga/mailbox.py
new file mode 100644
index 000000000000..78fee7f7a2ba
--- /dev/null
+++ b/libraries/pyopae/opae/fpga/mailbox.py
@@ -0,0 +1,221 @@
+# Copyright(c) 2023, Intel Corporation
+#
+# Redistribution and use in source and binary forms,
with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+
+"""Define FPGA mailbox access class and register bits."""
+
+import time
+
+from ctypes import LittleEndianStructure, c_uint64
+from opae.fpga.dfh import CSR
+
+
+# Default ACK poll timeout, also used as the settle delay after clearing the command CSR == 100 usec (in seconds)
+POLL_TIMEOUT = 1/10000
+
+# Default sleep between successive polls of the command/status CSR == 1 usec (in seconds)
+POLL_SLEEP = 1/1000000
+
+
+class MAILBOX_CMD_STATUS_BITS(LittleEndianStructure):
+    """Mailbox command/status CSR bits (field widths sum to 64 bits)."""
+    _fields_ = [
+        ("read_cmd", c_uint64, 1),  # set by read() to request a mailbox read
+        ("write_cmd", c_uint64, 1),  # set by write() to request a mailbox write
+        ("ack_trans", c_uint64, 1),  # polled by poll_for_ack_trans() for completion
+        ("reserved", c_uint64, 29),
+        ("cmd_addr", c_uint64, 32),  # mailbox address the command applies to
+    ]
+
+
+class command_status(CSR):
+    """Mailbox command and status register: 'bits' and 'value' overlay the same 64 bits."""
+    _fields_ = [("bits", MAILBOX_CMD_STATUS_BITS),
+                ("value", c_uint64)]
+    width = 64
+
+
+class MAILBOX_DATA_BITS(LittleEndianStructure):
+    """Mailbox read data/write data CSR bits (two 32-bit halves)."""
+    _fields_ = [
+        ("read_data", c_uint64, 32),  # value returned by mailbox_access.read()
+        ("write_data", c_uint64, 32),  # value supplied by mailbox_access.write()
+    ]
+
+
+class mb_data(CSR):
+    """Mailbox data register: 'bits' and 'value' overlay the same 64 bits."""
+    _fields_ = [("bits", MAILBOX_DATA_BITS),
+                ("value", c_uint64)]
+    width = 64
+
+
+class MailboxCSRAccessError(RuntimeError):
+    """Raised when a mailbox_access is constructed with
+       an invalid access width."""
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
+class MailboxAckTransTimeout(RuntimeError):
+    """Raised when a mailbox read or write operation
+       fails to see the ACK transaction bit transition
+       to 1 within the timeout period."""
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
+class mailbox_access():
+    """Mailbox read/write access class. The read and write methods
+       are defined with the same signatures as the memory_access
+       class so that the two can be used interchangeably."""
+    def __init__(self, access_width, cmd_reg_offset, hndl=None,
+                 poll_to=POLL_TIMEOUT, poll_sleep=POLL_SLEEP):
+        self.hndl = hndl  # object providing read_csr32/64 and write_csr32/64; may be assigned later
+        self.access_width = access_width
+        if self.access_width not in [64, 32]:
+            raise MailboxCSRAccessError(f'Only 32 and 64 '
+                                        f'are supported, not {access_width}')
+        self.cmd_stat_offset = cmd_reg_offset
+        self.rd_data_offset = self.cmd_stat_offset + 0x8  # data CSR follows the 8-byte command CSR
+        self.wr_data_offset = self.cmd_stat_offset + 0xc  # upper 32 bits of the data CSR
+        self.poll_to = poll_to
+        self.poll_sleep = poll_sleep
+
+    # For these read/write accessors, we know the register width implicitly
+    # as defined in the classes above.
+    def read_cmd_stat(self, region=0) -> command_status:
+        """Retrieve the current contents of the mailbox command and
+           status register, as one 64-bit or two 32-bit CSR reads."""
+        if self.access_width == 64:
+            value = self.hndl.read_csr64(self.cmd_stat_offset, region)
+        elif self.access_width == 32:
+            low = self.hndl.read_csr32(self.cmd_stat_offset, region)
+            high = self.hndl.read_csr32(self.cmd_stat_offset + 4, region)
+            value = (high << 32) | low
+        return command_status(value)
+
+    def write_cmd_stat(self, cmd_stat: command_status, region=0):
+        """Write the contents of the given mailbox command and
+           status register, as one 64-bit or two 32-bit CSR writes."""
+        if self.access_width == 64:
+            self.hndl.write_csr64(self.cmd_stat_offset, cmd_stat.value, region)
+        elif self.access_width == 32:
+            low = cmd_stat.value & 0xffffffff
+            high = cmd_stat.value >> 32
+            self.hndl.write_csr32(self.cmd_stat_offset, low, region)  # low half first
+            self.hndl.write_csr32(self.cmd_stat_offset + 4, high, region)
+
+    def read_mb_data(self, region=0) -> mb_data:
+        """Retrieve the current contents of the mailbox data register."""
+        if self.access_width == 64:
+            value = self.hndl.read_csr64(self.rd_data_offset, region)
+        elif self.access_width == 32:
+            low = self.hndl.read_csr32(self.rd_data_offset, region)
+            high = self.hndl.read_csr32(self.wr_data_offset, region)
+            value = (high << 32) | low
+        return mb_data(value)
+
+    def write_mb_data(self, data: mb_data, region=0):
+        """Write the given value to the mailbox data register."""
+        if self.access_width == 64:
+            self.hndl.write_csr64(self.rd_data_offset, data.value, region)  # one write covers both halves
+        elif self.access_width == 32:
+            rd = data.bits.read_data
+            wr = data.bits.write_data
+            self.hndl.write_csr32(self.rd_data_offset, rd, region)
+            self.hndl.write_csr32(self.wr_data_offset, wr, region)
+
+    def poll_for_ack_trans(self, timeout, region=0):
+        """Examine the mailbox command and status register until
+           bit 2 (Ack Transaction) reads 1 or timeout elapses; returns (ok, last command_status)."""
+        total_time = 0  # accumulates requested sleep time only, not measured elapsed time
+        sl = self.poll_sleep
+        cmd_stat = self.read_cmd_stat(region)
+        while total_time < timeout:
+            if cmd_stat.bits.ack_trans:
+                return (True, cmd_stat)
+            time.sleep(sl)
+            total_time += sl
+            cmd_stat = self.read_cmd_stat(region)
+        return (False, cmd_stat)
+
+    def read(self, offset, size=64, region=0):
+        """Perform mailbox read protocol and return the read_data field of the
+           data CSR; size is not used here. Raises MailboxAckTransTimeout on ACK timeout."""
+        # Clear the command/status CSR and wait.
+        self.write_cmd_stat(command_status(0), region)
+        time.sleep(self.poll_to)
+
+        # Set read command bit(0) and read address.
+        cmd_stat = command_status(1)
+        cmd_stat.bits.cmd_addr = offset
+        self.write_cmd_stat(cmd_stat, region)
+
+        # Poll for ack transaction bit.
+        ok, cmd_stat = self.poll_for_ack_trans(self.poll_to, region)
+        if not ok:
+            data = self.read_mb_data(region)  # captured only for the diagnostic message
+            msg = (f'mailbox read ACK timeout. '
+                   f'command: {cmd_stat.value:016x} data: {data.value:016x}')
+            raise MailboxAckTransTimeout(msg)
+
+        return self.read_mb_data(region).bits.read_data
+
+    def write(self, offset, value, size=64, region=0):
+        """Perform mailbox write protocol; size is not used here. Raises MailboxAckTransTimeout on ACK timeout."""
+        # Clear the command/status CSR and wait.
+        self.write_cmd_stat(command_status(0), region)
+        time.sleep(self.poll_to)
+
+        # Read-modify-write the data CSR.
+        write_data = self.read_mb_data(region)
+        write_data.bits.write_data = value
+        self.write_mb_data(write_data, region)
+
+        # Set write command bit(1) and write address.
+        cmd_stat = command_status(2)
+        cmd_stat.bits.cmd_addr = offset
+        self.write_cmd_stat(cmd_stat, region)
+
+        # Poll for ack transaction bit.
+        ok, cmd_stat = self.poll_for_ack_trans(self.poll_to, region)
+        if not ok:
+            data = self.read_mb_data(region)  # captured only for the diagnostic message
+            msg = (f'mailbox write ACK timeout. '
+                   f'command: {cmd_stat.value:016x} data: {data.value:016x}')
+            raise MailboxAckTransTimeout(msg)
+
+
+def feature_id_to_mailbox_base(feature_id: int) -> int:
+    """Give a hint about the location of the mailbox command and status
+       CSR, based on a feature id. Returns None for an unknown feature id."""
+    bases = {
+        0x15: 0xa8,  # HSSI Subsystem
+        0x20: 0x28,  # PCIe Subsystem
+    }
+    if feature_id in bases:
+        return bases[feature_id]
+    return None
diff --git a/libraries/pyopae/opae/fpga/pcie/__init__.py b/libraries/pyopae/opae/fpga/pcie/__init__.py
new file mode 100644
index 000000000000..ba56a211363b
--- /dev/null
+++ b/libraries/pyopae/opae/fpga/pcie/__init__.py
@@ -0,0 +1,25 @@
+# Copyright(c) 2023, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. diff --git a/libraries/pyopae/opae/fpga/pcie/address.py b/libraries/pyopae/opae/fpga/pcie/address.py new file mode 100644 index 000000000000..fba9c6fd6601 --- /dev/null +++ b/libraries/pyopae/opae/fpga/pcie/address.py @@ -0,0 +1,293 @@ +# Copyright(c) 2023, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides utility classes for parsing PCIe addresses, PCIe device id's, and
+   hexadecimal integers. Also includes helper functions for manipulating
+   opae.fpga.properties objects."""
+
+import re
+import struct
+import uuid
+from opae import fpga
+
+
+ADDRESS_PATTERN = (r'^(?P<pcie_address>'
+                   r'(?:(?P<segment>[\da-f]{4}):)?'
+                   r'(?P<bdf>(?P<bus>[\da-f]{2}):'
+                   r'(?P<device>[\da-f]{2})\.(?P<function>[0-7]{1})))$')
+
+ID_PATTERN = (r'^(?P<pcie_id>'
+              r'(?P<vendor_id>[\da-f]{4}):(?P<device_id>[\da-f]{4})'
+              r'(?:\s+(?P<subsystem_vendor_id>[\da-f]{4}):(?P<subsystem_device_id>[\da-f]{4}))?'
+              r')$')
+
+HEX_INT_PATTERN = (r'^(?P<hex_int>'
+                   r'(?:(?P<sign>[-+]))?'
+                   r'(?:(?P<prefix>0[x]))?'
+                   r'(?P<number>[\da-f]+)'
+                   r')$')
+
+
+class pcie_address():
+    """Parse a PCIe address ([ssss:]bb:dd.f) from a string such that
+       its component parts can be used to initialize an
+       opae.fpga.properties object."""
+    regex = re.compile(ADDRESS_PATTERN, re.IGNORECASE)
+
+    def __init__(self, addr_str: str):
+        self.candidate = addr_str
+        self.address = None  # stays None when addr_str does not parse
+        self.properties = {}
+        mg = self.regex.match(self.candidate)
+        if mg:
+            d = mg.groupdict()
+            if d['segment'] is None:
+                d['segment'] = '0000'  # segment is optional in the input; default it
+                d['pcie_address'] = d['segment'] + ':' + d['bdf']
+
+            self.address = d['pcie_address']
+            self.properties = {
+                'segment': int(d['segment'], 16),
+                'bus': int(d['bus'], 16),
+                'device': int(d['device'], 16),
+                'function': int(d['function']),
+            }
+
+    def is_ok(self) -> bool:
+        """Did the given string used to initialize the object
+           successfully parse into a PCIe address?"""
+        return self.address is not None
+
+    def __str__(self):
+        return self.address if self.address else self.candidate
+
+    def __repr__(self):
+        return str(self)
+
+    def __eq__(self, other):
+        return str(self) == str(other)
+
+
+class pcie_id():
+    """Parse a PCIe VID:DID [SVID:SDID] from a string such
+       that its component parts can be used to initialize an
+       opae.fpga.properties object."""
+    regex = re.compile(ID_PATTERN, re.IGNORECASE)
+
+    def __init__(self, id_str: str):
+        self.candidate = id_str
+        self.id = None  # stays None when id_str does not parse
+        self.properties = {}
+        mg = self.regex.match(self.candidate)
+        if mg:
+            d = mg.groupdict()
+            self.id = d['pcie_id']
+            self.properties = {
+                'vendor_id': int(d['vendor_id'], 16),
+                'device_id': int(d['device_id'], 16),
+            }
+            if d['subsystem_vendor_id'] and d['subsystem_device_id']:
+                self.properties['subsystem_vendor_id'] = int(d['subsystem_vendor_id'], 16)
+                self.properties['subsystem_device_id'] = int(d['subsystem_device_id'], 16)
+
+    def is_ok(self) -> bool:
+        """Did the given string used to initialize the object
+           successfully parse into a PCIe ID?"""
+        return self.id is not None
+
+    def __str__(self):
+        return self.id if self.id else self.candidate
+
+    def __repr__(self):
+        return str(self)
+
+    def __eq__(self, other):
+        return str(self) == str(other)
+
+
+def device_filter(addr_str: str=None, id_str: str=None):
+    """Create a device filter dictionary from the given
+       PCIe address and PCIe ID strings. Strings that do not
+       parse contribute nothing. The resulting dictionary is
+       suitable to pass as kwargs to opae.fpga.enumerate()."""
+    filt = {}
+    if addr_str:
+        addr = pcie_address(addr_str)
+        if addr.is_ok():
+            filt.update(addr.properties)
+    if id_str:
+        ID = pcie_id(id_str)
+        if ID.is_ok():
+            filt.update(ID.properties)
+    return filt
+
+
+def undo_device_filter(filt: dict):
+    """Given a dictionary created by device_filter(), convert
+       that dictionary back into its string form: the PCIe address when
+       present, else the PCIe ID; '' when filt is None or has neither."""
+    if filt is None:
+        return ''
+    if 'segment' in filt:
+        return f'{filt["segment"]:04x}:{filt["bus"]:02x}:{filt["device"]:02x}.{filt["function"]}'
+    elif 'vendor_id' in filt:
+        ID = f'{filt["vendor_id"]:04x}:{filt["device_id"]:04x}'
+        if 'subsystem_vendor_id' in filt:
+            ID += f' {filt["subsystem_vendor_id"]:04x}:{filt["subsystem_device_id"]:04x}'
+        return ID
+    return ''
+
+
+class hex_int():
+    """Parse a string as a decimal or hexadecimal integer."""
+    regex = re.compile(HEX_INT_PATTERN, re.IGNORECASE)
+
+    def __init__(self, int_str, hex_str: bool=True, print_zeros: bool=True):
+        if isinstance(int_str, int):
+            int_str = str(int_str)
+        self.candidate = int_str
+        self.hex_str = hex_str
+        self.print_zeros = print_zeros
+        self.prefix = None
+        self.integer = None  # stays None when int_str does not parse
+        self.width = 8
+        mg = self.regex.match(self.candidate)
+        if mg:
+            d = mg.groupdict()
+            self.integer = int(d['hex_int'], 0)  # NOTE(review): base 0 raises ValueError for unprefixed hex digits such as 'ff'
+            self.prefix = d['prefix']
+            if self.integer > 0xffffffff:
+                self.width = 16
+            elif self.integer > 0xffff:
+                self.width = 8
+            elif self.integer > 0xff:
+                self.width = 4
+            else:
+                self.width = 2
+
+    def is_ok(self) -> bool:
+        """Did the given string used to initialize the object
+           successfully parse into a hex int?"""
+        return self.integer is not None
+
+    def __str__(self):
+        if self.hex_str and self.prefix:
+            fmt = self.prefix + '{:'
+            if self.print_zeros:
+                fmt += '0'
+            fmt += str(self.width)
+            fmt += 'x}'
+            return fmt.format(self.integer) if self.integer else self.candidate  # zero prints as the original text
+        return str(self.integer) if self.integer else self.candidate
+
+    def __repr__(self):
+        return str(self)
+
+    def __int__(self):
+        return self.integer
+
+    def __eq__(self, other):
+        if isinstance(other, hex_int):
+            return self.integer == other.integer
+        return self.integer == other
+
+    def __add__(self, other):
+        if isinstance(other, hex_int):
+            return self.integer + other.integer
+        return self.integer + other
+
+    def __format__(self, format_spec):
+        if not format_spec:
+            return format(str(self), format_spec)
+        return format(self.integer, format_spec)
+
+
+class MemoryAccessWidthError(RuntimeError):
+    """Raised when a memory_access object is constructed with
+       an invalid access width."""
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
+class memory_access():
+    """Provide an abstraction around memory accesses. The given
+       access_width (32 or 64) determines how the read and write
+       methods acquire the memory, whether 32 or 64 bits at a time."""
+    def __init__(self, access_width, hndl=None):
+        self.hndl = hndl  # object providing read_csr32/64 and write_csr32/64; may be assigned later
+        self.access_width = access_width
+        if self.access_width not in [64, 32]:
+            raise MemoryAccessWidthError(f'Only 32 and 64 '
+                                         f'are supported, not {access_width}')
+
+    def read(self, offset, size, region=0):  # size is 32 or 64 (bits); any other size returns None
+        if size == 64:
+            if self.access_width == 64:
+                return self.hndl.read_csr64(offset, region)
+            elif self.access_width == 32:
+                low = self.hndl.read_csr32(offset, region)  # two 32-bit reads, low half first
+                high = self.hndl.read_csr32(offset + 4, region)
+                return (high << 32) | low
+        elif size == 32:
+            return self.hndl.read_csr32(offset, region)
+
+    def write(self, offset, value, size, region=0):
+        """Write the given value to the offset in memory. size is
+           either 32 or 64; any other size writes nothing."""
+        if size == 64:
+            if self.access_width == 64:
+                self.hndl.write_csr64(offset, value, region)
+            elif self.access_width == 32:
+                self.hndl.write_csr32(offset, value & 0xffffffff, region)  # low half first
+                self.hndl.write_csr32(offset + 4, value >> 32, region)
+        elif size == 32:
+            self.hndl.write_csr32(offset, value, region)
+
+    def read_guid(self, offset, region=0):
+        """Read the two quadwords at offset and offset + 8 and return them
+           as a uuid.UUID whose most significant half is the second quadword."""
+        low = self.read(offset, 64, region)
+        high = self.read(offset + 8, 64, region)
+        return uuid.UUID(bytes=struct.pack('>QQ', high, low))
+
+
+def properties_to_address(p: fpga.properties) -> pcie_address:
+    """Build a pcie_address from the segment, bus, device and function
+       fields of an opae.fpga.properties object."""
+    return pcie_address(f'{p.segment:04x}:{p.bus:02x}:{p.device:02x}.{p.function}')
+
+
+def tok_or_handle_to_address(tok_or_handle) -> pcie_address:
+    """Retrieve the properties of the given token or handle and
+       use them to construct the PCIe address."""
+    return properties_to_address(fpga.properties(tok_or_handle))
+
+
+def properties_to_id(p: fpga.properties) -> pcie_id:
+    """Build a pcie_id from the vendor/device and subsystem
+       vendor/device fields of an opae.fpga.properties object."""
+    return pcie_id(f'{p.vendor_id:04x}:{p.device_id:04x} '
+                   f'{p.subsystem_vendor_id:04x}:{p.subsystem_device_id:04x}')
diff --git a/libraries/pyopae/opae/fpga/tools/__init__.py b/libraries/pyopae/opae/fpga/tools/__init__.py
new file mode 100644
index 000000000000..ba56a211363b
--- /dev/null
+++ b/libraries/pyopae/opae/fpga/tools/__init__.py
@@ -0,0 +1,25 @@
+# Copyright(c) 2023, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. diff --git a/libraries/pyopae/opae/fpga/tools/opae_mem.py b/libraries/pyopae/opae/fpga/tools/opae_mem.py new file mode 100644 index 000000000000..b4b9bdd85c1d --- /dev/null +++ b/libraries/pyopae/opae/fpga/tools/opae_mem.py @@ -0,0 +1,945 @@ +#!/usr/bin/env python3 +# Copyright(c) 2023, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""opae-mem provides a means to view the MMIO space of + FPGA accelerators, whether connected to DFL, VFIO, UIO, + or ASE. + + Devices are enumerated using the opae-mem ls command: + + $ sudo chmod 666 /dev/uio* + $ opae-mem ls + [0000:d8:00.0] (8086:bcce 8086:0000) + UIO 0x14 00000000-0000-0000-0000-000000000000 + UIO 0x20 00000000-0000-0001-0000-000000000000 + [0000:3b:00.0] (8086:bcce 8086:0000) + UIO 0x15 00042415-0004-2415-0000-001100010000 + UIO 0x20 00000000-0000-0001-0000-000000000000 + UIO 0x14 00000000-0000-0000-0000-000000000000 + + This output shows two FPGA cards with a total of five FPGA + MMIO regions. 
+ + The first card at address 0000:d8:00.0 has two MMIO regions + accessible via UIO: + Feature ID 0x14: s10 IOPLL + Feature ID 0x20: PCIe Subsystem + + The second card at address 0000:3b:00.0 has three MMIO regions + accessible via UIO: + Feature ID 0x15: HSSI Subsystem + Feature ID 0x20: PCIe Subsystem + Feature ID 0x14: s10 IOPLL + + The peek command provides a way to view a range of MMIO addresses: + + $ opae-mem peek -d 0000:3b:00.0 -f 0x20 --count 4 0x0 + 00000000: 3000000010000020 0000000000000000 | ......0........| + 00000010: 0000000000000001 0000000000000000 |................| + + Here, --count 4 causes the command to display four 64-bit qwords, + from addresses 0x0 through 0x18. + + The default format is hex display, which is modeled after the + output of hexdump -C. The format can be controlled using the -F + option. Valid values for -F are simple, hex, and json. + + $ opae-mem peek -d 0000:3b:00.0 -f 0x20 --count 4 -F simple 0x0 + 00000000: 3000000010000020 + 00000008: 0000000000000000 + 00000010: 0000000000000001 + 00000018: 0000000000000000 + + $ opae-mem peek -d 0000:3b:00.0 -f 0x20 --count 4 -F json 0x0 + { + "0x00000000": "0x3000000010000020", + "0x00000008": "0x0000000000000000", + "0x00000010": "0x0000000000000001", + "0x00000018": "0x0000000000000000" + } + + The output of opae-mem peek can be sent to a file using the -o + option. This is useful for capture/playback. Playback is available + with the opae-mem poke command. + + $ opae-mem peek -d 0000:3b:00.0 -f 0x20 --count 4 -F json -o file.json 0x0 + $ cat file.json + { + "0x00000000": "0x3000000010000020", + "0x00000008": "0x0000000000000000", + "0x00000010": "0x0000000000000001", + "0x00000018": "0x0000000000000000" + } + + The poke command provides a way to write MMIO addresses: + + $ opae-mem poke -d 0000:3b:00.0 -f 0x20 0x0 0xc0cac01a + + In the above poke command, 0x0 is the MMIO offset to write + and 0xc0cac01a is the value. 
+ + The output of the opae-mem peek command with -F json can + be played back as a series of writes to an MMIO region + using the opae-mem poke --json option: + + $ opae-mem poke -d 0000:3b:00.0 -f 0x20 --json file.json + + The opae-mem mb-read command is used to issue read requests + to an FPGA mailbox interface: + + $ opae-mem mb-read -d 0000:3b:00.0 -f 0x20 -c 4 0x0 + mb-read [0000:3b:00.0] 0x20 This command needs -b mailbox_base. For feature_id 0x20, try -b 0x0028 + + $ opae-mem mb-read -d 0000:3b:00.0 -f 0x20 -c 4 -b 0x28 0x0 + 00000000: 01000000 00000001 01104000 00000000 |.........@......| + + Each mailbox address represents a 32-bit data value. + + Like peek, the default display format for mb-read is hex. + To change the display format, use the -F option, which accepts + simple, hex, or json. + + $ opae-mem mb-read -d 0000:3b:00.0 -f 0x20 -c 4 -b 0x28 -F simple 0x0 + 00000000: 01000000 + 00000004: 00000001 + 00000008: 01104000 + 0000000c: 00000000 + + $ opae-mem mb-read -d 0000:3b:00.0 -f 0x20 -c 4 -b 0x28 -F json 0x0 + { + "0x00000028": { + "0x00000000": "0x01000000", + "0x00000004": "0x00000001", + "0x00000008": "0x01104000", + "0x0000000c": "0x00000000" + } + } + + The mb-write command provides a way to issue write commands + to an FPGA mailbox interface. + + $ opae-mem mb-write -d 0000:3b:00.0 -f 0x20 -b 0x28 0x0 0xc0cac01a + + In the above command, 0x0 is the mailbox address and 0xc0cac01a + is the 32-bit data value to be written. + + The output of the opae-mem mb-read command with -F json can + be played back as a series of writes to a mailbox interface + using the opae-mem mb-write --json option: + + $ opae-mem mb-read -d 0000:3b:00.0 -f 0x20 -c 4 -b 0x28 -F json -o mb.json 0x0 + $ opae-mem mb-write -d 0000:3b:00.0 -f 0x20 -b 0x28 --json mb.json + + Each of the above commands has explicitly specified the + -d PCIE_ADDR and -f FEATURE_ID parameters, making for some + long command lines. 
To shorten the length, opae-mem can + be "locked" to a (device, feature_id) pair: + + $ opae-mem lock -d 0000:3b:00.0 -f 0x20 + lock [0000:3b:00.0] 0x20 OK + + Once "locked" to a device, issuing the command again displays + the lock status: + + $ opae-mem lock + [locked 0000:3b:00.0 0x20] lock currently held by 0000:3b:00.0 0x20. + + From now until the time the session is unlocked, + opae-mem commands may omit the explicit -d and -f + parameters: + + $ opae-mem peek 0x0 + 00000000: 3000000010000020 | ......0 | + + "Locking" is simply a convenient way to shorten the opae-mem + command line. Each of the other commands operates in the same + way, as if -d and -f were specified explicitly. + + Note: a "lock" can be overridden by specifying -d and/or -f: + + $ opae-mem -V peek -d 0000:d8:00.0 -f 0x14 -c 4 0x0 + [locked 0000:3b:00.0 0x20 [override 0000:d8:00.0 0x14]] peek [0000:d8:00.0] 0x14 offset=0x0 region=0 format=hex + 00000000: 3000000010000014 0000000000000000 |.......0........| + 00000010: 0000000000000000 1000000000000000 |................| + + The preceding command used a lock override by specifying a + different device address to -d and a different feature_id + to -f. The -V (verbose) option was given to show the + lock override status. + + The unlock command is used to release a lock: + + $ opae-mem unlock + [locked 0000:3b:00.0 0x20] unlock Please tell me the device / feature id to unlock. (-d 0000:3b:00.0 -f 0x20) + + $ opae-mem unlock -d 0000:3b:00.0 -f 0x20 + [locked 0000:3b:00.0 0x20] unlock [0000:3b:00.0] 0x20 OK + + $ opae-mem lock + lock Give me the device address and feature id. 
+""" + +from argparse import ArgumentParser, FileType +import json +import os +import sys + +from opae import fpga +from opae.fpga import dfh +from opae.fpga import feature +from opae.fpga import hexview +from opae.fpga import mailbox +import opae.fpga.pcie.address as addr + +try: + from pathlib import Path +except ImportError: + from pathlib2 import Path # noqa + + +VERSION = '0.0.1' + +DEVICE_HELP = ('PCIe filter of device ' + '[segment:]bus:device.function' + ' or \'vendor:device[ subvendor:subdevice]\'') + + +# { +# "locked_by": { +# "address": "0000:dc:00.0", +# "feature_id": 32 +# } +# } +LOCK_FILE = os.path.join(Path.home(), '.local/etc/opae/opae-mem.json') + + +def enum_filter(addr_or_id): + """Used as the type parameter for argparse. + Converts a PCIe address or a PCIe id tuple to + a dictionary containing the appropriate keys + for an opae.fpga.properties object.""" + return addr.device_filter(addr_or_id, addr_or_id) + + +def get_lock_owner(): + """(address, feature_id)""" + locker = (None, None) + p = Path(LOCK_FILE) + if p.exists() and p.is_file(): + with open(p, 'r', encoding='utf-8') as fp: + try: + contents = json.load(fp) + locked_by = contents['locked_by'] + locker = (addr.pcie_address(locked_by['address']), + addr.hex_int(f'0x{locked_by["feature_id"]:0x}')) + except json.JSONDecodeError as jde: + print(f'{jde.msg} at {p} line {jde.lineno} col {jde.colno}') + print(f'{p} is invalid or corrupt. Please fix or remove it.') + sys.exit(1) + return locker + + +def lock_override_requested(address: addr.pcie_address, feature_id: addr.hex_int, args): + """Given the current lock owner (address, feature_id) and any filter specified + during argument parsing, determine whether (args.device, args.feature_id) + provides a lock override request. + * address and feature_id describe the current lock owner. + * args may describe an override request. 
+ + Returns a tuple indicating t[0]: whether the lock is being overridden, + t[1]: the token matching the corresponding new lock owner if t[0] is True, + otherwise the current lock owner, and t[2] the overriding feature id if + t[0] is True, otherwise the current lock owner's feature id.""" + + # Find the current owner by enumeration. + owner_filt = addr.device_filter(str(address), str(address)) + owner_filt['feature_id'] = feature_id + owner_toks = feature.enumerate(args.width, args.region, **owner_filt) + if not owner_toks: + print('Previous locker not found. Releasing lock.') + release_lock() + sys.exit(1) + + if args.device is None and args.feature_id is None: + return (False, owner_toks[0], feature_id) # no override + + if args.device is None: + over_filt = owner_filt + over_filt['feature_id'] = args.feature_id + elif args.feature_id is None: + over_filt = args.device + over_filt['feature_id'] = feature_id + else: + over_filt = args.device + over_filt['feature_id'] = args.feature_id + + owner_props = fpga.properties(owner_toks[0]) + + # Find the overriding device by enumeration. + over_toks = feature.enumerate(args.width, args.region, **over_filt) + if not over_toks: + if args.feature_id is None: + return (False, owner_toks[0], feature_id) + print(f'Error enumerating {addr.undo_device_filter(args.device)}. Skipping') + return (False, owner_toks[0], feature_id) + + if len(over_toks) > 1: + print(f'more than 1 device matches. 
Please narrow your search criteria.') + sys.exit(1) + + over_props = fpga.properties(over_toks[0]) + + if owner_props.object_id != over_props.object_id: + if args.feature_id is None: + return (True, over_toks[0], feature_id) + return (True, over_toks[0], args.feature_id) + + # Same object_id + if args.feature_id is None: + return (False, owner_toks[0], feature_id) + + return (feature_id != args.feature_id, owner_toks[0], args.feature_id) + + +def set_lock_owner(address: addr.pcie_address, feature_id: addr.hex_int): + """Set the lock owner to (address, feature_id).""" + locker = get_lock_owner() + if locker[0]: + if address == locker[0] and feature_id == locker[1]: + return # nothing to do + + p = Path(LOCK_FILE) + parent = p.parent + if not parent.exists(): + parent.mkdir(parents=True) + + contents = { + 'locked_by': { + 'address': f'{address}', + 'feature_id': int(feature_id) + } + } + + with open(p, 'w', encoding='utf-8') as fp: + json.dump(contents, fp) + + +def release_lock(): + """Release the device lock by removing any existing lock file.""" + p = Path(LOCK_FILE) + if not p.exists() or not p.is_file(): + return False + p.unlink() + return True + + +def description_str(tok_or_hndl, feature_id: int): + """Queries the properties of the given token or handle object + to construct a string consisting of the PCIe address along + with a feature ID.""" + p = fpga.properties(tok_or_hndl) + s = f'[{p.segment:04x}:{p.bus:02x}:{p.device:02x}.{p.function}]' + if feature_id is not None: + s += f' 0x{int(feature_id):02x}' + return s + + +def args_description_str(args): + """Converts a device filter back to its PCIe address or + PCIe ID string.""" + return addr.undo_device_filter(args.device) + + +class lock_cmd(): + """Locks the script to a specified (device, feature id) pair so + that subsequent runs don't need to specify the -d and -f options.""" + needs_device = False + + @staticmethod + def add_subparser(subparser): + """Add argparser subparser options.""" + lock = 
subparser.add_parser('lock') + lock.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + lock.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + + def __call__(self, args, lowner, lfeature_id): + descr = args_description_str(args) + descr = ' ' + descr + ' ' if descr else ' ' + msg = f'lock{descr}' + if lowner: + if not args.device or args.feature_id is None: + print(msg + f'currently held by {lowner} 0x{lfeature_id:02x}.') + return 0 + else: + if not args.device or args.feature_id is None: + print(msg + 'Give me the device address and feature id.') + return 1 + + args.device['feature_id'] = args.feature_id + tokens = feature.enumerate(args.width, args.region, **args.device) + if not tokens: + print(msg + 'no device/feature id found.') + return 1 + if len(tokens) > 1: + print(msg + 'more than 1 device matches. Please narrow your search with feature id.') + return 1 + + msg = f'lock {description_str(tokens[0], args.feature_id)}' + address = addr.properties_to_address(fpga.properties(tokens[0])) + set_lock_owner(address, int(args.feature_id)) + print(msg + ' OK') + return 0 + + +class unlock_cmd(): + """Command for script unlock from token. 
Allows reversing a prior + lock_cmd action.""" + needs_device = False + + @staticmethod + def add_subparser(subparser): + """Add argparser subparser options.""" + unlock = subparser.add_parser('unlock') + unlock.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + unlock.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + + def __call__(self, args, lowner, lfeature_id): + descr = args_description_str(args) + descr = ' ' + descr + ' ' if descr else ' ' + msg = f'unlock{descr}' + if lowner: + if not args.device: + print(msg + f'Please tell me the device / feature id to unlock. (-d {lowner} -f 0x{lfeature_id:02x})') + return 1 + else: + print(msg + 'Not currently locked to a device.') + return 1 + + tokens = feature.enumerate(args.width, args.region, **args.device) + if not tokens: + print(msg + 'no device found.') + sys.exit(1) + elif len(tokens) > 1: + print(msg + 'more than one device matches. Please narrow your search with feature id.') + sys.exit(1) + else: + msg = f'unlock {description_str(tokens[0], args.feature_id)}' + address = addr.properties_to_address(fpga.properties(tokens[0])) + owner_addr, _ = get_lock_owner() + if address == owner_addr: + if release_lock(): + msg += ' OK' + else: + msg += ' Lock file not found!' + print(msg) + else: + print(msg + f' {address} is not the current lock owner.') + sys.exit(1) + return 0 + + +class ls_cmd(): + """Command for listing devices. 
Devices may be filtered by address, + feature ID, token type, and OPAE interface.""" + needs_device = False + + ifc_to_str = { + fpga.IFC_DFL: "DFL", + fpga.IFC_VFIO: "VFIO", + fpga.IFC_SIM_DFL: "DFL (ASE)", + fpga.IFC_SIM_VFIO: "VFIO (ASE)", + fpga.IFC_UIO: "UIO", + } + + str_to_ifc = { + "dfl": fpga.IFC_DFL, + "vfio": fpga.IFC_VFIO, + "dfl_ase": fpga.IFC_SIM_DFL, + "vfio_ase": fpga.IFC_SIM_VFIO, + "uio": fpga.IFC_UIO, + } + + @staticmethod + def add_subparser(subparser): + """Add argparse subparser options.""" + ls = subparser.add_parser('ls') + ls.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + ls.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + ls.add_argument('-t', '--token-type', + choices=['device', 'accel'], default='accel', + help='Token type filter') + ls.add_argument('-i', '--interface', + choices=list(ls_cmd.str_to_ifc.keys()), default=None, + help='OPAE interface filter') + + def __call__(self, args, lowner, lfeature_id): + filt = args.device if args.device else {} + if args.feature_id is not None: + filt['feature_id'] = args.feature_id + if args.token_type is not None: + filt['type'] = (fpga.DEVICE if args.token_type == 'device' + else fpga.ACCELERATOR) + if args.interface is not None: + filt['interface'] = self.str_to_ifc[args.interface] + + tokens = feature.enumerate(args.width, args.region, **filt) + + # Capture the properties for each token, arranging + # each by its PCIe address and PCIe ID. 
+ devs = {} + for t in tokens: + p = fpga.properties(t) + key = f'[{addr.properties_to_address(p)}] ({addr.properties_to_id(p)})' + if key in devs: + devs[key].append((t, p)) + else: + devs[key] = [(t, p)] + + access = addr.memory_access(args.width) + + for k, v in devs.items(): + print(k) + for t, p in v: + try: + with fpga.open(t, fpga.OPEN_SHARED) as hndl: + access.hndl = hndl + csr = dfh.dfh0(access.read(0, dfh.dfh0.width, args.region)) + guid = access.read_guid(8, args.region) + if args.verbose: + msg = f' interface={self.ifc_to_str[p.interface]} feature=0x{csr.bits.id:02x} guid={guid}' + else: + msg = f' {self.ifc_to_str[p.interface]} 0x{csr.bits.id:02x} {guid}' + print(msg) + except RuntimeError: + # FME tokens don't have mappable MMIO. + pass + + return 0 + + +class peek_cmd(): + """Command for memory peek. Allows multiple read via command + line arguments. The output may be formatted as simple, hex, + or JSON and may be redirected to a file.""" + needs_device = True + + @staticmethod + def add_subparser(subparser): + """Add argparse subparser options.""" + peek = subparser.add_parser('peek') + peek.add_argument('offset', type=addr.hex_int, + help='the MMIO offset (CSR) to peek') + peek.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + peek.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + peek.add_argument('-c', '--count', type=addr.hex_int, + default=addr.hex_int('1'), + help='the number of CSRs to peek') + peek.add_argument('-o', '--output', type=FileType('w'), + default=sys.stdout, + help='file to store peek output') + peek.add_argument('-F', '--format', + choices=['simple', 'hex', 'json'], + default='hex', + help='output format') + + def __call__(self, hndl, args, feature_id): + descr = description_str(hndl, feature_id) + descr = ' ' + descr + ' ' if descr else ' ' + msg = f'peek{descr}' + + if 
int(args.count) <= 0: + print(msg + f'invalid count: {args.count}') + return 1 + + if args.verbose: + print(msg + f'offset={args.offset} ' + f'region={args.region} format={args.format}') + + access = addr.memory_access(args.width, hndl) + + if args.format == 'simple': + offset = args.offset + for _ in range(int(args.count)): + data = access.read(offset, 64, args.region) + print(f'{offset:08x}: {data:016x}', file=args.output) + offset += 8 + elif args.format == 'hex': + hview = hexview.hex_view(access, access.access_width) + hview.render(int(args.offset), + int(args.count) * (access.access_width / 8), + args.region, args.output) + elif args.format == 'json': + mem = {} + offset = args.offset + for _ in range(int(args.count)): + data = access.read(offset, 64, args.region) + mem[f'0x{offset:08x}'] = f'0x{data:016x}' + offset += 8 + json.dump(mem, args.output, indent=2) + + return 0 + + +class poke_cmd(): + """Command for memory poke. Allows single write via command + line arguments or multiple write via JSON file.""" + needs_device = True + + @staticmethod + def add_subparser(subparser): + """Add argparse subparser options.""" + poke = subparser.add_parser('poke') + poke.add_argument('offset', type=addr.hex_int, nargs='?', + help='the MMIO offset (CSR) to poke') + poke.add_argument('value', type=addr.hex_int, nargs='?', + help='the value to poke') + poke.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + poke.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + poke.add_argument('-j', '--json', type=FileType('r'), + default=None, + help='name of JSON file containing data to poke') + + def __call__(self, hndl, args, feature_id): + descr = description_str(hndl, feature_id) + descr = ' ' + descr + ' ' if descr else ' ' + msg = f'poke{descr}' + + access = addr.memory_access(args.width, hndl) + + if args.json is None: + if 
args.offset is None or args.value is None: + print(msg + 'offset and value are required parameters.') + return 1 + if args.verbose: + print(msg + f'offset={args.offset} value={args.value} region={args.region}') + access.write(args.offset, args.value, 64, args.region) + else: + if args.verbose: + print(msg + f'region={args.region}') + mem = json.load(args.json) + for k, v in mem.items(): + access.write(int(k, 0), int(v, 0), 64, args.region) + + return 0 + + +class mb_read_cmd(): + """Command for mailbox read. Allows multiple read via command + line arguments. The output may be formatted as simple, hex, + or JSON and may be redirected to a file.""" + needs_device = True + + @staticmethod + def add_subparser(subparser): + """Add argparse subparser options.""" + mb_read = subparser.add_parser('mb-read') + mb_read.add_argument('address', type=addr.hex_int, + help='the mailbox address to read') + mb_read.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + mb_read.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + mb_read.add_argument('-b', '--mailbox-base', type=addr.hex_int, + default=None, help='the CSR offset of the mailbox') + mb_read.add_argument('-c', '--count', type=addr.hex_int, + default=addr.hex_int('1'), + help='the number of addresses to read') + mb_read.add_argument('-t', '--timeout', type=int, default=100, + help='total number of microseconds to wait when polling') + mb_read.add_argument('-s', '--sleep', type=int, default=1, + help='number of microseconds to sleep between each poll') + mb_read.add_argument('-o', '--output', type=FileType('w'), + default=sys.stdout, + help='file to store mb-read output') + mb_read.add_argument('-F', '--format', + choices=['simple', 'hex', 'json'], + default='hex', + help='output format') + + def __call__(self, hndl, args, feature_id): + descr = description_str(hndl, feature_id) + 
descr = ' ' + descr + ' ' if descr else ' ' + msg = f'mb-read{descr}' + + if args.mailbox_base is None: + print(msg + 'This command needs -b mailbox_base.', end='') + base = mailbox.feature_id_to_mailbox_base(int(feature_id)) + if base is not None: + print(f' For feature_id 0x{feature_id:02x}, try -b 0x{base:04x}') + else: + print() + return 1 + + if int(args.count) <= 0: + print(msg + f'invalid count: {args.count}') + return 1 + + if args.verbose: + print(msg + f'address={args.address} base={args.mailbox_base} ' + f'timeout={args.timeout} sleep={args.sleep}') + + mb_access = mailbox.mailbox_access(args.width, + int(args.mailbox_base), hndl, + poll_to=args.timeout / 1000000, + poll_sleep=args.sleep / 1000000) + + if args.format == 'simple': + address = int(args.address) + for _ in range(int(args.count)): + data = mb_access.read(address, region=args.region) + print(f'{address:08x}: {data:08x}', file=args.output) + address += 4 + elif args.format == 'hex': + hview = hexview.hex_view(mb_access, 32) + hview.render(int(args.address), + int(args.count) * 4, + args.region, args.output) + elif args.format == 'json': + mem = {} + mb_base = args.mailbox_base + address = int(args.address) + for _ in range(int(args.count)): + data = mb_access.read(address, region=args.region) + mem[f'0x{address:08x}'] = f'0x{data:08x}' + address += 4 + mb = { f'0x{mb_base:08x}': mem } + json.dump(mb, args.output, indent=2) + + return 0 + + +class mb_write_cmd(): + """Command for mailbox write. 
Allows single write via command + line arguments or multiple write via JSON file.""" + needs_device = True + + @staticmethod + def add_subparser(subparser): + """Add argparse subparser options.""" + mb_write = subparser.add_parser('mb-write') + mb_write.add_argument('address', type=addr.hex_int, nargs='?', + help='the mailbox address to write') + mb_write.add_argument('value', type=addr.hex_int, nargs='?', + help='the value to write') + mb_write.add_argument('-d', '--device', dest='sdevice', + metavar='DEVICE', type=enum_filter, + help=DEVICE_HELP) + mb_write.add_argument('-f', '--feature-id', dest='sfeature_id', + metavar='FEATURE_ID', + type=addr.hex_int, default=None, + help='DFL feature ID') + mb_write.add_argument('-b', '--mailbox-base', type=addr.hex_int, + default=None, help='the CSR offset of the mailbox') + mb_write.add_argument('-t', '--timeout', type=int, default=100, + help='total number of microseconds to wait when polling') + mb_write.add_argument('-s', '--sleep', type=int, default=1, + help='number of microseconds to sleep between each poll') + mb_write.add_argument('-j', '--json', type=FileType('r'), + default=None, + help='name of JSON file containing data to write') + + def __call__(self, hndl, args, feature_id): + descr = description_str(hndl, feature_id) + descr = ' ' + descr + ' ' if descr else ' ' + msg = f'mb-write{descr}' + + if args.mailbox_base is None and args.json is None: + print(msg + 'This command requires -b mailbox_base.', end='') + base = mailbox.feature_id_to_mailbox_base(int(feature_id)) + if base is not None: + print(f' For feature_id 0x{feature_id:02x}, try -b 0x{base:04x}') + else: + print() + return 1 + + if args.json is None: + if args.address is None or args.value is None: + print(msg + 'address and value are required parameters.') + return 1 + if args.verbose: + print(msg + f'address={args.address} value={args.value} base={args.mailbox_base} ' + f'timeout={args.timeout} sleep={args.sleep}') + mb_access = 
mailbox.mailbox_access(args.width, + int(args.mailbox_base), hndl, + poll_to=args.timeout / 1000000, + poll_sleep=args.sleep / 1000000) + mb_access.write(int(args.address), int(args.value), region=args.region) + else: + if args.verbose: + print(msg + f'address={args.address} value={args.value} base={args.mailbox_base} ' + f'timeout={args.timeout} sleep={args.sleep}') + mb = json.load(args.json) + for mb_base in mb.keys(): + mem = mb[mb_base] + mb_access = mailbox.mailbox_access(args.width, + int(mb_base, 0), hndl, + poll_to=args.timeout / 1000000, + poll_sleep=args.sleep / 1000000) + for a in mem.keys(): + address = int(a, 0) + data = int(mem[a], 0) + mb_access.write(address, data, region=args.region) + + return 0 + + +def find_device(args): + """Use args.device and args.feature_id to obtain the + corresponding token.""" + if args.device is None: + args.device = {} + if args.feature_id is not None: + args.device['feature_id'] = args.feature_id + + devices = feature.enumerate(args.width, args.region, **args.device) + if not devices: + undo = addr.undo_device_filter(args.device) + print(f'No device found for filter criteria: {undo}') + sys.exit(1) + + if len(devices) > 1: + print('Found more than one device for filter criteria.') + print('Please try narrowing the search by providing a PCIe address.') + sys.exit(1) + + return devices[0] + + +def main(): + """Application entry point.""" + actions = { + 'lock': lock_cmd, + 'unlock': unlock_cmd, + 'ls': ls_cmd, + 'peek': peek_cmd, + 'poke': poke_cmd, + 'mb-read': mb_read_cmd, + 'mb-write': mb_write_cmd, + } + + parser = ArgumentParser() + + parser.add_argument('-d', '--device', type=enum_filter, + help=DEVICE_HELP) + parser.add_argument('-f', '--feature-id', + type=addr.hex_int, default=None, + help='DFL feature ID') + parser.add_argument('-w', '--width', type=int, choices=[64, 32], + default=64, help='CSR access width') + parser.add_argument('-r', '--region', type=int, + default=0, help='CSR MMIO region') + 
parser.add_argument('-V', '--verbose', action='store_true', + default=False, help='be verbose with output') + parser.add_argument('-v', '--version', action='version', + version=f"%(prog)s {VERSION}", + help='display version information and exit') + subparser = parser.add_subparsers(dest='which') + + for _, v in actions.items(): + v.add_subparser(subparser) + + args = parser.parse_args() + + # Favor the sub-parser's sdevice and sfeature_id when present. + if hasattr(args, 'sdevice') and args.sdevice is not None: + args.device = args.sdevice + if hasattr(args, 'sfeature_id') and args.sfeature_id is not None: + args.feature_id = args.sfeature_id + + action = None + if hasattr(args, 'which') and args.which: + action = actions[args.which]() + + if not action: + print(f'Please choose an action from {", ".join(actions.keys())}.\n') + parser.print_help() + sys.exit(1) + + # Determine whether a previous session locked a device. + # If so, retrieve the PCIe address of the locked device. + lowner, lfeature_id = get_lock_owner() + + tok = None + feature_id = lfeature_id + pr = isinstance(action, (lock_cmd, unlock_cmd)) or args.verbose + if lowner: + if pr: + print(f'[locked {lowner} 0x{lfeature_id:0x}', end='') + + is_override, tok, feature_id = lock_override_requested( + lowner, lfeature_id, args) + + if is_override and pr: + print(f' [override {addr.tok_or_handle_to_address(tok)} 0x{feature_id:0x}]', end='') + + if pr: + print('] ', end='\n' if isinstance(action, ls_cmd) else '') + + if feature_id is None: + feature_id = args.feature_id + + res = 0 + if action.needs_device: + if not tok: + tok = find_device(args) + with fpga.open(tok, 0) as hndl: + res = action(hndl, args, feature_id) + else: + res = action(args, lowner, lfeature_id) + + sys.exit(res) + +if __name__ == '__main__': + main() diff --git a/libraries/pyopae/pyproject.toml b/libraries/pyopae/pyproject.toml index e53d9f3abf3f..2ad600d2424d 100644 --- a/libraries/pyopae/pyproject.toml +++ 
b/libraries/pyopae/pyproject.toml @@ -36,6 +36,14 @@ description = "pyopae provides Python bindings around the OPAE C API" license = {text = "BSD-3-Clause"} requires-python = ">=3.6" +[tool.setuptools] +packages = [ +"opae", +"opae.fpga", +"opae.fpga.pcie", +"opae.fpga.tools", +] + [tool.setuptools.package-data] "*" = [ "README.md", @@ -58,5 +66,8 @@ requires-python = ">=3.6" "pysysobject.cpp", ] +[project.scripts] +opae-mem = "opae.fpga.tools.opae_mem:main" + [project.urls] Homepage = "https://opae.github.io" diff --git a/libraries/pyopae/setup.py b/libraries/pyopae/setup.py index c8e7098f94d9..3a596f8fa32f 100644 --- a/libraries/pyopae/setup.py +++ b/libraries/pyopae/setup.py @@ -73,6 +73,11 @@ def override_build_extensions(self): setup( name='opae.fpga', version='@OPAE_VERSION@', - packages=find_namespace_packages(), - ext_modules=extensions + packages=find_namespace_packages(include=['opae.*']), + ext_modules=extensions, + entry_points={ + 'console_scripts': [ + 'opae-mem = opae.fpga.tools.opae_mem:main', + ] + }, ) diff --git a/opae.spec.fedora b/opae.spec.fedora index dcdbe10e7fa2..4d3ee7f08513 100644 --- a/opae.spec.fedora +++ b/opae.spec.fedora @@ -316,6 +316,7 @@ done %{_bindir}/nlb7 %{_bindir}/vabtool %{_bindir}/n5010tool +%{_bindir}/opae-mem %{_usr}/share/opae/* %{python3_sitelib}/ethernet* diff --git a/packaging/opae/deb/opae-devel.install b/packaging/opae/deb/opae-devel.install index 5af694b0a4fe..cc4358c0ad2f 100644 --- a/packaging/opae/deb/opae-devel.install +++ b/packaging/opae/deb/opae-devel.install @@ -58,6 +58,7 @@ usr/bin/nlb3 usr/bin/nlb7 usr/bin/vabtool usr/bin/n5010tool +usr/bin/opae-mem usr/share/opae/* usr/lib/python3/dist-packages/ethernet* usr/lib/python3/dist-packages/hssi_ethernet* From 5307b57fefff795eed304b8b7a51ccd06df77477 Mon Sep 17 00:00:00 2001 From: pl-ravikanth <97520284+pl-ravikanth@users.noreply.github.com> Date: Thu, 14 Dec 2023 23:39:58 +0530 Subject: [PATCH 14/28] samples/cxl_mem_tg: 16GB memory support is added for MEM_TG 
(#3058) Problem/Feature : In code only 48KB support is available for MEM_TG Fix/Workaround : Extended it to 16GB. Changes made : 1.samples/cxl_mem_tg/cxl_mem_tg.h and samples/cxl_mem_tg/cxl_tg_test.h are updated accordingly. 2. Review comments from Ananda are added in samples/cxl_mem_tg/cxl_tg_test.h. Signed-off-by: Lakshmiravikanth Pammi Co-authored-by: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> --- samples/cxl_mem_tg/cxl_mem_tg.h | 2 +- samples/cxl_mem_tg/cxl_tg_test.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/cxl_mem_tg/cxl_mem_tg.h b/samples/cxl_mem_tg/cxl_mem_tg.h index d6fa2ee84730..03b335d8bdc6 100644 --- a/samples/cxl_mem_tg/cxl_mem_tg.h +++ b/samples/cxl_mem_tg/cxl_mem_tg.h @@ -191,8 +191,8 @@ union tg_mem_size { enum { offset = MEM_SIZE }; uint64_t value; struct { - uint64_t total_mem_size : 32; uint64_t hdm_mem_size : 32; + uint64_t total_mem_size : 32; }; }; diff --git a/samples/cxl_mem_tg/cxl_tg_test.h b/samples/cxl_mem_tg/cxl_tg_test.h index c5d060afd38e..a8b0a557b554 100644 --- a/samples/cxl_mem_tg/cxl_tg_test.h +++ b/samples/cxl_mem_tg/cxl_tg_test.h @@ -283,13 +283,13 @@ class cxl_tg_test : public test_command { tg_mem_size mem_size; mem_size.value = tg_exe_->read64(MEM_SIZE); - uint64_t value = mem_size.total_mem_size; + uint64_t value = mem_size.total_mem_size * MB / CL; tg_exe_->logger_->debug("Total hardware memory size:{}", value); - value = mem_size.hdm_mem_size; + value = mem_size.hdm_mem_size * MB / CL; tg_exe_->logger_->debug("HDM memory size:{0:d}", value); if (mem_size.hdm_mem_size != 0) - tg_exe_->hdm_size_ = mem_size.hdm_mem_size; + tg_exe_->hdm_size_ = (mem_size.total_mem_size - mem_size.hdm_mem_size) * MB / CL; cout << "HDM memory cache line size:" << dec << tg_exe_->hdm_size_ << endl; From 73a492654f231ac675c9e09fbbb7656fdc76421c Mon Sep 17 00:00:00 2001 From: Tim Whisonant Date: Thu, 14 Dec 2023 14:46:44 -0800 Subject: [PATCH 15/28] Fix: define PCI_STD_NUM_BARS when not 
found (#3061) ### Description A recent checkin began relying on sys/pci.h for its definition of PCI_STD_NUM_BARS. It seems that some distributions don't have a version of that file that defines PCI_STD_NUM_BARS. This change defines it to 6 when not found. ### Collateral (docs, reports, design examples, case IDs): - [ ] Document Update Required? (Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI Signed-off-by: Tim Whisonant --- libraries/plugins/uio/dfl.c | 4 ++++ libraries/plugins/vfio/dfl.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/libraries/plugins/uio/dfl.c b/libraries/plugins/uio/dfl.c index 985938500cb8..c6b433602685 100644 --- a/libraries/plugins/uio/dfl.c +++ b/libraries/plugins/uio/dfl.c @@ -31,6 +31,10 @@ #include #include +#ifndef PCI_STD_NUM_BARS +#define PCI_STD_NUM_BARS 6 +#endif // PCI_STD_NUM_BARS + #include "opae_int.h" #include "opae_uio.h" #include "dfl.h" diff --git a/libraries/plugins/vfio/dfl.c b/libraries/plugins/vfio/dfl.c index e0b4a951cf33..70f12f214d23 100644 --- a/libraries/plugins/vfio/dfl.c +++ b/libraries/plugins/vfio/dfl.c @@ -31,6 +31,10 @@ #include #include +#ifndef PCI_STD_NUM_BARS +#define PCI_STD_NUM_BARS 6 +#endif // PCI_STD_NUM_BARS + #include "opae_int.h" #include "opae_vfio.h" #include "dfl.h" From 1d6728074f333e4a54d803ca16709315a9f69be1 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:11:19 -0800 Subject: [PATCH 16/28] fix: remove clx he loopback sub command (#3059) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove clx he loopback sub command CXL host exerciser doesn’t support loopback feature, so remove lpbk sub command Signed-off-by: anandaravuri * Fix: define PCI_STD_NUM_BARS when not found (#3061) ### Description A recent checkin began relying on sys/pci.h for its definition of PCI_STD_NUM_BARS. 
It seems that some distributions don't have a version of that file that defines PCI_STD_NUM_BARS. This change defines it to 6 when not found. ### Collateral (docs, reports, design examples, case IDs): - [ ] Document Update Required? (Specify FIM/AFU/Scripts) ### Tests added: ### Tests run: CI Signed-off-by: Tim Whisonant --------- Signed-off-by: anandaravuri Signed-off-by: Tim Whisonant Co-authored-by: Tim Whisonant --- .../cxl_he_cache_lpbk_cmd.h | 81 ------------------- .../cxl_host_exerciser/cxl_host_exerciser.cpp | 2 - 2 files changed, 83 deletions(-) delete mode 100644 samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h diff --git a/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h deleted file mode 100644 index 82f2dcba91a0..000000000000 --- a/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright(c) 2023, Intel Corporation -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "cxl_host_exerciser.h" -#include "he_cache_test.h" - -namespace host_exerciser { - -class he_cache_lpbk_cmd : public he_cmd { -public: - he_cache_lpbk_cmd() {} - virtual ~he_cache_lpbk_cmd() {} - - virtual const char *name() const override { return "lpbk"; } - - virtual const char *description() const override { - return "run simple cxl he lpbk test"; - } - - virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; } - - virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; } - - virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; } - - virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; } - virtual void add_options(CLI::App *app) override { - // target host or fpga - app->add_option("--target", he_target_, - "host exerciser run on host or fpga") - ->transform(CLI::CheckedTransformer(he_targets)) - ->default_val("host"); - } - - virtual int run(test_afu *afu, CLI::App *app) { - (void)app; - // int ret = 0; - cout << "HE LPBK run" << endl; - host_exe_ = dynamic_cast(afu); - - if (!verify_numa_node()) { - numa_node_ = 0; - cout << "numa nodes are available set numa node to 0" << endl; - }; - - // reset HE cache - he_ctl_.value = 0; - he_ctl_.ResetL = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); - - he_ctl_.value = 0; - he_ctl_.ResetL = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - return 0; - } -}; -} // end of namespace host_exerciser 
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp index 8fe4eecfad02..b1281d08d178 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp @@ -28,7 +28,6 @@ #include #include "cxl_he_cache_cmd.h" -#include "cxl_he_cache_lpbk_cmd.h" #include "cxl_host_exerciser.h" void he_sig_handler(int); @@ -37,7 +36,6 @@ int main(int argc, char *argv[]) { host_exerciser::host_exerciser app; app.register_command(); - app.register_command(); // host exerciser signal handler struct sigaction act_new; From 6c1c4bdbc768385b19dde0e2056809c095110611 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:31:21 -0800 Subject: [PATCH 17/28] fix:set continuous mode bit for write hit/miss cases tests (#3062) The CXL HE write hit/miss tests set the continuous mode bit in rd_cfg instead of wr_cfg. Set the continuous mode bit in wr_cfg for the write hit/miss tests.
Forced stop with the control register ResetL bit low causes timeout failures; set ResetL to high for forced stop. Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 7deacd09c06d..9e494f1ea199 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -417,7 +417,7 @@ class he_cache_cmd : public he_cmd { // continuous mode if (he_continuousmode_) { - he_rd_cfg_.continuous_mode_enable = 0x1; + he_wr_cfg_.continuous_mode_enable = 0x1; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); @@ -652,7 +652,7 @@ class he_cache_cmd : public he_cmd { // continuous mode if (he_continuousmode_) { - he_rd_cfg_.continuous_mode_enable = 0x1; + he_wr_cfg_.continuous_mode_enable = 0x1; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); @@ -893,7 +893,7 @@ class he_cache_cmd : public he_cmd { // continuous mode if (he_continuousmode_) { - he_rd_cfg_.continuous_mode_enable = 0x1; + he_wr_cfg_.continuous_mode_enable = 0x1; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); @@ -1120,7 +1120,7 @@ class he_cache_cmd : public he_cmd { // continuous mode if (he_continuousmode_) { - he_rd_cfg_.continuous_mode_enable = 0x1; + he_wr_cfg_.continuous_mode_enable = 0x1; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); @@ -1170,6 +1170,7 @@ class he_cache_cmd : public he_cmd { // Force stop
test he_ctl_.value = 0; he_ctl_.ForcedTestCmpl = 1; + he_ctl_.ResetL = 1; host_exe_->write64(HE_CTL, he_ctl_.value); if (!he_wait_test_completion()) From 53b6de946ee43a7c8c68ed1414713c4f593a5aa8 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Wed, 20 Dec 2023 10:40:21 -0800 Subject: [PATCH 18/28] fix: remove Host reads and write miss tests (#3064) - Host reads and write miss tests are same as FPGA reads and write miss, so remove from CXL host exerciser. - Changes Host read cache hit opcode to RD_LINE_I Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 236 +----------------- .../cxl_host_exerciser/cxl_host_exerciser.h | 2 - 2 files changed, 1 insertion(+), 237 deletions(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 9e494f1ea199..e3e384ea601d 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -723,7 +723,7 @@ class he_cache_cmd : public he_cmd { he_rd_cfg_.value = 0; he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; - he_rd_cfg_.opcode = RD_LINE_S; + he_rd_cfg_.opcode = RD_LINE_I; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); // set RD_ADDR_TABLE_CTRL @@ -943,228 +943,6 @@ class he_cache_cmd : public he_cmd { return 0; } - int he_run_host_rd_cache_miss_test() { - - cout << "********** Host LLC Read cache miss test start**********" << endl; - /* - STEPS - 1) Allocate DSM, Read buffer - 2) Flush host read buffer cache - 3) Set read CXL config - 4) Run test (AFU reads from host cache(cache lines are not in host LLC)) - */ - - // HE_INFO - // Set Read number Lines - he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, he_cls_count_); - cout << "Read/write number Lines:" << he_cls_count_ << endl; - cout << "Line Repeat Count:" << he_linerep_count_ << endl; - cout << "Read address table 
size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - cout << "loop count:" << he_loop_count_ << endl; - - // set RD_CONFIG - he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = he_linerep_count_; - he_rd_cfg_.read_traffic_enable = 1; - he_rd_cfg_.opcode = RD_LINE_I; - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); - - // set RD_ADDR_TABLE_CTR - rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; - rd_table_ctl_.stride = he_stride_; - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - - // Allocate DSM buffer - if (!host_exe_->allocate_dsm()) { - cerr << "alloc dsm failed" << endl; - return -1; - } - - // Allocate Read buffer - if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { - cerr << "allocate cache read failed" << endl; - host_exe_->free_dsm(); - return -1; - } - - // continuous mode - if (he_continuousmode_) { - he_rd_cfg_.continuous_mode_enable = 0x1; - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - - // Start test - he_start_test(); - - // Continuous mode - he_continuousmode(); - - // performance - he_perf_counters(HE_CXL_RD_LATENCY); - - } else if (he_latency_iterations_ > 0) { - - // Latency loop test - double total_latency = 0; - - rd_table_ctl_.enable_address_stride = 1; - rd_table_ctl_.stride = 1; - - host_exe_->write64(HE_RD_NUM_LINES, 1); - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - - for (uint64_t i = 0; i < he_latency_iterations_; i++) { - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; - } - - total_latency = total_latency + get_ticks(); - host_exe_->logger_->info("Iteration: {0} Latency: {1:0.3f} 
nanoseconds", - i, (double)(get_ticks() * LATENCY_FACTOR)); - } //end for loop - - total_latency = total_latency * LATENCY_FACTOR; - host_exe_->logger_->info("Average Latency: {0:0.3f} nanoseconds", - total_latency / he_latency_iterations_); - - } else { - // fpga read cache hit test - host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); - he_rd_cfg_.repeat_read_fsm = he_loop_count_; - host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); - - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - return -1; - } - he_perf_counters(HE_CXL_RD_LATENCY); - } - - host_exe_->free_cache_read(); - host_exe_->free_dsm(); - - cout << "********** Ran Host LLC Read cache miss successfully ********** " - << endl; - - cout << "********** Host LLC Read cache miss test end**********" << endl; - return 0; - } - - int he_run_host_wr_cache_miss_test() { - - cout << "********** Host LLC Write cache miss test start**********" << endl; - /* - STEPS - 1) Allocate DSM, write buffer - 2) Flush host write buffer cache - 3) Set write CXL config - 4) Run test (AFU writes host memory (cache lines are not in host LLC)) - */ - - // HE_INFO - // Set write number Lines - he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); - cout << "Write number Lines:" << he_cls_count_ << endl; - cout << "Line Repeat Count:" << he_linerep_count_ << endl; - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - cout << "loop count:" << he_loop_count_ << endl; - - // set RD_CONFIG - he_wr_cfg_.value = 0; - he_wr_cfg_.line_repeat_count = he_linerep_count_; - he_wr_cfg_.write_traffic_enable = 1; - he_wr_cfg_.opcode = WR_LINE_I; - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - - // set RD_ADDR_TABLE_CTR - 
wr_table_ctl_.value = 0; - wr_table_ctl_.enable_address_stride = 1; - wr_table_ctl_.stride = he_stride_; - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, rd_table_ctl_.value); - - // Allocate DSM buffer - if (!host_exe_->allocate_dsm()) { - cerr << "alloc dsm failed" << endl; - return -1; - } - - // Allocate write buffer - if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { - cerr << "allocate cache read failed" << endl; - host_exe_->free_dsm(); - return -1; - } - - // continuous mode - if (he_continuousmode_) { - he_wr_cfg_.continuous_mode_enable = 0x1; - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - - // Start test - he_start_test(); - - // Continuous mode - he_continuousmode(); - - // performance - he_perf_counters(); - - } else { - // fpga Write cache hit test - he_wr_cfg_.repeat_write_fsm = he_loop_count_; - host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - host_exe_->write64(HE_WR_NUM_LINES, he_cls_count_); - - // Start test - he_start_test(); - - // wait for completion - if (!he_wait_test_completion()) { - he_perf_counters(); - host_exerciser_errors(); - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - return -1; - } - he_perf_counters(); - } - - host_exe_->free_cache_write(); - host_exe_->free_dsm(); - - cout << "********** Ran Host LLC Write cache miss successfully ********** " - << endl; - - cout << "********** Host LLC Write cache miss test end**********" << endl; - return 0; - } - void he_forcetestcmpl() { // Force stop test @@ -1475,8 +1253,6 @@ class he_cache_cmd : public he_cmd { (he_test_ == HE_FPGA_WR_CACHE_MISS) || (he_test_ == HE_HOST_RD_CACHE_HIT) || (he_test_ == HE_HOST_WR_CACHE_HIT) || - (he_test_ == HE_HOST_RD_CACHE_MISS) || - (he_test_ == HE_HOST_WR_CACHE_MISS) || (he_test_ == HE_CACHE_PING_PONG)) && he_target_ == 
HE_TARGET_BOTH) { @@ -1553,16 +1329,6 @@ class he_cache_cmd : public he_cmd { return ret; } - if (he_test_ == HE_HOST_RD_CACHE_MISS) { - ret = he_run_host_rd_cache_miss_test(); - return ret; - } - - if (he_test_ == HE_HOST_WR_CACHE_MISS) { - ret = he_run_host_wr_cache_miss_test(); - return ret; - } - if (he_test_ == HE_CACHE_RUNNING_POINTER) { ret = he_run_running_pointer_test(); return ret; diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 303e785b6605..67d1c921cf63 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -339,8 +339,6 @@ const std::map he_test_modes = { {"fpgawrcachemiss", HE_FPGA_WR_CACHE_MISS}, {"hostrdcachehit", HE_HOST_RD_CACHE_HIT}, {"hostwrcachehit", HE_HOST_WR_CACHE_HIT}, - {"hostrdcachemiss", HE_HOST_RD_CACHE_MISS}, - {"hostwrcachemiss", HE_HOST_WR_CACHE_MISS}, {"pingpong", HE_CACHE_PING_PONG}, {"runningpointer", HE_CACHE_RUNNING_POINTER}, }; From 8a6ff0b1ef163c5e07dc089c9e8055a666a533dc Mon Sep 17 00:00:00 2001 From: anandhv Date: Wed, 20 Dec 2023 14:21:27 -0500 Subject: [PATCH 19/28] [Fix] - fpgasupdate -- skip Factory check under certain conditions (#3063) * disallow factory image update if boot page is also factory * Flipped to log.error * comments * cleanup and comments * remove dependency on pacsign. reproduce factory constant locally * fix issue with D5005 and .gbs files --- .../opae/admin/tools/fpgasupdate.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/python/opae.admin/opae/admin/tools/fpgasupdate.py b/python/opae.admin/opae/admin/tools/fpgasupdate.py index 8f10b17560e9..11610b64d80a 100755 --- a/python/opae.admin/opae/admin/tools/fpgasupdate.py +++ b/python/opae.admin/opae/admin/tools/fpgasupdate.py @@ -826,22 +826,30 @@ def main(): # for various items, including the boot_page, so we use that here. 
It simply returns a string # indicating the boot page: fpga_factory, fpga_user1, or fpga_user2 + # But there are 2 conditions where we may skip this check altogether. + # 1. If the boot_page entry is not available + # 2. If blk0 is absent (i.e. the binary is a '.gbs' rather than the output of PACSign) + boot_page = pac.fme.boot_page - if boot_page is None: - LOG.error('Secure update failed. Could not find **/fpga_boot_image sysfs entry.') - sys.exit(1) - - LOG.debug ("Boot page sysfs path: %s\n", boot_page.sysfs_path) - LOG.debug ("Boot page value: %s\n", boot_page.value) - LOG.debug ('Block0 ConType: %s\n', blk0['ConType']) - - # The binary is produced by the PACSign utility. - # CONTENT_FACTORY is the enum that PACSign inserts into the block0 region of - # the binary to indicate that the factory image is targeted. ConType refers to 'content type' - # and indicates if the binary is factoryPR, static region, BMC-related etc. - if ((boot_page.value == 'fpga_factory') and (blk0['ConType'] == CONTENT_FACTORY)): - LOG.error('Secure update failed. Cannot update factory image when current boot-page is also factory.') - sys.exit(1) + + if (boot_page is None) or (blk0 is None): + LOG.debug('Attemping to check if boot-page==factory and flash-target==factory...') + if boot_page is None: + LOG.debug('But could not find **/fpga_boot_image sysfs entry, which tells us the boot-page. Skipping check.') + if blk0 is None: + LOG.debug('But could not find Auth Block0 in the binary, therefore this may be a .gbs binary. Skipping check.') + else: + LOG.debug ("Boot page sysfs path: %s\n", boot_page.sysfs_path) + LOG.debug ("Boot page value: %s\n", boot_page.value) + LOG.debug ('Block0 ConType: %s\n', blk0['ConType']) + + # The binary is produced by the PACSign utility. + # CONTENT_FACTORY is the enum that PACSign inserts into the block0 region of + # the binary to indicate that the factory image is targeted. 
ConType refers to 'content type' + # and indicates if the binary is factoryPR, static region, BMC-related etc. + if ((boot_page.value == 'fpga_factory') and (blk0['ConType'] == CONTENT_FACTORY)): + LOG.error('Secure update failed. Cannot update factory image when current boot-page is also factory.') + sys.exit(1) LOG.warning('Update starting. Please do not interrupt.') From de1e4478366bfbeb72c39e4ea3a5fd2bd7483032 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Tue, 2 Jan 2024 12:24:19 -0800 Subject: [PATCH 20/28] opae: bump revision to 2.11.0 (#3065) Signed-off-by: anandaravuri --- CMakeLists.txt | 2 +- packaging/opae.admin/version | 2 +- packaging/opae/version | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 352efac5ffa9..5cbee859db85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -793,7 +793,7 @@ endif() set(OPAE_VERSION_LOCAL "" CACHE STRING "OPAE local version") set(OPAE_VERSION_MAJOR 2 CACHE STRING "OPAE major version" FORCE) -set(OPAE_VERSION_MINOR 10 CACHE STRING "OPAE minor version" FORCE) +set(OPAE_VERSION_MINOR 11 CACHE STRING "OPAE minor version" FORCE) set(OPAE_VERSION_REVISION 0${OPAE_VERSION_LOCAL} CACHE STRING "OPAE revision version" FORCE) set(OPAE_VERSION ${OPAE_VERSION_MAJOR}.${OPAE_VERSION_MINOR}.${OPAE_VERSION_REVISION} CACHE STRING "OPAE version" FORCE) diff --git a/packaging/opae.admin/version b/packaging/opae.admin/version index b6382249652c..7932e8bb3670 100755 --- a/packaging/opae.admin/version +++ b/packaging/opae.admin/version @@ -24,5 +24,5 @@ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
-PROJECT_VERSION='2.10.0' +PROJECT_VERSION='2.11.0' PROJECT_RELEASE='1' diff --git a/packaging/opae/version b/packaging/opae/version index 088c8136f4a2..5ec09c7c3adc 100755 --- a/packaging/opae/version +++ b/packaging/opae/version @@ -24,5 +24,5 @@ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -declare -r PROJECT_VERSION='2.10.0' +declare -r PROJECT_VERSION='2.11.0' declare -r PROJECT_RELEASE='1' From 58607436752b8936148a113a1daa466d38e9ec4f Mon Sep 17 00:00:00 2001 From: pl-ravikanth <97520284+pl-ravikanth@users.noreply.github.com> Date: Wed, 3 Jan 2024 21:44:33 +0530 Subject: [PATCH 21/28] [Feature]: samples:cxl_hello_fpga : Add support for Hello fpga feature for CMC (#3052) * samples:cxl_hello_fpga:Add hello fpga support for CMC samples/CMakeLists.txt: Added cxl_hello_fpga module for compilation samples:cxl_hello_fpga: Added this folder updates pertaining to Hello FPGA feature for CMC Signed-off-by: Lakshmiravikanth Pammi * samples:cxl_hello_fpga : Add support for Hello fpga feature for CMC Problem/Feature: 1. A new feature, hello fpga, is introduced for CMC 2. CL bytes format is changed by RTL team Fix/Workaround: 1. Updates pertaining to hello fpga feature are newly added 2. Driver is updated as per the change in CL integrity check format. Changes made: New folder named as cxl_hello_fpga along with required files are introduced for supporting hello_fpga feature 1.Review comments shared by Ananda are added. 2.Clang-format is applied for all source files available in cxl_hello_fpga as suggested by Gerlach, Matthew. 
3.Integrity check API is updated as per the new CL format introduced by RTL team Signed-off-by: Lakshmiravikanth Pammi --------- Signed-off-by: Lakshmiravikanth Pammi Co-authored-by: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> --- opae.spec.fedora | 1 + packaging/opae/deb/opae-extra-tools.install | 1 + samples/CMakeLists.txt | 1 + samples/cxl_hello_fpga/CMakeLists.txt | 66 ++ samples/cxl_hello_fpga/cxl_he_cmd.h | 313 +++++++ samples/cxl_hello_fpga/cxl_hello_fpga.cpp | 49 ++ samples/cxl_hello_fpga/cxl_hello_fpga.h | 420 ++++++++++ samples/cxl_hello_fpga/cxl_hello_fpga_cmd.h | 271 ++++++ samples/cxl_hello_fpga/he_cache_test.h | 880 ++++++++++++++++++++ 9 files changed, 2002 insertions(+) create mode 100644 samples/cxl_hello_fpga/CMakeLists.txt create mode 100644 samples/cxl_hello_fpga/cxl_he_cmd.h create mode 100644 samples/cxl_hello_fpga/cxl_hello_fpga.cpp create mode 100644 samples/cxl_hello_fpga/cxl_hello_fpga.h create mode 100644 samples/cxl_hello_fpga/cxl_hello_fpga_cmd.h create mode 100644 samples/cxl_hello_fpga/he_cache_test.h diff --git a/opae.spec.fedora b/opae.spec.fedora index 4d3ee7f08513..c3228be1ce39 100644 --- a/opae.spec.fedora +++ b/opae.spec.fedora @@ -357,6 +357,7 @@ done %{_bindir}/ofs.uio %{_bindir}/cxl_mem_tg %{_bindir}/cxl_host_exerciser +%{_bindir}/cxl_hello_fpga %{python3_sitearch}/opae.diag* %{python3_sitearch}/opae/diag* diff --git a/packaging/opae/deb/opae-extra-tools.install b/packaging/opae/deb/opae-extra-tools.install index a363035c3704..c450e9489c94 100644 --- a/packaging/opae/deb/opae-extra-tools.install +++ b/packaging/opae/deb/opae-extra-tools.install @@ -19,6 +19,7 @@ usr/bin/fpga_dma_test usr/bin/host_exerciser usr/bin/cxl_mem_tg usr/bin/cxl_host_exerciser +usr/bin/cxl_hello_fpga usr/bin/bist usr/bin/hps usr/bin/hssi diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index ab942e774b65..edcb78d7cd14 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -71,4 +71,5 @@ 
opae_add_subdirectory(n5010-test) opae_add_subdirectory(n5010-ctl) opae_add_subdirectory(cxl_mem_tg) opae_add_subdirectory(cxl_host_exerciser) +opae_add_subdirectory(cxl_hello_fpga) diff --git a/samples/cxl_hello_fpga/CMakeLists.txt b/samples/cxl_hello_fpga/CMakeLists.txt new file mode 100644 index 000000000000..052527652a75 --- /dev/null +++ b/samples/cxl_hello_fpga/CMakeLists.txt @@ -0,0 +1,66 @@ +## Copyright(c) 2023, Intel Corporation +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## * Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimer. +## * Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## * Neither the name of Intel Corporation nor the names of its contributors +## may be used to endorse or promote products derived from this software +## without specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +## POSSIBILITY OF SUCH DAMAGE. 
+ +if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) + + if (fmt_LIBRARIES) + # if we found fmt before (from CMakeLists.txt) + # then we need to find it again from this directory + # so we can "import" the fmt::fmt link target + find_package(fmt) + endif (fmt_LIBRARIES) + + opae_add_executable(TARGET cxl_hello_fpga + SOURCE cxl_hello_fpga.cpp + LIBS + opae-cxx-core + opae-c + ${spdlog_LIBRARIES} + ${json-c_LIBRARIES} + ${uuid_LIBRARIES} + ${numa_LIBRARIES} + ${fmt_LIBRARIES} + COMPONENT samplebin + ) + + target_include_directories(cxl_hello_fpga + PRIVATE + ${OPAE_INCLUDE_PATHS} + ${CMAKE_CURRENT_SOURCE_DIR} + ${OPAE_LIB_SOURCE}/plugins/xfpga/ + ${CLI11_INCLUDE_DIRS} + ${numa_INCLUDE_DIRS} + ${spdlog_INCLUDE_DIRS}) + + target_compile_options(cxl_hello_fpga PUBLIC + -Wno-unused-result + ) + + target_compile_definitions(cxl_hello_fpga PUBLIC + ${spdlog_DEFINITIONS} + ) + +endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) diff --git a/samples/cxl_hello_fpga/cxl_he_cmd.h b/samples/cxl_hello_fpga/cxl_he_cmd.h new file mode 100644 index 000000000000..ef6fbd40edb0 --- /dev/null +++ b/samples/cxl_hello_fpga/cxl_he_cmd.h @@ -0,0 +1,313 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +#pragma once +#include +#include + +#include + +#include "cxl_he_cmd.h" +#include "cxl_hello_fpga.h" +#include "he_cache_test.h" + +#define HE_TEST_STARTED "Test started ......" + +namespace hello_fpga { + +class he_cmd : public test_command { + public: + he_cmd() + : host_exe_(NULL), + he_clock_mhz_(400), + numa_node_(0), + he_target_(0), + he_bias_(0) { + he_ctl_.value = 0; + he_info_.value = 0; + he_rd_cfg_.value = 0; + he_wr_cfg_.value = 0; + rd_table_ctl_.value = 0; + wr_table_ctl_.value = 0; + } + + virtual ~he_cmd() {} + + // Convert number of transactions to bandwidth (GB/s) + double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks) { + return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks)); + } + + void he_perf_counters(he_cxl_latency cxl_latency = HE_CXL_LATENCY_NONE) { + volatile he_cache_dsm_status *dsm_status = NULL; + + dsm_status = reinterpret_cast( + (uint8_t *)(host_exe_->get_dsm())); + if (!dsm_status) return; + + cout << "\n********* DSM Status CSR Start *********" << endl; + cout << "test completed :" << dsm_status->test_completed << endl; + cout << "dsm number:" << dsm_status->dsm_number << endl; + cout << "error vector:" << 
dsm_status->err_vector << endl; + cout << "num ticks:" << dsm_status->num_ticks << endl; + cout << "num reads:" << dsm_status->num_reads << endl; + cout << "num writes:" << dsm_status->num_writes << endl; + cout << "penalty start:" << dsm_status->penalty_start << endl; + cout << "penalty end:" << dsm_status->penalty_end << endl; + cout << "actual data:" << dsm_status->actual_data << endl; + cout << "expected data:" << dsm_status->expected_data << endl; + + // print bandwidth + if (dsm_status->num_ticks > 0) { + double perf_data = + he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, + dsm_status->num_ticks); + host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); + } + + if (cxl_latency == HE_CXL_RD_LATENCY) { + if (dsm_status->num_ticks > 0 && dsm_status->num_reads > 0) { + double latency = + (double)((dsm_status->num_ticks / (double)dsm_status->num_reads) * + (2.5)); + + host_exe_->logger_->info("Read Latency : {0:0.2f} nanoseconds", + latency); + } else { + host_exe_->logger_->info("Read Latency: N/A"); + } + } + + cout << "********* DSM Status CSR end *********" << endl; + } + + void print_csr() { + host_exe_->logger_->debug("HE_DFH:0x{:x}", host_exe_->read64(HE_DFH)); + host_exe_->logger_->debug("HE_ID_L:0x{:x}", host_exe_->read64(HE_ID_L)); + host_exe_->logger_->debug("HE_ID_H:0x{:x}", host_exe_->read64(HE_ID_H)); + + host_exe_->logger_->debug("HE_SCRATCHPAD0:0x{:x}", + host_exe_->read64(HE_SCRATCHPAD0)); + + host_exe_->logger_->debug("HE_DSM_BASE:0x{:x}", + host_exe_->read64(HE_DSM_BASE)); + + host_exe_->logger_->debug("HE_CTL:0x{:x}", host_exe_->read64(HE_CTL)); + + host_exe_->logger_->debug("HE_INFO:0x{:x}", host_exe_->read64(HE_INFO)); + + host_exe_->logger_->debug("HE_WR_NUM_LINES:0x{:x}", + host_exe_->read64(HE_WR_NUM_LINES)); + + host_exe_->logger_->debug("HE_WR_BYTE_ENABLE:0x{:x}", + host_exe_->read64(HE_WR_BYTE_ENABLE)); + + host_exe_->logger_->debug("HE_WR_CONFIG:0x{:x}", + host_exe_->read64(HE_WR_CONFIG)); + + 
host_exe_->logger_->debug("HE_WR_ADDR_TABLE_CTRL:0x{:x}", + host_exe_->read64(HE_WR_ADDR_TABLE_CTRL)); + + host_exe_->logger_->debug("HE_WR_ADDR_TABLE_DATA:0x{:x}", + host_exe_->read64(HE_WR_ADDR_TABLE_DATA)); + + host_exe_->logger_->debug("HE_RD_NUM_LINES:0x{:x}", + host_exe_->read64(HE_RD_NUM_LINES)); + + host_exe_->logger_->debug("HE_RD_CONFIG:0x{:x}", + host_exe_->read64(HE_RD_CONFIG)); + + host_exe_->logger_->debug("HE_RD_ADDR_TABLE_CTRL:0x{:x}", + host_exe_->read64(HE_RD_ADDR_TABLE_CTRL)); + + host_exe_->logger_->debug("HE_RD_ADDR_TABLE_DATA:0x{:x}", + host_exe_->read64(HE_RD_ADDR_TABLE_DATA)); + + host_exe_->logger_->debug("HE_ERROR_STATUS:0x{:x}", + host_exe_->read64(HE_ERROR_STATUS)); + + host_exe_->logger_->debug("HE_ERROR_EXP_DATA:0x{:x}", + host_exe_->read64(HE_ERROR_EXP_DATA)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA0:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA0)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA1:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA1)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA2:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA2)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA3:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA3)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA4:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA4)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA5:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA5)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA6:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA6)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA7:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA7)); + } + + void host_exerciser_errors() { + he_err_status err_status; + uint64_t err = 0; + if (host_exe_ == NULL) return; + + err_status.value = host_exe_->read64(HE_ERROR_STATUS); + if (err_status.data_error == 1) { + cout << "Data Integrity Check error occured" << endl; + } + + if (err_status.err_index > 0) { + cout << "Error occurred at cache line address:" << err_status.err_index + << 
endl; + } + + err = host_exe_->read64(HE_ERROR_EXP_DATA); + cout << "Error Expected Data:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA0); + cout << "Error Expected Data0:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA1); + cout << "Error Expected Data1:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA2); + cout << "Error Expected Data2:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA3); + cout << "Error Expected Data3:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA4); + cout << "Error Expected Data4:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA5); + cout << "Error Expected Data5:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA6); + cout << "Error Expected Data6:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA7); + cout << "Error Expected Data7:" << err << endl; + } + + int parse_input_options() { + if (!host_exe_) return -1; + + return 0; + } + + bool he_wait_test_completion(const char *str = HE_TEST_STARTED) { + /* Wait for test completion */ + uint32_t timeout = HE_CACHE_TEST_TIMEOUT; + + cout << str << endl; + volatile uint8_t *status_ptr = host_exe_->get_dsm(); + while (0 == ((*status_ptr) & 0x1)) { + usleep(HE_CACHE_TEST_SLEEP_INVL); + if (--timeout == 0) { + cout << "HE Cache time out error" << endl; + return false; + } + } + return true; + } + + bool he_set_bias_mode() { + // Target memory HOST set BIAS host + if (he_target_ == HE_TARGET_HOST) { + he_ctl_.bias_support = HOSTMEM_BIAS; + // Target memory FPGA set BIAS host/device + if (he_bias_ == HOSTMEM_BIAS) { + he_ctl_.bias_support = HOSTMEM_BIAS; + } else { + cerr << "Wrong BIAS mode for specified target memory type" << endl; + return false; + } + } else { + // Target memory FPGA set BIAS host/device + if (he_bias_ == HOSTMEM_BIAS) { + he_ctl_.bias_support = FPGAMEM_HOST_BIAS; + } else { + he_ctl_.bias_support = FPGAMEM_DEVICE_BIAS; + } + } + + return true; + } + + void 
he_start_test() { + // start test + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + } + + bool verify_numa_node() { + if (numa_available() < 0) { + cerr << "System does not support NUMA API" << endl; + return false; + } + + int n = numa_max_node(); + cout << "Number nodes on system:" << n + 1 << endl; + + int numa_node = numa_node_of_cpu(sched_getcpu()); + cout << "HE Cache app numa node:" << numa_node << endl; + + if (he_target_ == HE_TARGET_HOST) { + numa_node_ = numa_node; + cout << "HE_TARGET_HOST numa node:" << numa_node_ << endl; + } else { + // find fpga numa node number + numa_node_ = 2; + cout << "HE_TARGET_FPGA numa node:" << numa_node_ << endl; + } + + return true; + } + + protected: + hello_fpga *host_exe_; + uint32_t he_clock_mhz_; + uint32_t numa_node_; + uint32_t he_target_; + uint32_t he_bias_; + + he_ctl he_ctl_; + he_info he_info_; + he_rd_config he_rd_cfg_; + he_wr_config he_wr_cfg_; + he_rd_addr_table_ctrl rd_table_ctl_; + he_wr_addr_table_ctrl wr_table_ctl_; +}; +} // end of namespace hello_fpga diff --git a/samples/cxl_hello_fpga/cxl_hello_fpga.cpp b/samples/cxl_hello_fpga/cxl_hello_fpga.cpp new file mode 100644 index 000000000000..71e5406b8c5b --- /dev/null +++ b/samples/cxl_hello_fpga/cxl_hello_fpga.cpp @@ -0,0 +1,49 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#include "cxl_hello_fpga.h" + +#include + +#include +#include + +#include "cxl_hello_fpga_cmd.h" + +void he_sig_handler(int); + +int main(int argc, char *argv[]) { + hello_fpga::hello_fpga app; + app.register_command(); + + // host exerciser signal handler + struct sigaction act_new; + memset(&act_new, 0, sizeof(act_new)); + + act_new.sa_handler = he_sig_handler; + sigaction(SIGINT, &act_new, NULL); + + return app.main(argc, argv); +} diff --git a/samples/cxl_hello_fpga/cxl_hello_fpga.h b/samples/cxl_hello_fpga/cxl_hello_fpga.h new file mode 100644 index 000000000000..41aaa0412d17 --- /dev/null +++ b/samples/cxl_hello_fpga/cxl_hello_fpga.h @@ -0,0 +1,420 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include "he_cache_test.h" + +#define MEM_TG_FEATURE_ID 0x25 +#define MEM_TG_FEATURE_GUIDL 0x81599b5c2ebd4b23 +#define MEM_TG_FEATURE_GUIDH 0x0118e06b1fa349b9 +#define HELLO_FPGA_CL_HEADER 0x36db6db6 +const char *HE_CACHE_AFU_ID = "0118E06B-1FA3-49B9-8159-9b5C2EBD4b23"; + +namespace hello_fpga { + +static const uint64_t HE_CACHE_TEST_TIMEOUT = 30000; +static const uint64_t HE_CACHE_TEST_SLEEP_INVL = 100; +static const uint64_t CL = 64; +static const uint64_t KB = 1024; +static const uint64_t MB = KB * 1024; +static const uint64_t BUFFER_SIZE_2MB = 2 * MB; +static const uint64_t BUFFER_SIZE_32KB = 32 * KB; +static const uint64_t FPGA_32KB_CACHE_LINES = (32 * KB) / 64; +static const uint64_t FPGA_2MB_CACHE_LINES = (2 * MB) / 64; +static const uint64_t FPGA_512CACHE_LINES = 512; +static const uint64_t HELLO_FPGA_NUMCACHE_LINES = 5; + +// Host execiser CSR Offset +enum { + HE_DFH = 0x0000, + HE_ID_L = 0x0008, + HE_ID_H = 0x0010, + HE_DFH_RSVD0 = 0x0018, + HE_DFH_RSVD1 = 0x0020, + HE_SCRATCHPAD0 = 0x028, + HE_DSM_BASE = 0x030, + HE_CTL = 0x038, + HE_INFO = 0x040, + HE_WR_NUM_LINES = 0x048, + HE_WR_BYTE_ENABLE = 0x050, + HE_WR_CONFIG = 0x058, + HE_WR_ADDR_TABLE_CTRL = 0x060, + HE_WR_ADDR_TABLE_DATA = 0x068, + HE_RD_NUM_LINES = 0x070, + HE_RD_CONFIG = 0x078, + HE_RD_ADDR_TABLE_CTRL = 0x080, + HE_RD_ADDR_TABLE_DATA = 0x088, + HE_ERROR_STATUS = 0x090, + HE_ERROR_EXP_DATA = 0x098, + HE_ERROR_ACT_DATA0 = 0x0A0, + 
HE_ERROR_ACT_DATA1 = 0x0A8, + HE_ERROR_ACT_DATA2 = 0x0B0, + HE_ERROR_ACT_DATA3 = 0x0B8, + HE_ERROR_ACT_DATA4 = 0x0C0, + HE_ERROR_ACT_DATA5 = 0x0C8, + HE_ERROR_ACT_DATA6 = 0x0D0, + HE_ERROR_ACT_DATA7 = 0x0D8, +}; + +// Read Traffic Opcode +typedef enum { + RD_LINE_I = 0x0, + RD_LINE_S = 0x1, + RD_LINE_EM = 0x2, +} he_rd_opcode; + +// Write Traffic Opcode +typedef enum { + WR_LINE_I = 0x0, + WR_LINE_M = 0x1, + WR_PUSH_I = 0x2, + WR_BARRIER_FRNCE = 0x3, + WR_FLUSH_CL = 0x4, + WR_FLUSH_CL_HCOH = 0x5, + WR_FLUSH_CL_DCOH = 0x6, +} he_wr_opcode; + +// DFH Header +union he_dfh { + enum { offset = HE_DFH }; + uint64_t value; + struct { + uint64_t CcipVersionNumber : 12; + uint64_t AfuMajVersion : 4; + uint64_t NextDfhOffset : 24; + uint64_t EOL : 1; + uint64_t Reserved : 19; + uint64_t FeatureType : 4; + }; +}; + +// DSM BASE +union he_dsm_base { + enum { offset = HE_DSM_BASE }; + uint64_t value; + struct { + uint64_t DsmBase : 64; + }; +}; + +// CSR CTL +union he_ctl { + enum { offset = HE_CTL }; + uint64_t value; + struct { + uint64_t ResetL : 1; + uint64_t Start : 1; + uint64_t ForcedTestCmpl : 1; + uint64_t bias_support : 2; + uint64_t Reserved : 59; + }; +}; + +// CSR INFO +union he_info { + enum { offset = HE_INFO }; + uint64_t value; + struct { + uint64_t write_addr_table_size : 4; + uint64_t read_addr_table_size : 4; + uint64_t Reserved : 56; + }; +}; + +// HE_WR_NUM_LINES +union he_wr_num_lines { + enum { offset = HE_WR_NUM_LINES }; + uint64_t value; + struct { + uint64_t write_num_lines : 16; + uint64_t reserved : 48; + }; +}; + +// HE_WR_BYTE_ENABLE +union he_wr_byte_enable { + enum { offset = HE_WR_BYTE_ENABLE }; + uint64_t value; + struct { + uint64_t write_byte_enable : 64; + }; +}; + +// HE_WR_CONFIG +union he_wr_config { + enum { offset = HE_WR_CONFIG }; + uint64_t value; + struct { + uint64_t write_traffic_enable : 1; + uint64_t continuous_mode_enable : 1; + uint64_t waitfor_completion : 1; + uint64_t preread_sync_enable : 1; + uint64_t postread_sync_enable 
: 1; + uint64_t data_pattern : 2; + uint64_t cl_evict_enable : 1; + uint64_t opcode : 4; + uint64_t line_repeat_count : 8; + uint64_t reserved : 44; + }; +}; + +// HE_WR_ADDR_TABLE_CTRL +union he_wr_addr_table_ctrl { + enum { offset = HE_WR_ADDR_TABLE_CTRL }; + uint64_t value; + struct { + uint64_t enable_address_table : 1; + uint64_t enable_address_stride : 1; + uint64_t stride : 2; + uint64_t reserved : 60; + }; +}; + +// HE_WR_ADDR_TABLE_DATA +union he_wr_addr_table_data { + enum { offset = HE_WR_ADDR_TABLE_DATA }; + uint64_t value; + struct { + uint64_t address_table_value : 64; + }; +}; + +// HE_RD_NUM_LINES +union he_rd_num_lines { + enum { offset = HE_RD_NUM_LINES }; + uint64_t value; + struct { + uint64_t read_num_lines : 16; + uint64_t reserved : 48; + }; +}; + +// HE_RD_CONFIG +union he_rd_config { + enum { offset = HE_RD_CONFIG }; + uint64_t value; + struct { + uint64_t read_traffic_enable : 1; + uint64_t continuous_mode_Enable : 1; + uint64_t waitfor_completion : 1; + uint64_t prewrite_sync_enable : 1; + uint64_t postwrite_sync_enable : 1; + uint64_t data_pattern : 2; + uint64_t cl_evict_enable : 1; + uint64_t opcode : 4; + uint64_t line_repeat_count : 8; + uint64_t reserved : 44; + }; +}; + +// HE_RD_ADDR_TABLE_CTRL +union he_rd_addr_table_ctrl { + enum { offset = HE_RD_ADDR_TABLE_CTRL }; + uint64_t value; + struct { + uint64_t enable_address_table : 1; + uint64_t enable_address_stride : 1; + uint64_t stride : 2; + uint64_t reserved : 60; + }; +}; + +// HE_RD_ADDR_TABLE_DATA +union he_rd_addr_table_data { + enum { offset = HE_RD_ADDR_TABLE_DATA }; + uint64_t value; + struct { + uint64_t address_table_value : 64; + }; +}; + +// ERROR_STATUS +union he_err_status { + enum { offset = HE_ERROR_STATUS }; + uint64_t value; + struct { + uint64_t data_error : 1; + uint64_t rsvd1 : 15; + uint64_t err_index : 16; + uint64_t rsvd2 : 32; + }; +}; + +// HE DSM status +struct he_cache_dsm_status { + uint32_t test_completed : 1; + uint32_t dsm_number : 15; + uint32_t 
res1 : 16; + uint32_t err_vector : 32; + uint64_t num_ticks : 64; + uint32_t num_reads : 32; + uint32_t num_writes : 32; + uint32_t penalty_start : 32; + uint32_t penalty_end : 32; + uint32_t actual_data : 32; + uint32_t expected_data : 32; + uint32_t res5[2]; +}; + +// configures test mode +typedef enum { + + HE_HELLO_FPGA = 0x0, + +} he_test_mode; + +// configures traget +typedef enum { + HE_TARGET_HOST = 0x0, + HE_TARGET_FPGA = 0x1, +} he_target; + +// he cxl cache latency +typedef enum { + HE_CXL_LATENCY_NONE = 0x0, + HE_CXL_RD_LATENCY = 0x1, + HE_CXL_WR_LATENCY = 0x2, + HE_CXL_RD_WR_LATENCY = 0x3, +} he_cxl_latency; + +const std::map he_test_modes = { + {"hellofpga", HE_HELLO_FPGA}, +}; + +// Bias Support +typedef enum { + HOSTMEM_BIAS = 0x0, + HOST_BIAS_NA = 0x1, + FPGAMEM_HOST_BIAS = 0x2, + FPGAMEM_DEVICE_BIAS = 0x3, +} he_bisa_support; + +const std::map he_targets = { + {"host", HE_TARGET_HOST}, + {"fpga", HE_TARGET_FPGA}, +}; + +// Bias support +const std::map he_bias = { + {"host", HOSTMEM_BIAS}, + {"device", FPGAMEM_DEVICE_BIAS}, +}; + +// he cxl cache device instance +typedef enum { + HE_CXL_DEVICE0 = 0x0, + HE_CXL_DEVICE1 = 0x1, +} he_cxl_dev; + +const std::map he_cxl_device = { + {"/dev/dfl-cxl-cache.0", HE_CXL_DEVICE0}, + {"/dev/dfl-cxl-cache.1", HE_CXL_DEVICE1}, +}; + +// configures test mode +typedef enum { + HE_ADDRTABLE_SIZE4096 = 0xC, + HE_ADDRTABLE_SIZE2048 = 0xB, + HE_ADDRTABLE_SIZE1024 = 0xA, + HE_ADDRTABLE_SIZE512 = 0x9, + HE_ADDRTABLE_SIZE256 = 0x8, + HE_ADDRTABLE_SIZE128 = 0x7, + HE_ADDRTABLE_SIZE64 = 0x6, + HE_ADDRTABLE_SIZE32 = 0x5, + HE_ADDRTABLE_SIZE16 = 0x4, + HE_ADDRTABLE_SIZE8 = 0x3, + HE_ADDRTABLE_SIZE4 = 0x2, + HE_ADDRTABLE_SIZE2 = 0x1, +} he_addrtable_size; + +// he test type +typedef enum { + HE_ENABLE_TRAFFIC_STAGE = 0x0, + HE_SIP_SEQ_STAGE = 0x1, +} he_traffic_enable; + +const std::map traffic_enable = { + {"enable", HE_ENABLE_TRAFFIC_STAGE}, + {"skip", HE_SIP_SEQ_STAGE}, +}; + +std::map addrtable_size = { + 
{HE_ADDRTABLE_SIZE4096, 4096}, {HE_ADDRTABLE_SIZE2048, 2048}, + {HE_ADDRTABLE_SIZE1024, 1024}, {HE_ADDRTABLE_SIZE512, 512}, + {HE_ADDRTABLE_SIZE256, 256}, {HE_ADDRTABLE_SIZE128, 128}, + {HE_ADDRTABLE_SIZE64, 64}, {HE_ADDRTABLE_SIZE32, 32}, + {HE_ADDRTABLE_SIZE16, 16}, {HE_ADDRTABLE_SIZE8, 8}, + {HE_ADDRTABLE_SIZE4, 4}, {HE_ADDRTABLE_SIZE2, 2}, + +}; + +using test_afu = opae::afu_test::afu; +using test_command = opae::afu_test::command; + +class hello_fpga : public test_afu { + public: + hello_fpga() : test_afu("hello_fpga", nullptr, "info"), count_(1) {} + + virtual int run(CLI::App *app, test_command::ptr_t test) override { + int res = exit_codes::not_run; + + logger_->set_pattern(" %v"); + // Info prints details of an individual run. Turn it on if doing only one + // test and the user hasn't changed level from the default. + if ((log_level_.compare("warning") == 0)) + logger_->set_level(spdlog::level::info); + + logger_->info("starting test run, count of {0:d}", count_); + uint32_t count = 0; + try { + while (count < count_) { + logger_->debug("starting iteration: {0:d}", count + 1); + + res = test_afu::run(app, test); + count++; + logger_->debug("end iteration: {0:d}", count); + if (res) break; + } + } catch (std::exception &ex) { + logger_->error(ex.what()); + res = exit_codes::exception; + } + + auto pass = res == exit_codes::success ? 
"PASS" : "FAIL"; + logger_->info("Test {}({}): {}", test->name(), count, pass); + spdlog::drop_all(); + return res; + } + + public: + uint32_t count_; + + bool option_passed(std::string option_str) { + if (app_.count(option_str) == 0) return false; + return true; + } +}; +} // namespace hello_fpga diff --git a/samples/cxl_hello_fpga/cxl_hello_fpga_cmd.h b/samples/cxl_hello_fpga/cxl_hello_fpga_cmd.h new file mode 100644 index 000000000000..5da124ba8f0f --- /dev/null +++ b/samples/cxl_hello_fpga/cxl_hello_fpga_cmd.h @@ -0,0 +1,271 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include "cxl_he_cmd.h" +#include "cxl_hello_fpga.h" +#include "he_cache_test.h" + +// HE exit global flag +volatile bool g_he_exit = false; + +// host exerciser signal handler +void he_sig_handler(int) { + g_he_exit = true; + cout << "HE signal handler exit app" << endl; +} + +namespace hello_fpga { + +class he_cache_cmd : public he_cmd { + public: + he_cache_cmd() : he_test_(0) {} + + virtual ~he_cache_cmd() {} + + virtual const char *name() const override { return "hellofpga"; } + + virtual const char *description() const override { + return "run simple cxl hello fpga test"; + } + + virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; } + + virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; } + + virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; } + + virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; } + + virtual void add_options(CLI::App *app) override { + // test mode + app->add_option("host exerciser cache test") + ->transform(CLI::CheckedTransformer(he_test_modes)) + ->default_val("hellofpga"); + } + + bool hello_fpga_data_intg_check(uint8_t *buf_address) { + uint8_t *data_buff_addr_ = NULL; + uint16_t hw_CL = 0; + uint32_t cl_idx = 0; + uint8_t idx = 0; + uint32_t header_data_ = 0; + uint64_t hw_phy_addr_ = 0; + volatile uint64_t *act_wr_buff_phy_addr = NULL; + + act_wr_buff_phy_addr = host_exe_->get_write_buff_phy_addr(); + + 
for (cl_idx = 0; cl_idx < (HELLO_FPGA_NUMCACHE_LINES); cl_idx++) { + data_buff_addr_ = (buf_address + (CL * cl_idx)); + + hw_CL = (((uint16_t)data_buff_addr_[0]) | + ((uint16_t)(data_buff_addr_[1]) << 8)); + + if (hw_CL != cl_idx) { + cout << "\n Cache Line counter mismatch " << endl; + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return false; + } + + header_data_ = (data_buff_addr_[2] | (data_buff_addr_[3] << 8) | + (data_buff_addr_[4] << 16) | ((data_buff_addr_[5] & 0x3F) << 24)); + + if (HELLO_FPGA_CL_HEADER != header_data_) { + cout << "\n Cache Line Header value mismatch - " << header_data_ + << endl; + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return false; + } + + if (0 != ((data_buff_addr_[5] >> 6) & 0x3)) { + cout << "\n Stride value mismatch " << endl; + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return false; + } + + for (idx = 6; idx < 56; idx++) { + if (0 != data_buff_addr_[idx]) { + cout << "\n A non-ZERO value is observed at byte " << idx << endl; + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return false; + } + } + + hw_phy_addr_ = ((uint64_t)(data_buff_addr_[56]) | + ((uint64_t)(data_buff_addr_[57]) << 8) | + ((uint64_t)(data_buff_addr_[58]) << 16) | + ((uint64_t)(data_buff_addr_[59]) << 24) | + ((uint64_t)(data_buff_addr_[60]) << 32) | + ((uint64_t)(data_buff_addr_[61]) << 40) | + ((uint64_t)(data_buff_addr_[62]) << 48) | + ((uint64_t)(data_buff_addr_[63]) << 56)); + + if (*act_wr_buff_phy_addr != hw_phy_addr_) { + cout << "\n Cache Line Physical Address mismatch " << endl; + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return false; + } + + *act_wr_buff_phy_addr = *act_wr_buff_phy_addr + CL; + } + return true; + } + + int he_run_hello_fpga_test() { + cout << "Hello FPGA Start" << endl; + + uint8_t *data_buff_addr = NULL; + + /* + STEPS + 1) Allocate DSM, Read buffer, Write buffer + 2) Write number of lines more then 32 kb 2mb/64 + 3) Set WR ItoMWr 
(CXL) config + 4) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) + */ + + /* + HE_INFO + Set Read number Lines + */ + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_WR_NUM_LINES, HELLO_FPGA_NUMCACHE_LINES); + + cout << "Read/write number Lines:" << HELLO_FPGA_NUMCACHE_LINES << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + /* + * set W_CONFIG + */ + he_wr_cfg_.value = 0; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_M; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + /* + * Set WR_ADDR_TABLE_CTRL + */ + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + /* + * Allocate DSM buffer + */ + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + /* + * Allocate Read, Write buffer + */ + if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + /* + * Obtain Write address + */ + data_buff_addr = host_exe_->get_read_write(); + + memset(data_buff_addr, 0xFF, BUFFER_SIZE_2MB); + + /* + * Start test + */ + he_start_test(); + + cout << "AFU Configuration : Successful" << endl; + /* + * wait for completion + */ + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + if (!hello_fpga_data_intg_check(data_buff_addr)) { + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + cout << "DATA Integrity Check : Failed" << endl; + return -1; + } + cout << "DATA Integrity Check : Successful" << endl; + + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + + cout << "Hello FPGA Executed Successfully" << endl; + return 0; + } + + 
virtual int run(test_afu *afu, CLI::App *app) { + (void)app; + int ret = 0; + + host_exe_ = dynamic_cast(afu); + + if (!verify_numa_node()) { + numa_node_ = 0; + cout << "numa nodes are available set numa node to 0" << endl; + }; + + // reset HE cache + he_ctl_.value = 0; + he_ctl_.ResetL = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + he_ctl_.ResetL = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + + print_csr(); + + if (he_test_ == HE_HELLO_FPGA) { + ret = he_run_hello_fpga_test(); + return ret; + } + + return 0; + } + + protected: + uint32_t he_test_; +}; + +} // end of namespace hello_fpga diff --git a/samples/cxl_hello_fpga/he_cache_test.h b/samples/cxl_hello_fpga/he_cache_test.h new file mode 100644 index 000000000000..52699c1b7891 --- /dev/null +++ b/samples/cxl_hello_fpga/he_cache_test.h @@ -0,0 +1,880 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fpga-dfl.h" + +using namespace std; + +const char *sbdf_pattern = + "(([0-9a-fA-F]{4}):)?([0-9a-fA-F]{2}):([0-9a-fA-F]{2})\\.([0-9])"; + +enum { MATCHES_SIZE = 6 }; +#define FEATURE_DEV \ + "/sys/bus/pci/devices/%s/" \ + "fpga_region/region*/dfl-fme*/dfl_dev*/feature_id" + +#define MAX_SIZE 256 +#define MAX_HE_CACHE_DEVICE 2 + +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#define MAP_2M_HUGEPAGE (0x15 << MAP_HUGE_SHIFT) /* 2 ^ 0x15 = 2M */ +#define MAP_1G_HUGEPAGE (0x1e << MAP_HUGE_SHIFT) /* 2 ^ 0x1e = 1G */ + +#ifdef __ia64__ +#define ADDR ((void *)(0x8000000000000000UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#else +#define ADDR ((void *)(0x0UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#endif + +#define KiB(x) ((x)*1024) +#define MiB(x) ((x)*1024 * 1024) +#define GiB(x) ((x)*1024 * 1024 * 1024) + +#define 
DFL_CXL_CACHE_DSM_BASE 0x030 +#define DFL_CXL_CACHE_WR_ADDR_TABLE_DATA 0x068 +#define DFL_CXL_CACHE_RD_ADDR_TABLE_DATA 0x088 + +// buffer access type +typedef enum { + HE_CACHE_DMA_MMAP_RW = 0x0, + HE_CACHE_DMA_MMAP_R = 0x1, +} he_mmap_access; + +bool buffer_allocate(void **addr, uint64_t len, uint32_t numa_node) { + void *addr_local = NULL; + int i = 0; + long status = 0; + unsigned long mask[4]; + unsigned int bits_per_UL = sizeof(unsigned long) * 8; + + for (i = 0; i < 4; i++) mask[i] = 0; + mask[numa_node / bits_per_UL] |= 1UL << (numa_node % bits_per_UL); + + if (len > MiB(2)) + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_1G, 0, 0); + else if (len > KiB(4)) + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_2M, 0, 0); + else + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_4K, 0, 0); + + if (addr_local == MAP_FAILED) { + if (errno == ENOMEM) { + if (len > MiB(2)) + cerr << "Could not allocate buffer (no free 1 " + "GiB huge pages)"; + if (len > KiB(4)) + cerr << "Could not allocate buffer (no free 2 " + "MiB huge pages)"; + else + cerr << "Could not allocate buffer (out of " + "memory)"; + return false; + } + cerr << "CXL cache mmap failed:" << strerror(errno) << endl; + return false; + } + + if (addr_local == NULL) { + cerr << "Unable to mmap" << endl; + return false; + } + + status = syscall(__NR_mbind, addr_local, len, 2, &mask, numa_node + 2, 1); + if (status != 0) { + cerr << "buffer_allocate(): unable to mbind:" << strerror(errno) << endl; + return false; + } + + *addr = addr_local; + return true; +} + +bool buffer_release(void *addr, uint64_t len) { + if (munmap(addr, len)) { + cerr << "CXL cache unmap failed:", strerror(errno); + return false; + } + return true; +} + +bool sysfs_read_u64(const char *path, uint64_t *value) { + ifstream fs; + fs.open(path, ios::in); + + std::string s; + if (fs.is_open()) { + std::string line; + std::getline(fs, line); + *value = std::stoul(line, 0, 16); + fs.close(); + return true; + } + return false; +} + +namespace 
opae { +namespace afu_test { + +template +inline bool parse_match_int(const char *s, regmatch_t m, T &v, int radix = 10) { + if (m.rm_so == -1 || m.rm_eo == -1) return false; + errno = 0; + v = std::strtoul(s + m.rm_so, NULL, radix); + return errno == 0; +} + +union pcie_address { + struct { + uint32_t function : 3; + uint32_t device : 5; + uint32_t bus : 8; + uint32_t domain : 16; + } fields; + uint32_t value; + + static pcie_address parse(const char *s) { + auto deleter = [&](regex_t *r) { + regfree(r); + delete r; + }; + std::unique_ptr re(new regex_t, deleter); + regmatch_t matches[MATCHES_SIZE]; + + int reg_res = regcomp(re.get(), sbdf_pattern, REG_EXTENDED | REG_ICASE); + if (reg_res) throw std::runtime_error("could not compile regex"); + + reg_res = regexec(re.get(), s, MATCHES_SIZE, matches, 0); + if (reg_res) throw std::runtime_error("pcie address not valid format"); + + uint16_t domain, bus, device, function; + if (!parse_match_int(s, matches[2], domain, 16)) domain = 0; + if (!parse_match_int(s, matches[3], bus, 16)) + throw std::runtime_error("error parsing pcie address"); + if (!parse_match_int(s, matches[4], device, 16)) + throw std::runtime_error("error parsing pcie address"); + if (!parse_match_int(s, matches[5], function)) + throw std::runtime_error("error parsing; pcie address"); + pcie_address a; + a.fields.domain = domain; + a.fields.bus = bus; + a.fields.device = device; + a.fields.function = function; + return a; + } +}; + +class afu; // forward declaration + +class command { + public: + typedef std::shared_ptr ptr_t; + command() : running_(true) {} + virtual ~command() {} + virtual const char *name() const = 0; + virtual const char *description() const = 0; + virtual int run(afu *afu, CLI::App *app) = 0; + virtual void add_options(CLI::App *app) { (void)app; } + virtual const char *afu_id() const { return nullptr; } + + virtual uint64_t featureid() const = 0; + virtual uint64_t guidl() const = 0; + virtual uint64_t guidh() const = 0; + + bool 
running() const { return running_; } + void stop() { running_ = false; } + + private: + std::atomic running_; +}; + +#if SPDLOG_VERSION >= 10900 +// spdlog version 1.9.0 defines SPDLOG_LEVEL_NAMES as an array of string_view_t. +// Convert to vector of std::string to be used in CLI::IsMember(). +inline std::vector spdlog_levels() { + std::vector levels_view = SPDLOG_LEVEL_NAMES; + std::vector levels_str(levels_view.size()); + std::transform(levels_view.begin(), levels_view.end(), levels_str.begin(), + [](spdlog::string_view_t sv) { + return std::string(sv.data(), sv.size()); + }); + return levels_str; +} +#else +inline std::vector spdlog_levels() { return SPDLOG_LEVEL_NAMES; } +#endif // SPDLOG_VERSION + +class afu { + public: + typedef int (*command_fn)(afu *afu, CLI::App *app); + enum exit_codes { + success = 0, + not_run, + not_found, + no_access, + exception, + error + }; + + afu(const char *name, const char *afu_id = nullptr, + const char *log_level = nullptr) + : name_(name), + afu_id_(afu_id ? afu_id : ""), + app_(name_), + pci_addr_(""), + log_level_(log_level ? 
log_level : "info"), + timeout_msec_(60000), + current_command_(nullptr), + dma_mmap_access_(HE_CACHE_DMA_MMAP_RW) { + if (!afu_id_.empty()) + app_.add_option("-g,--guid", afu_id_, "GUID")->default_str(afu_id_); + app_.add_option("-p,--pci-address", pci_addr_, + "[:]:."); + app_.add_option("-l,--log-level", log_level_, "stdout logging level") + ->default_str(log_level_) + ->check(CLI::IsMember(spdlog_levels())); + app_.add_option("-t,--timeout", timeout_msec_, "test timeout (msec)") + ->default_str(std::to_string(timeout_msec_)); + } + virtual ~afu() { + if (fd_ > 0) close(fd_); + if (logger_) spdlog::drop(logger_->name()); + } + + CLI::App &cli() { return app_; } + + int find_dev_feature() { + glob_t pglob; + glob_t dev_pglob; + char feature_path[MAX_SIZE] = {0}; + int gres = 0; + uint64_t value = 0; + size_t i = 0; + size_t dev_index = 0; + + if (!pci_addr_.empty()) { + if (snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, + pci_addr_.c_str()) < 0) { + cerr << "snprintf buffer overflow" << endl; + return 1; + } + } else { + if (snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, "*:*:*.*") < + 0) { + cerr << "snprintf buffer overflow" << endl; + return 2; + } + } + + gres = glob(feature_path, 0, NULL, &pglob); + if (gres) { + cerr << "Failed pattern match" << feature_path << ":" << strerror(errno) + << endl; + globfree(&pglob); + return 3; + } + + for (i = 0; i < pglob.gl_pathc; i++) { + bool retval = sysfs_read_u64(pglob.gl_pathv[i], &value); + if (!retval) { + cerr << "Failed to read sysfs value" << endl; + continue; + } + + if (current_command()->featureid() == value) { + string str(pglob.gl_pathv[i]); + string substr_dev(str.substr(0, str.rfind("/"))); + + substr_dev.append("/dfl-cxl-cache/dfl-cxl-cache*"); + gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &dev_pglob); + if (gres) { + cerr << "Failed pattern match" << substr_dev.c_str() << ":" + << strerror(errno) << endl; + globfree(&dev_pglob); + return 4; + } + + string 
str1(dev_pglob.gl_pathv[0]); + globfree(&dev_pglob); + dev_path_[dev_index].append("/dev"); + dev_path_[dev_index].append(str1.substr(str1.rfind("/"), 16)); + dev_index++; + } + } + + if (pglob.gl_pathv) { + globfree(&pglob); + } + + if (dev_index > 0) { + return 0; + } + + return 5; + } + + void unmap_mmio() { + if (mmio_base_) { + if (munmap(mmio_base_, rinfo_.size) == -1) + cerr << "Failed to unmap MMIO:" << strerror(errno) << endl; + } + } + + bool map_mmio() { + void *user_v; + user_v = mmap(NULL, rinfo_.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, + rinfo_.offset); + if (user_v == MAP_FAILED) { + cerr << "Failed to map MMIO:" << strerror(errno) << endl; + return false; + } + mmio_base_ = (uint8_t *)user_v; + + return true; + } + + int open_handle(const char *dev) { + int res = 0; + logger_->debug("CXL device:{0}", dev); + + fd_ = open(dev, O_RDWR); + if (fd_ < 0) { + cerr << "open() failed:" << strerror(errno) << endl; + return 1; + } + + memset(&rinfo_, 0, sizeof(rinfo_)); + rinfo_.argsz = sizeof(rinfo_); + res = ioctl(fd_, DFL_CXL_CACHE_GET_REGION_INFO, &rinfo_); + if (res) { + cerr << "ioctl() DFL_CXL_CACHE_GET_REGION_INFO failed:" << strerror(errno) + << endl; + close(fd_); + return 2; + } + logger_->debug("MMIO region flags:0x:{0:x} size:0x {1:x} offset:0x {2:x}", + rinfo_.flags, rinfo_.size, rinfo_.offset); + + if (!map_mmio()) { + cerr << "mmap failed:" << strerror(errno) << endl; + close(fd_); + return 3; + } + + volatile uint64_t *u64 = (volatile uint64_t *)mmio_base_; + logger_->debug("DFH : 0x:{0:X}", *u64); + logger_->debug("DFH + 8 : 0x:{0:X}", *(u64 + 1)); + logger_->debug("DFH + 16: 0x:{0:X}", *(u64 + 2)); + logger_->debug("DFH + 24: 0x:{0:X}", *(u64 + 3)); + + return exit_codes::not_run; + } + + int main(int argc, char *argv[]) { + if (!commands_.empty()) app_.require_subcommand(); + CLI11_PARSE(app_, argc, argv); + + command::ptr_t test(nullptr); + CLI::App *app = nullptr; + for (auto kv : commands_) { + if (*kv.first) { + app = kv.first; + 
test = kv.second; + break; + } + } + if (!test) { + std::cerr << "no command specified\n"; + return exit_codes::not_run; + } + + auto console_sink = std::make_shared(); + logger_ = std::make_shared(test->name(), console_sink); + spdlog::register_logger(logger_); + logger_->set_level(spdlog::level::from_str(log_level_)); + current_command_ = test; + if (find_dev_feature() != 0) { + cerr << "Failed to find feature" << endl; + return exit_codes::exception; + }; + + int dev_index = 0; + CLI::Option *opt = app->get_option_no_throw("--device"); + if (opt && opt->count() == 1) { + dev_index = stoi(opt->results().at(0)); + } + + int res = open_handle(dev_path_[dev_index].c_str()); + if (res != exit_codes::not_run) { + return res; + } + + return run(app, test); + } + + virtual int run(CLI::App *app, command::ptr_t test) { + int res = exit_codes::not_run; + current_command_ = test; + + try { + std::future f = std::async(std::launch::async, [this, test, app]() { + return test->run(this, app); + }); + auto status = f.wait_for(std::chrono::milliseconds(timeout_msec_)); + if (status == std::future_status::timeout) { + std::cerr << "Error: test timed out" << std::endl; + current_command_->stop(); + throw std::runtime_error("timeout"); + } + res = f.get(); + } catch (std::exception &ex) { + res = exit_codes::exception; + } + + current_command_.reset(); + return res; + } + + template + CLI::App *register_command() { + command::ptr_t cmd(new T()); + auto sub = app_.add_subcommand(cmd->name(), cmd->description()); + cmd->add_options(sub); + commands_[sub] = cmd; + return sub; + } + + uint64_t read64(uint32_t offset) { + uint64_t value = *((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write64(uint32_t offset, uint64_t value) { + *((uint64_t *)(mmio_base_ + offset)) = value; + return; + } + + uint32_t read32(uint32_t offset) { + uint32_t value = *((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write32(uint32_t offset, uint32_t value) { + *((uint32_t 
*)(mmio_base_ + offset)) = value; + return; + } + + command::ptr_t current_command() const { return current_command_; } + + bool allocate_dsm(size_t len = KiB(4), uint32_t numa_node = 0) { + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Failed to allocate 4k huge page:" << strerror(errno) << endl; + return false; + } + + cout << "DSM buffer numa node: " << numa_node << endl; + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; + + logger_->debug( + "Allocate DSM buffer user addr 0x:{0:x} length :" + "{1:d}", + dma_map.user_addr, dma_map.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); + + dsm_buffer_ = (uint8_t *)ptr; + dsm_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_dsm() { + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)dsm_buffer_; + dma_unmap.length = dsm_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; + + logger_->debug("free dsm user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); + buffer_release(dsm_buffer_, dsm_buf_len_); + return 
true; + } + + void reset_dsm() { memset(dsm_buffer_, 0, dsm_buf_len_); } + + bool allocate_cache_read(size_t len = MiB(2), uint32_t numa_node = 0) { + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Failed to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + cout << "Read buffer numa node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + + logger_->debug( + "Allocate read buffer user addr 0x:{0:x} length :" + "{1:d}", + dma_map.user_addr, dma_map.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + sleep(1); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + rd_buffer_ = (uint8_t *)ptr; + rd_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_read() { + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_buffer_; + dma_unmap.length = rd_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + + logger_->debug("free read user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << 
strerror(errno) + << endl; + } + + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + buffer_release(rd_buffer_, rd_buf_len_); + return true; + } + + bool allocate_cache_write(size_t len = MiB(2), uint32_t numa_node = 0) { + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Failed to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + + cout << "Write buffer numa node: " << numa_node << endl; + dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug( + "Allocate write buffer user addr 0x:{0:x}\ + length : {1:d}", + dma_map.user_addr, dma_map.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + wr_buffer_ = (uint8_t *)ptr; + wr_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_write() { + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)wr_buffer_; + dma_unmap.length = wr_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("free write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, 
&dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + buffer_release(wr_buffer_, wr_buf_len_); + return true; + } + + bool allocate_cache_read_write(size_t len = MiB(2), uint32_t numa_node = 0) { + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Failed to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + cout << "Read/Write buffer numa node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + if (dma_mmap_access_ == HE_CACHE_DMA_MMAP_RW) + dma_map.flags = DFL_CXL_BUFFER_MAP_WRITABLE; + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug( + "Allocate read/write buffer user addr 0x:{0:x}\ + length : {1:d}", + dma_map.user_addr, dma_map.length); + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); + + wr_buffer_phy_addr_ = (volatile uint64_t *)u64_wr; + rd_wr_buffer_ = (uint8_t *)ptr; + rd_wr_buf_len_ = len; + + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_read_write() { + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = 
sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_wr_buffer_; + dma_unmap.length = rd_wr_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_unmap.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("free read/write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); + + buffer_release(rd_wr_buffer_, rd_wr_buf_len_); + rd_wr_buffer_ = NULL; + return true; + } + + uint8_t *get_dsm() const { return dsm_buffer_; } + + uint8_t *get_read() const { return rd_buffer_; } + + uint8_t *get_write() const { return wr_buffer_; } + + uint8_t *get_read_write() const { return rd_wr_buffer_; } + + void set_mmap_access(he_mmap_access access = HE_CACHE_DMA_MMAP_RW) + { dma_mmap_access_ = access; } + + volatile uint64_t *get_write_buff_phy_addr() const { + return wr_buffer_phy_addr_; + } + + protected: + std::string name_; + std::string afu_id_; + CLI::App app_; + std::string pci_addr_; + std::string log_level_; + uint32_t timeout_msec_; + + int fd_; + uint8_t *mmio_base_; + uint64_t mmio_len_; + + uint8_t *dsm_buffer_; + uint64_t dsm_buf_len_; + + uint8_t *rd_buffer_; + uint64_t rd_buf_len_; + + uint8_t *wr_buffer_; + uint64_t wr_buf_len_; + + uint8_t *rd_wr_buffer_; + uint64_t rd_wr_buf_len_; + + volatile uint64_t *wr_buffer_phy_addr_; + + struct dfl_cxl_cache_region_info rinfo_; + + std::string dev_path_[MAX_HE_CACHE_DEVICE]; + + command::ptr_t current_command_; + std::map commands_; + + 
he_mmap_access dma_mmap_access_; + + public: + std::shared_ptr logger_; +}; + +} // end of namespace afu_test +} // end of namespace opae From cf87f88edea1e41851160ac1468933feb9d4a550 Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Fri, 5 Jan 2024 09:31:12 -0800 Subject: [PATCH 22/28] fix: CXL host exerciser segmentation fault non-root user mode (#3067) -CXL host exerciser segmentation fault occurs in non-root user mode, exit application if clx device open fails. Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/he_cache_test.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 34177288fd3c..b737800a83a6 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -435,7 +435,7 @@ class afu { logger_->debug("DFH + 16: 0x:{0:X}", *(u64 + 2)); logger_->debug("DFH + 24: 0x:{0:X}", *(u64 + 3)); - return exit_codes::not_run; + return exit_codes::success; } int main(int argc, char *argv[]) { @@ -474,7 +474,8 @@ class afu { } int res = open_handle(dev_path_[dev_index].c_str()); - if (res != exit_codes::not_run) { + if (res != exit_codes::success) { + cerr << "Failed to open cxl device" << endl; return res; } From 19841375d2a516c6ead52cbef942c7c26dfffc5e Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Fri, 5 Jan 2024 09:46:07 -0800 Subject: [PATCH 23/28] fix: CXL host exerciser running pointer test output wrong string (#3068) -CXL host exerciser running pointer tests show wrong "data ping pong tests". Change to "Running pointer test started ......" 
Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index e3e384ea601d..e2439483d7e5 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -1096,7 +1096,7 @@ class he_cache_cmd : public he_cmd { host_exe_->write64(HE_RD_ADDR_TABLE_DATA, phy_ptr); // start test - he_start_test(HE_PING_PONG,RUNNING_POINTER); + he_start_test(HE_RUNNING_POINTER,RUNNING_POINTER); // wait for completion if (!he_wait_test_completion()) { From 4d757b6dac2a61059c94835a9a5d68a90581d473 Mon Sep 17 00:00:00 2001 From: Michael Adler Date: Tue, 9 Jan 2024 17:31:39 -0500 Subject: [PATCH 24/28] [Feature] - Add parent/child AFU management to the libopae-c shell (#3072) Multi-ported AFUs have a parent AFU that names child AFUs by GUID, encoded in the first entry of a v1 AFU feature list. - Detect AFUs with children in fpgaOpen() and open all children, associating all open ports with the returned handle. - When a buffer is pinned in the parent, also pin the buffer at the same IOVA in all children. - Add fpgaGetChildren() in order to expose child handles for MMIO and interrupts. 
--- include/opae/access.h | 30 +++ libraries/libopae-c/CMakeLists.txt | 1 + libraries/libopae-c/adapter.h | 14 ++ libraries/libopae-c/api-shell.c | 115 +++++++++- libraries/libopae-c/multi-port-afu.c | 304 +++++++++++++++++++++++++++ libraries/libopae-c/multi-port-afu.h | 45 ++++ libraries/libopae-c/opae_int.h | 7 + 7 files changed, 509 insertions(+), 7 deletions(-) create mode 100644 libraries/libopae-c/multi-port-afu.c create mode 100644 libraries/libopae-c/multi-port-afu.h diff --git a/include/opae/access.h b/include/opae/access.h index 6518ccc8f0f7..174590d05f8a 100644 --- a/include/opae/access.h +++ b/include/opae/access.h @@ -72,6 +72,36 @@ extern "C" { fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags); +/** + * Extract the handles of children of a previously fpgaOpen()ed resource. + * Only AFUs with feature parameters that name child AFU GUIDs will have + * children. + * + * Child AFU handles may be used to connect to child-specific MMIO regions + * and manage interrupts. Children will be closed automatically along with + * the parent. fpgaClose() should not be called on returned child handles. + * + * Child handles may not be passed to fpgaPrepareBuffer(). All shared + * memory management must be associated with the parent. + * + * @param[in] handle Handle to previously opened FPGA object + * @param[in] max_children Maximum number of handles that may be returned + * in the `children` array. + * @param[out] children Pointer to an array of child handles currently + * open. When NULL or `max_children` is 0, the + * number of children will still be returned in + * `num_children`. + * @param[out] num_children Number of children belonging to the parent. + * This number may be higher than `max_children`. + * @returns FPGA_OK on success. + * FPGA_INVALID_PARAM if handle does not refer to + * an acquired resource, or if handle is NULL. + * FPGA_EXCEPTION if an internal error occurred + * while accessing the handle. 
+ */ +fpga_result fpgaGetChildren(fpga_handle handle, uint32_t max_children, + fpga_handle *children, uint32_t *num_children); + /** * Close a previously opened FPGA object * diff --git a/libraries/libopae-c/CMakeLists.txt b/libraries/libopae-c/CMakeLists.txt index 67cb8e96eb45..218bb091b6f1 100644 --- a/libraries/libopae-c/CMakeLists.txt +++ b/libraries/libopae-c/CMakeLists.txt @@ -29,6 +29,7 @@ set(SRC api-shell.c init.c props.c + multi-port-afu.c cfg-file.c fpgad-cfg.c fpgainfo-cfg.c diff --git a/libraries/libopae-c/adapter.h b/libraries/libopae-c/adapter.h index e010a484d394..dcdb506531b2 100644 --- a/libraries/libopae-c/adapter.h +++ b/libraries/libopae-c/adapter.h @@ -102,8 +102,22 @@ typedef struct _opae_api_adapter_table { fpga_result (*fpgaGetIOAddress)(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr); + fpga_result (*fpgaBindSVA)(fpga_handle handle, uint32_t *pasid); + // Internal methods between shell and plugin to pin/unpin an existing + // buffer at a specific ioaddr. Used when managing the same address + // space on parent and child AFU ports, all opened by the same process. + fpga_result (*fpgaPinBuffer)(fpga_handle handle, void *buf_addr, + uint64_t len, uint64_t ioaddr); + fpga_result (*fpgaUnpinBuffer)(fpga_handle handle, void *buf_addr, + uint64_t len, uint64_t ioaddr); + + // Internal method to extract details of a workspace. 
+ fpga_result (*fpgaGetWSInfo)(fpga_handle handle, uint64_t wsid, + uint64_t *ioaddr, + void **buf_addr, uint64_t *len); + /* ** fpga_result (*fpgaGetOPAECVersion)(fpga_version *version); ** diff --git a/libraries/libopae-c/api-shell.c b/libraries/libopae-c/api-shell.c index f54a84452de2..4524df4158eb 100644 --- a/libraries/libopae-c/api-shell.c +++ b/libraries/libopae-c/api-shell.c @@ -40,6 +40,7 @@ #include "pluginmgr.h" #include "opae_int.h" #include "props.h" +#include "multi-port-afu.h" #include "mock/opae_std.h" const char * @@ -195,6 +196,8 @@ opae_allocate_wrapped_handle(opae_wrapped_token *wt, fpga_handle opae_handle, whan->wrapped_token = wt; whan->opae_handle = opae_handle; whan->adapter_table = adapter; + whan->parent = NULL; + whan->child_next = NULL; opae_upref_wrapped_token(wt); } @@ -274,8 +277,8 @@ fpga_result __OPAE_API__ fpgaOpen(fpga_token token, fpga_handle *handle, int flags) { fpga_result res; - fpga_result cres = FPGA_OK; opae_wrapped_token *wrapped_token; + fpga_token_header *token_hdr; fpga_handle opae_handle = NULL; opae_wrapped_handle *wrapped_handle; @@ -298,13 +301,65 @@ fpga_result __OPAE_API__ fpgaOpen(fpga_token token, fpga_handle *handle, if (!wrapped_handle) { OPAE_ERR("malloc failed"); - res = FPGA_NO_MEMORY; - cres = wrapped_token->adapter_table->fpgaClose(opae_handle); + wrapped_token->adapter_table->fpgaClose(opae_handle); + return FPGA_NO_MEMORY; + } + + token_hdr = (fpga_token_header *)wrapped_token->opae_token; + if (token_hdr->objtype == FPGA_ACCELERATOR) { + res = afu_open_children(wrapped_handle); + if (res != FPGA_OK) { + // Close any children that are open + afu_close_children(wrapped_handle); + + // Close parent due to failure with child + if (wrapped_handle->adapter_table->fpgaClose) + wrapped_handle->adapter_table->fpgaClose( + wrapped_handle->opae_handle); + + opae_destroy_wrapped_handle(wrapped_handle); + return res; + } } *handle = wrapped_handle; - return res != FPGA_OK ? 
res : cres; + return FPGA_OK; +} + +fpga_result __OPAE_API__ fpgaGetChildren(fpga_handle handle, + uint32_t max_children, + fpga_handle *children, + uint32_t *num_children) +{ + opae_wrapped_handle *wrapped_handle = + opae_validate_wrapped_handle(handle); + + ASSERT_NOT_NULL(wrapped_handle); + ASSERT_NOT_NULL(num_children); + + if ((max_children > 0) && !children) { + OPAE_ERR("max_children > 0 with NULL children"); + return FPGA_INVALID_PARAM; + } + + *num_children = 0; + + // Is handle a child? If so, it has no children. + if (wrapped_handle->parent) + return FPGA_OK; + + // Children are already open + opae_wrapped_handle *wrapped_child = wrapped_handle->child_next; + while (wrapped_child) { + if (*num_children < max_children) + children[*num_children] = wrapped_child; + + *num_children += 1; + wrapped_child = wrapped_child->child_next; + } + + return FPGA_OK; } fpga_result __OPAE_API__ fpgaClose(fpga_handle handle) @@ -320,6 +375,7 @@ fpga_result __OPAE_API__ fpgaClose(fpga_handle handle) res = wrapped_handle->adapter_table->fpgaClose( wrapped_handle->opae_handle); + afu_close_children(wrapped_handle); opae_destroy_wrapped_handle(wrapped_handle); return res; @@ -905,6 +961,7 @@ fpga_result __OPAE_API__ fpgaGetUmsgPtr(fpga_handle handle, uint64_t **umsg_ptr) fpga_result __OPAE_API__ fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { + fpga_result res; opae_wrapped_handle *wrapped_handle = opae_validate_wrapped_handle(handle); @@ -921,12 +978,33 @@ fpga_result __OPAE_API__ fpgaPrepareBuffer(fpga_handle handle, ASSERT_NOT_NULL_RESULT(wrapped_handle->adapter_table->fpgaPrepareBuffer, FPGA_NOT_SUPPORTED); - return wrapped_handle->adapter_table->fpgaPrepareBuffer( + if (wrapped_handle->parent) { + OPAE_ERR("Call fpgaPrepareBuffer() from the parent handle"); + return FPGA_NOT_SUPPORTED; + } + + res = wrapped_handle->adapter_table->fpgaPrepareBuffer( wrapped_handle->opae_handle, len, buf_addr, wsid, flags); + if (res != 
FPGA_OK) + return res; + + res = afu_pin_buffer(wrapped_handle, *buf_addr, len, *wsid); + if (res == FPGA_OK) + return FPGA_OK; + + // Error! Undo pinning of parent after child failure. + if (wrapped_handle->adapter_table->fpgaReleaseBuffer) + wrapped_handle->adapter_table->fpgaReleaseBuffer( + wrapped_handle->opae_handle, *wsid); + + // Return the error + return res; } fpga_result __OPAE_API__ fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) { + fpga_result ret_res; + fpga_result res; opae_wrapped_handle *wrapped_handle = opae_validate_wrapped_handle(handle); @@ -934,8 +1012,13 @@ fpga_result __OPAE_API__ fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) ASSERT_NOT_NULL_RESULT(wrapped_handle->adapter_table->fpgaReleaseBuffer, FPGA_NOT_SUPPORTED); - return wrapped_handle->adapter_table->fpgaReleaseBuffer( + ret_res = afu_unpin_buffer(wrapped_handle, wsid); + + res = wrapped_handle->adapter_table->fpgaReleaseBuffer( wrapped_handle->opae_handle, wsid); + ret_res = (ret_res == FPGA_OK ? 
res : ret_res); + + return ret_res; } fpga_result __OPAE_API__ fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, @@ -955,6 +1038,7 @@ fpga_result __OPAE_API__ fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, fpga_result __OPAE_API__ fpgaBindSVA(fpga_handle handle, uint32_t *pasid) { + fpga_result res; opae_wrapped_handle *wrapped_handle = opae_validate_wrapped_handle(handle); @@ -963,8 +1047,25 @@ fpga_result __OPAE_API__ fpgaBindSVA(fpga_handle handle, uint32_t *pasid) if (!wrapped_handle->adapter_table->fpgaBindSVA) return FPGA_NOT_SUPPORTED; - return wrapped_handle->adapter_table->fpgaBindSVA( + res = wrapped_handle->adapter_table->fpgaBindSVA( wrapped_handle->opae_handle, pasid); + if (res != FPGA_OK) + return res; + + opae_wrapped_handle *wrapped_child = wrapped_handle->child_next; + while (wrapped_child) { + if (!wrapped_child->adapter_table->fpgaBindSVA) + return FPGA_NOT_SUPPORTED; + + res = wrapped_child->adapter_table->fpgaBindSVA( + wrapped_child->opae_handle, pasid); + if (res != FPGA_OK) + return res; + + wrapped_child = wrapped_child->child_next; + } + + return FPGA_OK; } fpga_result __OPAE_API__ fpgaGetOPAECVersion(fpga_version *version) diff --git a/libraries/libopae-c/multi-port-afu.c b/libraries/libopae-c/multi-port-afu.c new file mode 100644 index 000000000000..7675d66f2491 --- /dev/null +++ b/libraries/libopae-c/multi-port-afu.c @@ -0,0 +1,304 @@ +// Copyright(c) 2024, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Multi-ported AFUs have a parent AFU that names child AFUs by GUID. These +// functions apply operations to all childen of a parent AFU. +// + +#ifdef HAVE_CONFIG_H +#include +#endif // HAVE_CONFIG_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif // _GNU_SOURCE + +#include + +#include +#include +#include +#include +#include + +#include "pluginmgr.h" +#include "opae_int.h" +#include "props.h" +#include "mock/opae_std.h" + +STATIC void api_guid_to_fpga(uint64_t guidh, uint64_t guidl, fpga_guid guid) +{ + uint32_t i; + uint32_t s; + + // The API expects the MSB of the GUID at [0] and the LSB at [15]. 
+ s = 64; + for (i = 0; i < 8; ++i) { + s -= 8; + guid[i] = (uint8_t) ((guidh >> s) & 0xff); + } + + s = 64; + for (i = 0; i < 8; ++i) { + s -= 8; + guid[8 + i] = (uint8_t) ((guidl >> s) & 0xff); + } +} + +fpga_result afu_open_children(opae_wrapped_handle *wrapped_parent_handle) +{ + fpga_result result; + uint64_t v; + + const opae_api_adapter_table *adapter = + wrapped_parent_handle->wrapped_token->adapter_table; + fpga_handle handle = wrapped_parent_handle->opae_handle; + + // + // Does this AFU have children? Return FPGA_OK if it does not. + // + + if (!adapter->fpgaReadMMIO64) + return FPGA_OK; + // Don't load children if the attached plugin is missing support + // for probing workspace IDs for buffer info. Without it, IOVAs + // can't be shared with children. + if (!adapter->fpgaGetWSInfo) + return FPGA_OK; + + // DFH must be a v1 AFU with ID 0 + result = adapter->fpgaReadMMIO64(handle, 0, 0, &v); + if (result != FPGA_OK) + return result; + + // An AFU? + if ((v >> 60) != 1) + return FPGA_OK; + // At least v1? + if (!((v >> 52) & 0xff)) + return FPGA_OK; + // ID is 0 (normal AFU)? + if (v & 0xfff) + return FPGA_OK; + + // Is there a parameter list? + result = adapter->fpgaReadMMIO64(handle, 0, 0x20, &v); + if (result != FPGA_OK) + return result; + if (((v >> 31) & 1) == 0) + return FPGA_OK; + + // Look for a parameter with ID 2 (list of child GUIDs) + bool found_children = false; + uint64_t offset = 0x28; + do { + result = adapter->fpgaReadMMIO64(handle, 0, offset, &v); + if (result != FPGA_OK) + return result; + + if ((v & 0xffff) == 2) { + found_children = true; + offset += 8; + break; + } + + // Next parameter + offset += (v >> 35) * 8; + } while (((v >> 32) & 1) == 0); // Continue until EOP + + if (!found_children) + return FPGA_OK; + + // Number of children, inferred from the size of the parameter block + uint32_t num_children = v >> 36; + opae_wrapped_handle *child_prev = NULL; + + // Walk the list of child AFU GUIDs and load them. 
The resulting + // list of child FPGA handles matches the order of the parameter + // block. + // + // *** For most errors, no cleanup is required. Once a child is + // *** on the parent's list it will be cleaned up along with + // *** the parent. + for (uint32_t c = 0; c < num_children; ++c) { + fpga_guid guid; + uint64_t guidh, guidl; + + result = adapter->fpgaReadMMIO64(handle, 0, offset, &guidl); + if (result != FPGA_OK) + return result; + result = adapter->fpgaReadMMIO64(handle, 0, offset+8, &guidh); + if (result != FPGA_OK) + return result; + + // Call the shell's public API methods, not the adapter + // instance. Children will have their own wrapped handles. + + fpga_properties filter = NULL; + result = fpgaGetProperties(NULL, &filter); + if (result != FPGA_OK) + return result; + fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); + api_guid_to_fpga(guidh, guidl, guid); + fpgaPropertiesSetGUID(filter, guid); + + fpga_token accel_token; + uint32_t num_matches; + result = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches); + fpgaDestroyProperties(&filter); + if (result != FPGA_OK) + return result; + if (num_matches == 0) { + char guid_str[64]; + uuid_unparse(guid, guid_str); + OPAE_ERR("Child %s not found", guid_str); + return FPGA_NOT_FOUND; + } + + fpga_handle child_handle; + result = fpgaOpen(accel_token, &child_handle, 0); + fpgaDestroyToken(&accel_token); + if (result != FPGA_OK) + return result; + + opae_wrapped_handle *wrapped_child_handle = + opae_validate_wrapped_handle(child_handle); + ASSERT_NOT_NULL(wrapped_child_handle); + + wrapped_child_handle->parent = wrapped_parent_handle; + if (!child_prev) + wrapped_parent_handle->child_next = wrapped_child_handle; + else + child_prev->child_next = wrapped_child_handle; + child_prev = wrapped_child_handle; + + // Next child GUID in the parameter block + offset += 16; + } + + return result; +} + +fpga_result afu_close_children(opae_wrapped_handle *wrapped_parent_handle) +{ + opae_wrapped_handle 
*wrapped_child_next; + + ASSERT_NOT_NULL(wrapped_parent_handle); + + // Is handle actually a child? Avoid recursion. + if (wrapped_parent_handle->parent) + return FPGA_OK; + + wrapped_child_next = wrapped_parent_handle->child_next; + while (wrapped_child_next) { + opae_wrapped_handle *wrapped_child = wrapped_child_next; + wrapped_child_next = wrapped_child->child_next; + + // Use the public API, which will clean up the wrapper. + fpgaClose(wrapped_child); + } + + return FPGA_OK; +} + +fpga_result afu_pin_buffer(opae_wrapped_handle *wrapped_parent_handle, + void *buf_addr, uint64_t len, uint64_t wsid) +{ + fpga_result res; + opae_wrapped_handle *wrapped_child = wrapped_parent_handle->child_next; + opae_wrapped_handle *wrapped_undo; + + if (!wrapped_child) + return FPGA_OK; + + ASSERT_NOT_NULL_RESULT(wrapped_parent_handle->adapter_table->fpgaGetIOAddress, + FPGA_NOT_SUPPORTED); + + uint64_t ioaddr; + res = wrapped_parent_handle->adapter_table->fpgaGetIOAddress( + wrapped_parent_handle->opae_handle, wsid, &ioaddr); + if (res != FPGA_OK) + return res; + + while (wrapped_child) { + ASSERT_NOT_NULL_RESULT(wrapped_child->adapter_table->fpgaPinBuffer, + FPGA_NOT_SUPPORTED); + res = wrapped_child->adapter_table->fpgaPinBuffer( + wrapped_child->opae_handle, buf_addr, len, ioaddr); + if (res != FPGA_OK) + goto error_child; + wrapped_child = wrapped_child->child_next; + } + + return FPGA_OK; + +error_child: + // Undo pinning of any children completed before the error + wrapped_undo = wrapped_parent_handle->child_next; + while (wrapped_undo != wrapped_child) { + if (wrapped_undo->adapter_table->fpgaUnpinBuffer) + wrapped_undo->adapter_table->fpgaUnpinBuffer( + wrapped_undo->opae_handle, buf_addr, len, ioaddr); + + wrapped_undo = wrapped_undo->child_next; + } + + return res; +} + +fpga_result afu_unpin_buffer(opae_wrapped_handle *wrapped_parent_handle, + uint64_t wsid) +{ + fpga_result res; + opae_wrapped_handle *wrapped_child = wrapped_parent_handle->child_next; + void 
*buf_addr; + uint64_t ioaddr; + uint64_t len; + + if (!wrapped_child) + return FPGA_OK; + + ASSERT_NOT_NULL_RESULT(wrapped_parent_handle->adapter_table->fpgaGetWSInfo, + FPGA_NOT_SUPPORTED); + res = wrapped_parent_handle->adapter_table->fpgaGetWSInfo( + wrapped_parent_handle->opae_handle, wsid, &ioaddr, &buf_addr, &len); + if (res != FPGA_OK) + return res; + + while (wrapped_child) { + ASSERT_NOT_NULL_RESULT(wrapped_child->adapter_table->fpgaUnpinBuffer, + FPGA_NOT_SUPPORTED); + res = wrapped_child->adapter_table->fpgaUnpinBuffer( + wrapped_child->opae_handle, buf_addr, len, ioaddr); + if (res != FPGA_OK) + return res; + + wrapped_child = wrapped_child->child_next; + } + + return FPGA_OK; +} diff --git a/libraries/libopae-c/multi-port-afu.h b/libraries/libopae-c/multi-port-afu.h new file mode 100644 index 000000000000..a9134beb8d9d --- /dev/null +++ b/libraries/libopae-c/multi-port-afu.h @@ -0,0 +1,45 @@ +// Copyright(c) 2024, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Multi-ported AFUs have a parent AFU that names child AFUs by GUID. These +// functions apply operations to all childen of a parent AFU. +// + +#ifndef __OPAE_MULTI_PORT_AFU_H__ +#define __OPAE_MULTI_PORT_AFU_H__ + +#include +#include + +fpga_result afu_open_children(opae_wrapped_handle *wrapped_parent_handle); +fpga_result afu_close_children(opae_wrapped_handle *wrapped_parent_handle); +fpga_result afu_pin_buffer(opae_wrapped_handle *wrapped_parent_handle, + void *buf_addr, uint64_t len, uint64_t wsid); +fpga_result afu_unpin_buffer(opae_wrapped_handle *wrapped_parent_handle, + uint64_t wsid); + +#endif // __OPAE_MULTI_PORT_AFU_H__ diff --git a/libraries/libopae-c/opae_int.h b/libraries/libopae-c/opae_int.h index a814b22479b3..20b15ea98830 100644 --- a/libraries/libopae-c/opae_int.h +++ b/libraries/libopae-c/opae_int.h @@ -147,6 +147,13 @@ typedef struct _opae_wrapped_handle { opae_wrapped_token *wrapped_token; fpga_handle opae_handle; opae_api_adapter_table *adapter_table; + + // For a multi-ported AFU with declared parent/child, pointer from + // child handle to parent. + struct _opae_wrapped_handle *parent; + // Linked list of children, starting at the parent. The list order + // matches the order of the parent's child AFU GUID parameter. 
+ struct _opae_wrapped_handle *child_next; } opae_wrapped_handle; opae_wrapped_handle * From 672e2ec2a5089ca03677a9b16c7795293c13bdbe Mon Sep 17 00:00:00 2001 From: Lars Munch <113421197+lmu-silicom@users.noreply.github.com> Date: Thu, 11 Jan 2024 20:56:53 +0100 Subject: [PATCH 25/28] [Fix] Only clear AER errors when AER is available (#3073) When using the rsu --force option on systems without AER support the clearing of correctable/uncorrectable errors fails and setpci command returns exit code 1. Fix this by checking for AER support before clearing errors. Signed-off-by: Lars Munch --- python/opae.admin/opae/admin/fpga.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/opae.admin/opae/admin/fpga.py b/python/opae.admin/opae/admin/fpga.py index 36d5c0537835..ffc0a1ea7a1d 100644 --- a/python/opae.admin/opae/admin/fpga.py +++ b/python/opae.admin/opae/admin/fpga.py @@ -666,11 +666,17 @@ def clear_device_status(self, device): call_process(f'{cmd}={output:08x}') def clear_uncorrectable_errors(self, device): + if not device.supports_ecap('aer'): + self.log.debug(f'No AER support in device {device.pci_address}') + return self.log.debug(f'Clearing uncorrectable errors for {device.pci_address}') cmd = f'setpci -s {device.pci_address} ECAP_AER+0x04.L' call_process(f'{cmd}=FFFFFFFF') def clear_correctable_errors(self, device): + if not device.supports_ecap('aer'): + self.log.debug(f'No AER support in device {device.pci_address}') + return self.log.debug(f'Clearing correctable errors for {device.pci_address}') cmd = f'setpci -s {device.pci_address} ECAP_AER+0x10.L' call_process(f'{cmd}=FFFFFFFF') From a53d5e0d734edfa4fd90071160119a92ee5d353a Mon Sep 17 00:00:00 2001 From: Michael Adler Date: Tue, 16 Jan 2024 16:30:33 -0500 Subject: [PATCH 26/28] [Fix] SEGV when passing NULL buf_addr to fpgaPrepareBuffer() (#3077) Mistaken pointer dereference. buf_addr is allowed to be NULL when testing the fpgaPrepareBuffer() capabilities. 
Resolves a regression introduced in commit 4d757b6. --- libraries/libopae-c/api-shell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libopae-c/api-shell.c b/libraries/libopae-c/api-shell.c index 4524df4158eb..68dca9f1120f 100644 --- a/libraries/libopae-c/api-shell.c +++ b/libraries/libopae-c/api-shell.c @@ -985,7 +985,7 @@ fpga_result __OPAE_API__ fpgaPrepareBuffer(fpga_handle handle, res = wrapped_handle->adapter_table->fpgaPrepareBuffer( wrapped_handle->opae_handle, len, buf_addr, wsid, flags); - if (res != FPGA_OK) + if ((res != FPGA_OK) || !buf_addr) return res; res = afu_pin_buffer(wrapped_handle, *buf_addr, len, *wsid); From 05d88abb35f1ea380df45c79a3ef4c73e31ad8fb Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:24:50 -0800 Subject: [PATCH 27/28] ix:improve fpgametrics output readability and (#3075) - update fpga metrics index numbered from 1-42 (instead of 0-41) - improve fpgametrics output readability - group power and thermal sensor strings and values Signed-off-by: anandaravuri --- binaries/fpgametrics/fpgametrics.c | 31 +++++++++---------- .../plugins/xfpga/metrics/metrics_max10.c | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/binaries/fpgametrics/fpgametrics.c b/binaries/fpgametrics/fpgametrics.c index c9eb8ec92601..c3af776fc79f 100644 --- a/binaries/fpgametrics/fpgametrics.c +++ b/binaries/fpgametrics/fpgametrics.c @@ -254,14 +254,13 @@ int main(int argc, char *argv[]) } printf("---------------------------------------------------------------------------------------------------\n"); - printf("metric_num qualifier_name group_name metric_name metric_units\n"); + printf("%-11s | %-19s | %-50s | %-5s \n", "metric_num", "group_name", "metric_name", "metric_units"); printf("---------------------------------------------------------------------------------------------------\n"); for (i = 0; i < num_metrics; i++) { - printf("%-3ld | 
%-30s | %-15s | %-30s | %-10s \n", - metric_info[i].metric_num, - metric_info[i].qualifier_name, + printf("%-10ld | %-20s | %-50s | %-1s \n", + metric_info[i].metric_num + 1, metric_info[i].group_name, metric_info[i].metric_name, metric_info[i].metric_units); @@ -279,9 +278,9 @@ int main(int argc, char *argv[]) ON_ERR_GOTO(res, out_close, "get num of metrics value by index"); printf("\n\n\n"); - printf("-------------------------------------------------------------------------------------------------\n "); - printf(" metric_num qualifier_name metric_name value \n "); - printf("-------------------------------------------------------------------------------------------------\n "); + printf("-------------------------------------------------------------------------------------------------\n"); + printf("%-11s | %-19s | %-50s | %-20s \n", "metric_num", "group_name", "metric_name", "value"); + printf("-------------------------------------------------------------------------------------------------\n"); for (i = 0; i < num_metrics; i++) { @@ -289,26 +288,26 @@ int main(int argc, char *argv[]) if (metric_info[num].metric_datatype == FPGA_METRIC_DATATYPE_INT && metric_array[i].isvalid) { - printf("%-20ld | %-30s | %-25s | %ld %-20s \n ", - metric_info[num].metric_num, - metric_info[num].qualifier_name, + printf("%-10ld | %-19s | %-50s | %ld %-20s \n", + metric_info[num].metric_num +1, + metric_info[i].group_name, metric_info[num].metric_name, metric_array[i].value.ivalue, metric_info[num].metric_units); } else if (metric_info[num].metric_datatype == FPGA_METRIC_DATATYPE_DOUBLE && metric_array[i].isvalid) { - printf("%-20ld | %-30s | %-25s | %0.2f %-20s \n ", - metric_info[num].metric_num, - metric_info[num].qualifier_name, + printf("%-10ld | %-19s | %-50s | %0.2f %-20s \n", + metric_info[num].metric_num + 1, + metric_info[i].group_name, metric_info[num].metric_name, metric_array[i].value.dvalue, metric_info[num].metric_units); } else { - printf("%-20ld | %-30s | %-25s | %s 
\n ", - metric_info[num].metric_num, - metric_info[num].qualifier_name, + printf("%-10ld | %-19s | %-50s | %s \n", + metric_info[num].metric_num + 1, + metric_info[i].group_name, metric_info[num].metric_name, "Fails to read metric value"); } diff --git a/libraries/plugins/xfpga/metrics/metrics_max10.c b/libraries/plugins/xfpga/metrics/metrics_max10.c index 71a37545873e..eff9febcc405 100644 --- a/libraries/plugins/xfpga/metrics/metrics_max10.c +++ b/libraries/plugins/xfpga/metrics/metrics_max10.c @@ -217,7 +217,7 @@ fpga_result dfl_enum_max10_metrics_info_pattern(struct _fpga_handle *_handle, } strncat(sysfspath, DFL_MAX10_SYSFS_LABEL, strlen(DFL_MAX10_SYSFS_LABEL) + 1); - gres = opae_glob(sysfspath, GLOB_NOSORT, NULL, &pglob); + gres = opae_glob(sysfspath, 0, NULL, &pglob); if (gres) { OPAE_ERR("Failed pattern match %s: %s", sysfspath, strerror(errno)); opae_globfree(&pglob); From 9ba83ec2b28c63635f0bf3145bbe804db1361ebb Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:17:40 -0800 Subject: [PATCH 28/28] =?UTF-8?q?fix:fpgabitstreaminfo=20update=20string?= =?UTF-8?q?=20=E2=80=9CBitstream=20version=E2=80=9D=20to=20"Image=20Info"?= =?UTF-8?q?=20(#3076)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix:fpgabitstreaminfo update string “Bitstream version” to "Image Info" Signed-off-by: anandaravuri --------- Signed-off-by: anandaravuri --- python/opae.admin/opae/admin/utils/verifier.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/opae.admin/opae/admin/utils/verifier.py b/python/opae.admin/opae/admin/utils/verifier.py index 9b94ebba2f03..8942fd3ed99d 100644 --- a/python/opae.admin/opae/admin/utils/verifier.py +++ b/python/opae.admin/opae/admin/utils/verifier.py @@ -202,7 +202,7 @@ def __init__(self, bits, payload): self.content_type = bits[8] self.cert_type = bits[9] self.slot_num = bits[10] & 0xF - 
self.bitstream_version = bytearray(bits[96:128]).decode() + self.image_info = bytearray(bits[96:128]).decode() self.sha256 = int_from_bytes(bits[16:48], byteorder="big") self.sha384 = int_from_bytes(bits[48:96], byteorder="big") self.calc_sha256 = int_from_bytes( @@ -250,10 +250,10 @@ def print_block(self): "\n\t\t\t{0:#0{1}x}").format(self.calc_sha384, 98)) print("\t\tMatch" if self.sha384 == self.calc_sha384 else "\t\tNo match") - if len(self.bitstream_version) != 0: - print("\tBitstream Version =\t\t{}".format(self.bitstream_version)) + if len(self.image_info) != 0: + print("\tImage Info =\t\t{}".format(self.image_info)) else: - print("\tNo Bitstream Version") + print("\tNo Image Info") if self.cert_type == database.BITSTREAM_TYPE_CANCEL: print("\n\tCSK to cancel =\t{}".format( int_from_bytes(self.payload[:4], byteorder="big")))