Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix order in enqueue_copy #3

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions 021_array_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@
cl_arrays = [cl_a, cl_b, cl_c]

for x in range(3):
cl.enqueue_copy(queue, cl_arrays[x], np_arrays[x])
cl.enqueue_copy(queue, np_arrays[x], cl_arrays[x])
queue.finish()
# Copy the data for all three arrays (a, b and c) back to the host

for x in np_arrays:
print(x)
# Print all three host arrays, to show sum() worked
# Print all three host arrays, to show sum() worked
4 changes: 2 additions & 2 deletions 030_timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ def gpu_array_sum(a, b):
elapsed = 1e-9*(event.profile.end - event.profile.start) # Calculate the time it took to execute the kernel
print("GPU Kernel Time: {0} s".format(elapsed)) # Print the time it took to execute the kernel
c_gpu = np.empty_like(a) # Create an empty array the same size as array a
cl.enqueue_read_buffer(queue, c_buffer, c_gpu).wait() # Read back the data from GPU memory into array c_gpu
cl.enqueue_copy(queue, c_buffer, c_gpu).wait() # Read back the data from GPU memory into array c_gpu
gpu_end_time = time() # Get the GPU end time
print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time)) # Print the time the GPU program took, including both memory copies
return c_gpu # Return the sum of the two arrays

cpu_array_sum(a, b) # Call the function that sums two arrays on the CPU
gpu_array_sum(a, b) # Call the function that sums two arrays on the GPU
gpu_array_sum(a, b) # Call the function that sums two arrays on the GPU
8 changes: 4 additions & 4 deletions 040_elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@
context = cl.create_some_context() # Initialize the Context
queue = cl.CommandQueue(context) # Instantiate a Queue

a = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32)) # Create a random pyopencl array
b = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32)) # Create a random pyopencl array
a = cl_array.to_device(queue, numpy.random.randn(50000).astype(numpy.float32)) # Create a random pyopencl array
b = cl_array.to_device(queue, numpy.random.randn(50000).astype(numpy.float32)) # Create a random pyopencl array
c = cl_array.empty_like(a) # Create an empty pyopencl destination array

sum = cl.elementwise.ElementwiseKernel(context, "float *a, float *b, float *c", "c[i] = a[i] + b[i]", "sum")
# Create an elementwise kernel object
# - Arguments: a string formatted as a C argument list
# - Operation: a snippet of C that carries out the desired map operatino
# - Operation: a snippet of C that carries out the desired map operation
# - Name: the function name under which the kernel is compiled

sum(a, b, c) # Call the elementwise kernel

print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
# Print all three arrays, to show sum() worked
# Print all three arrays, to show sum() worked
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ PyOpenCL is a tool that is worth learning. Python allows exceptional clarity-of
- 010 Introspection - Find out about your computer's OpenCL situation
- 020 Array Sum - Use OpenCL To Add Two Large Random Arrays - Hiding Details
- 021 Array Sum - Use OpenCL To Add Two Large Random Arrays - Showing Details
- 030 Timing - Compare performance of a loop in pure Python versus OpenCL
- 040 Elementwise - Use PyOpenCL arrays and elementwise to add two large random arrays