import time
import os
import d3d11
import d3d11x
from d3d11c import *
# Layout description for the buffer elements. Each tuple mirrors a
# Direct3D 11 input-element description; here a single unnamed element
# of FORMAT_R32_SINT, i.e. the buffer is a stream of signed 32-bit
# integers. Fields are presumably (semantic name, semantic index,
# format, input slot, byte offset, input classification, instance step
# rate) — TODO confirm against the d3d11 wrapper's documentation.
layoutDesc = [
    ("", 0, FORMAT_R32_SINT, 0, 0, INPUT_PER_VERTEX_DATA, 0),
]
# Number of elements to process on both the GPU and the CPU.
NUMITEMS = 300
# CPU-side input data: one single-int row per element,
# [[0], [1], ..., [NUMITEMS - 1]]. A comprehension replaces the
# original append loop (same contents, idiomatic construction).
INPUTDATA = [[x] for x in range(NUMITEMS)]
def createBufferAndView():
    """Create a structured GPU buffer of NUMITEMS elements plus a view.

    The buffer is created with both shader-resource and unordered-access
    bindings so it can serve as either input or output of the compute
    shader. Returns a (buffer, view) pair.
    """
    bindFlags = BIND_UNORDERED_ACCESS | BIND_SHADER_RESOURCE
    gpuBuffer = d3d11.Buffer(layoutDesc, NUMITEMS, bindFlags,
                             USAGE_DEFAULT, 0, RESOURCE_MISC_BUFFER_STRUCTURED)
    bufferView = d3d11.View(gpuBuffer)
    return gpuBuffer, bufferView
def createStagingBuffer(init, access):
    """Return a USAGE_STAGING buffer initialized from *init*.

    *access* is the requested CPU access flag combination
    (CPU_ACCESS_READ and/or CPU_ACCESS_WRITE); staging buffers are the
    only resources the CPU can map directly.
    """
    stagingBuffer = d3d11.Buffer(layoutDesc, init, 0, USAGE_STAGING, access)
    return stagingBuffer
def verifyResults(gpuResults, inputData=None):
    """Recompute the shader's algorithm in pure Python and compare.

    gpuResults -- list of [int] rows read back from the GPU.
    inputData  -- optional list of [int] input rows; defaults to the
                  module-level INPUTDATA. The new keyword parameter is
                  backward compatible and makes the function testable
                  without the module globals.

    Returns True when the CPU and GPU results match, False otherwise.
    (The original always returned None, so returning a bool is a
    backward-compatible addition.)
    """
    data = INPUTDATA if inputData is None else inputData
    pyResults = [None] * len(data)  # "Preallocate" the list.
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for interval timing.
    pyStart = time.perf_counter()
    # Implement the same "algorithm" in Python. To be fair this
    # allocates memory and does some other stuff too, but it is quite
    # normal way to do things in Python. The algorithm itself is
    # pretty useless, but with O(n^3) computation it is easy to
    # see time differences even with small data sets (=NUMITEMS).
    # NOTE: the cubic nesting is deliberate (benchmark), so it is
    # intentionally NOT optimized away here.
    for i, row in enumerate(data):
        total = 0  # renamed from 'sum', which shadowed the builtin
        base = row[0]  # loop-invariant lookup hoisted out of inner loops
        for i2, row2 in enumerate(data):
            total += base + row2[0]
            for i3, row3 in enumerate(data):
                total += row3[0]
        pyResults[i] = [total + i]
    print("Python version took %f seconds" % (time.perf_counter() - pyStart))
    # Make sure that your GPU and CPU results match (although
    # floating point, overflow etc. results might not be identical).
    # Of course you don't have to compare them EVERY time (it would remove
    # the whole point of computing things faster).
    matches = pyResults == gpuResults
    if matches:
        print("GPU and CPU results were identical.")
    else:
        print("ERROR! GPU and CPU results don't match!")
    return matches
def compute():
    """Run the compute shader over INPUTDATA and verify its output.

    Uploads INPUTDATA to two GPU input buffers via a staging buffer,
    dispatches the "Compute.fx" effect, reads the output back through
    the same staging buffer, and hands it to verifyResults(). Uses the
    module-global `device`.
    """
    # Create two input buffers and one for the output.
    buffer1, buffer1srv = createBufferAndView()
    buffer2, buffer2srv = createBufferAndView()
    bufferOut, bufferOutUav = createBufferAndView()
    bufferOut.resize(NUMITEMS)
    # Staging buffer which will be used to copy data
    # between GPU and CPU.
    stager = createStagingBuffer(INPUTDATA, CPU_ACCESS_WRITE | CPU_ACCESS_READ)
    buffer1.copy(stager)
    buffer2.copy(stager)
    # Load the effect and set buffers.
    effect = d3d11.Effect(d3d11x.getResourceDir("Effects", "Compute.fx"))
    effect.set("buffer1", buffer1srv)
    effect.set("buffer2", buffer2srv)
    effect.set("bufferOut", bufferOutUav)
    effect.set("bufferSize", NUMITEMS)
    effect.apply(0, 0)
    # time.clock() was removed in Python 3.8; use perf_counter() for
    # interval timing instead.
    compStart = time.perf_counter()
    # Start the compute operation: NUMITEMS thread groups along x.
    device.dispatch(NUMITEMS, 1, 1)
    # Copy data back for CPU access.
    stager.copy(bufferOut)
    outputData = []
    # Map the staging buffer so the CPU can read the GPU results.
    with d3d11x.Mapper(stager, MAP_READ):
        outputData = stager[:]
    print("Compute shader took %f seconds" % (time.perf_counter() - compStart))
    verifyResults(outputData)
if __name__ == "__main__":
    # `device` stays a module-level global because compute() refers to
    # it; the guard only prevents side effects on import.
    device = d3d11.Device(None)
    compute()
    # Keep the console window open (Windows-only "pause" command;
    # d3d11 itself is Windows-only so this is fine here).
    os.system("pause")