I have written a WGSL compute shader that computes a result from an input it is given.
Now I need to run this shader many times with different inputs. The shader and every compute step are identical on each run; only the input changes. I can create a new pipeline for each run and get the right results, but execution is exceedingly slow, presumably because of the overhead of creating the pipeline and initializing the buffers every time.
How can I use my pre-created WGSL pipeline multiple times (on different inputs) without creating a new pipeline every time?
// One-time setup: acquire the GPU device, compile the shader and build the
// compute pipeline. None of this depends on the per-run input, so it only
// needs to happen once and the resulting `pipeline` can be reused for every run.

// WebGPU is not exposed in every browser/context; fail with a clear message
// instead of a TypeError on `undefined.requestAdapter`.
if (!navigator.gpu) {
  throw new Error('WebGPU is not supported in this environment.');
}

// requestAdapter() resolves to null when no suitable adapter is available.
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
  throw new Error('No suitable GPU adapter was found.');
}
const device = await adapter.requestDevice();

// Shader: invocation `id.x` sums, over 720 elements, the squared difference
// between row `id.x` of `table` and `sample`, and stores it in `result[id.x]`.
const module = device.createShaderModule({code: `@group(0) @binding(0) var<storage, read_write> sample: array<u32, 720>;
@group(0) @binding(1) var<storage, read_write> table: array<array<u32, 720>>;
@group(0) @binding(2) var<storage, read_write> result: array<u32>;
@compute @workgroup_size(1,1,1) fn computeThis (@builtin(global_invocation_id) id: vec3<u32>)
{
var diff : u32 = 0;
for (var i : u32 = 0; i < 720; i++)
{
diff += (table[id.x][i] - sample[i])*(table[id.x][i] - sample[i]);
}
result[id.x] = diff;
}
`, });

// Name the entry point explicitly: omitting it is only accepted by newer
// WebGPU implementations.
const pipeline = device.createComputePipeline({
  layout: 'auto',
  compute: { module, entryPoint: 'computeThis' },
});
// GPU-side storage for the three shader bindings. All three share the same
// usage flags, so the mask is built once.
const storageUsage =
  GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
const sampleBuffer = device.createBuffer({ size: sample.byteLength, usage: storageUsage });
const tableBuffer = device.createBuffer({ size: table.byteLength, usage: storageUsage });
const inputBuffer = device.createBuffer({ size: input.byteLength, usage: storageUsage });

// CPU-readable staging buffer, same size as `inputBuffer`.
const resultBuffer = device.createBuffer({
  size: input.byteLength,
  usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
});

// Array position doubles as the binding index:
// 0 -> sampleBuffer, 1 -> tableBuffer, 2 -> inputBuffer.
const bindGroup = device.createBindGroup({
  layout: pipeline.getBindGroupLayout(0),
  entries: [sampleBuffer, tableBuffer, inputBuffer].map((buffer, binding) => ({
    binding,
    resource: { buffer },
  })),
});
// Upload the data that does not change between runs exactly once.
device.queue.writeBuffer(tableBuffer, 0, table);
device.queue.writeBuffer(inputBuffer, 0, input);

/**
 * Runs the already-created compute pipeline on one sample and reads the
 * result back to the CPU.
 *
 * The pipeline, buffers and bind group are reused as-is; per call only the
 * sample data is uploaded and a fresh command buffer is encoded (command
 * buffers cannot be resubmitted). Calls must be awaited one at a time,
 * because they all share the single `resultBuffer`, which is mapped while
 * the result is being read.
 *
 * @param {BufferSource} sampleData - Data written to `sampleBuffer` at offset 0.
 * @returns {Promise<Uint32Array>} A CPU-side copy of the result buffer contents.
 */
async function runCompute(sampleData) {
  device.queue.writeBuffer(sampleBuffer, 0, sampleData);

  const encoder = device.createCommandEncoder();
  const pass = encoder.beginComputePass();
  pass.setPipeline(pipeline);
  pass.setBindGroup(0, bindGroup);
  pass.dispatchWorkgroups(LEN, 1, 1);
  pass.end();
  // Binding 2 (the shader's `result`) is backed by `inputBuffer`; copy it into
  // the mappable staging buffer so it can be read from JavaScript.
  encoder.copyBufferToBuffer(inputBuffer, 0, resultBuffer, 0, resultBuffer.size);
  device.queue.submit([encoder.finish()]);

  await resultBuffer.mapAsync(GPUMapMode.READ);
  try {
    // slice() copies the bytes so they stay valid after unmap().
    return new Uint32Array(resultBuffer.getMappedRange().slice());
  } finally {
    // Only `resultBuffer` is ever mapped; the storage buffers are filled via
    // queue.writeBuffer and must not be unmapped.
    resultBuffer.unmap();
  }
}

const result = await runCompute(sample);