Refactor WebGPU ASYNCIFY to use JSPI
This yields a smaller build size and better performance. Link time also goes down significantly.
This commit is contained in:
@@ -31,10 +31,10 @@ void* GPUBufferWebGPU::Map(GPUResourceMapMode mode)
|
||||
userData.Call(status == WGPUMapAsyncStatus_Success, status, message);
|
||||
};
|
||||
wgpuBufferMapAsync(Buffer, mapMode, 0, _desc.Size, mapRequest.Info);
|
||||
auto mapRequestResult = mapRequest.Wait();
|
||||
auto mapRequestResult = mapRequest.Wait(_device->WebGPUInstance);
|
||||
if (mapRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Error, "WebGPU buffer map request has timed out after {}s", mapRequest.Data.WaitTime);
|
||||
LOG(Error, "WebGPU buffer map request has timed out after {}s", (int32)mapRequest.Data.WaitTime);
|
||||
return nullptr;
|
||||
}
|
||||
if (mapRequestResult == WGPUWaitStatus_Error)
|
||||
|
||||
@@ -618,10 +618,10 @@ bool GPUDeviceWebGPU::Init()
|
||||
userData.Call(status == WGPURequestDeviceStatus_Success, status, message);
|
||||
};
|
||||
wgpuAdapterRequestDevice(Adapter->Adapter, &deviceDesc, deviceRequest.Info);
|
||||
auto deviceRequestResult = deviceRequest.Wait();
|
||||
auto deviceRequestResult = deviceRequest.Wait(WebGPUInstance);
|
||||
if (deviceRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Fatal, "WebGPU device request has timed out after {}s", deviceRequest.Data.WaitTime);
|
||||
LOG(Fatal, "WebGPU device request has timed out after {}s", (int32)deviceRequest.Data.WaitTime);
|
||||
return true;
|
||||
}
|
||||
if (deviceRequestResult == WGPUWaitStatus_Error)
|
||||
@@ -701,6 +701,11 @@ GPUDevice* CreateGPUDeviceWebGPU()
|
||||
{
|
||||
// Create instance
|
||||
WGPUInstanceDescriptor instanceDesc = WGPU_INSTANCE_DESCRIPTOR_INIT;
|
||||
#if !WEBGPU_ASYNCIFY && 0
|
||||
WGPUInstanceFeatureName instanceFeatures[1] = { WGPUInstanceFeatureName_TimedWaitAny };
|
||||
instanceDesc.requiredFeatureCount = 1;
|
||||
instanceDesc.requiredFeatures = instanceFeatures;
|
||||
#endif
|
||||
WGPUInstance instance = wgpuCreateInstance(&instanceDesc);
|
||||
if (!instance)
|
||||
{
|
||||
@@ -727,10 +732,10 @@ GPUDevice* CreateGPUDeviceWebGPU()
|
||||
userData.Call(status == WGPURequestAdapterStatus_Success, status, message);
|
||||
};
|
||||
wgpuInstanceRequestAdapter(instance, &adapterOptions, adapterRequest.Info);
|
||||
auto adapterRequestResult = adapterRequest.Wait();
|
||||
auto adapterRequestResult = adapterRequest.Wait(instance);
|
||||
if (adapterRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Fatal, "WebGPU adapter request has timed out after {}s", adapterRequest.Data.WaitTime);
|
||||
LOG(Fatal, "WebGPU adapter request has timed out after {}s", (int32)adapterRequest.Data.WaitTime);
|
||||
return nullptr;
|
||||
}
|
||||
if (adapterRequestResult == WGPUWaitStatus_Error)
|
||||
@@ -796,7 +801,7 @@ void GPUDeviceWebGPU::Dispose()
|
||||
|
||||
void GPUDeviceWebGPU::WaitForGPU()
|
||||
{
|
||||
if (QueueSubmits == 0)
|
||||
if (QueueSubmits == 0 || Engine::FatalError != FatalErrorType::None)
|
||||
return;
|
||||
QueueSubmits = 0;
|
||||
AsyncCallbackWebGPU<WGPUQueueWorkDoneCallbackInfo> workDone(WGPU_QUEUE_WORK_DONE_CALLBACK_INFO_INIT);
|
||||
@@ -806,10 +811,10 @@ void GPUDeviceWebGPU::WaitForGPU()
|
||||
userData.Call(status == WGPUQueueWorkDoneStatus_Success, status, message);
|
||||
};
|
||||
wgpuQueueOnSubmittedWorkDone(Queue, workDone.Info);
|
||||
auto workDoneResult = workDone.Wait();
|
||||
auto workDoneResult = workDone.Wait(WebGPUInstance);
|
||||
if (workDoneResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Error, "WebGPU queue wait has timed out after {}s", workDone.Data.WaitTime);
|
||||
LOG(Error, "WebGPU queue wait has timed out after {}s", (int32)workDone.Data.WaitTime);
|
||||
return;
|
||||
}
|
||||
if (workDoneResult == WGPUWaitStatus_Error)
|
||||
|
||||
@@ -9,15 +9,37 @@ using Flax.Build.Platforms;
|
||||
/// </summary>
|
||||
public class GraphicsDeviceWebGPU : GraphicsDeviceBaseModule
|
||||
{
|
||||
/// <summary>
|
||||
/// Using ASYNCIFY leads to simple code by waiting on async WebGPU API callbacks with emscripten_sleep but doubles the code size and adds some overhead.
|
||||
/// https://emscripten.org/docs/porting/asyncify.html <br/>
|
||||
/// 0 - no async <br/>
|
||||
/// 1 - via Asyncify (causes the WASM to be much larger) <br/>
|
||||
/// 2 - via JSPI (experimental) <br/>
|
||||
/// </summary>
|
||||
public int WithAsyncify = 2;
|
||||
|
||||
/// <inheritdoc />
|
||||
public override void Setup(BuildOptions options)
|
||||
{
|
||||
base.Setup(options);
|
||||
|
||||
var port = "--use-port=emdawnwebgpu:cpp_bindings=false";
|
||||
options.CompileEnv.CustomArgs.Add(port);
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
|
||||
options.OutputFiles.Add(port);
|
||||
options.CompileEnv.CustomArgs.Add(port);
|
||||
if (WithAsyncify == 2)
|
||||
{
|
||||
options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY=2");
|
||||
options.LinkEnv.CustomArgs.Add("-sJSPI");
|
||||
options.LinkEnv.CustomArgs.Add("-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=$getWasmTableEntry");
|
||||
}
|
||||
else if (WithAsyncify == 1)
|
||||
{
|
||||
options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY");
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY_STACK_SIZE=8192");
|
||||
//options.LinkEnv.CustomArgs.Add("-sASYNCIFY_ONLY=[\"main\",\"WebGPUAsyncWait(AsyncWaitParamsWebGPU)\"]"); // TODO: try indirect calls only to reduce the code size
|
||||
options.LinkEnv.CustomArgs.Add("-sEXPORT_ALL"); // This bloats JS but otherwise dynamic calls don't work properly
|
||||
}
|
||||
options.PublicDefinitions.Add("GRAPHICS_API_WEBGPU");
|
||||
options.PrivateIncludePaths.Add(Path.Combine(EmscriptenSdk.Instance.EmscriptenPath, "emscripten/cache/ports/emdawnwebgpu/emdawnwebgpu_pkg/webgpu/include"));
|
||||
options.PrivateDependencies.Add("lz4");
|
||||
|
||||
@@ -4,6 +4,36 @@
|
||||
|
||||
#include "RenderToolsWebGPU.h"
|
||||
#include "Engine/Graphics/PixelFormat.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
/// <summary>
/// Waits for a pending WebGPU async callback to report its result by polling the shared callback data.
/// </summary>
/// <param name="params">The wait parameters: the WebGPU instance and the callback data whose Result field is polled (0 means still pending).</param>
/// <returns>Success when Result is 1, Error for any other non-zero Result (and always when built without WEBGPU_ASYNCIFY), TimedOut when Result is still 0 after the wait budget is used up.</returns>
WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params)
{
#if 0
    // This needs WGPUInstanceFeatureName_TimedWaitAny which works only with ASYNCIFY enabled
    // NOTE(review): disabled sketch - 'future' is not carried by AsyncWaitParamsWebGPU yet, so this does not compile as-is
    WGPUFutureWaitInfo futureWaitInfo;
    futureWaitInfo.future = future;
    futureWaitInfo.completed = WGPU_FALSE;
    uint64 timeoutNS = 5000000000ull; // Wait max 5 seconds
    return wgpuInstanceWaitAny(params.Instance, 1, &futureWaitInfo, timeoutNS);
#endif

#if WEBGPU_ASYNCIFY
    auto startTime = Platform::GetTimeSeconds();
    int32 ticksLeft = 500; // Wait max 5 seconds (500 sleeps of 10ms)
    while (Platform::AtomicRead(&params.Data->Result) == 0 && ticksLeft-- > 0)
        emscripten_sleep(10);

    // Decide on the result itself rather than on the tick counter: if the callback completes during the very last sleep,
    // the loop exits with ticksLeft == 0 although the request has finished, which must not be reported as a timeout.
    const auto result = Platform::AtomicRead(&params.Data->Result);
    if (result == 0)
    {
        // Store the measured wait duration so callers can include it in their timeout log message
        params.Data->WaitTime = Platform::GetTimeSeconds() - startTime;
        return WGPUWaitStatus_TimedOut;
    }
    return result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
#else
    // Not possible to implement it here with stack preservation (need to go back with main thread to the browser)
    // Make GPU adapter/device requests register custom retry via emscripten_set_main_loop with coroutine or something like that to make it work without ASYNCIFY
    return WGPUWaitStatus_Error;
#endif
}
|
||||
|
||||
WGPUVertexFormat RenderToolsWebGPU::ToVertexFormat(PixelFormat format)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include "Engine/Core/Types/String.h"
|
||||
#include "IncludeWebGPU.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
enum class PixelFormat : unsigned;
|
||||
|
||||
@@ -29,6 +28,13 @@ struct AsyncCallbackDataWebGPU
|
||||
}
|
||||
};
|
||||
|
||||
/// <summary>
/// Parameters passed by value to WebGPUAsyncWait.
/// </summary>
struct AsyncWaitParamsWebGPU
|
||||
{
|
||||
/// <summary>
/// The WebGPU instance handle supplied by the caller of Wait.
/// </summary>
WGPUInstance Instance;
|
||||
/// <summary>
/// The callback data that WebGPUAsyncWait polls (Result) and updates on timeout (WaitTime).
/// </summary>
AsyncCallbackDataWebGPU* Data;
|
||||
};
|
||||
WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params);
|
||||
|
||||
/// <summary>
|
||||
/// Helper utility to run WebGPU APIs that use async callbacks synchronously by waiting for the spontaneous callback with an active-waiting loop.
|
||||
/// </summary>
|
||||
@@ -45,18 +51,9 @@ struct AsyncCallbackWebGPU
|
||||
Info.userdata1 = &Data;
|
||||
}
|
||||
|
||||
WGPUWaitStatus Wait()
|
||||
FORCE_INLINE WGPUWaitStatus Wait(WGPUInstance instance)
|
||||
{
|
||||
auto startTime = Platform::GetTimeSeconds();
|
||||
int32 ticksLeft = 500; // Wait max 5 second
|
||||
while (Platform::AtomicRead(&Data.Result) == 0 && ticksLeft-- > 0)
|
||||
emscripten_sleep(10);
|
||||
if (ticksLeft <= 0)
|
||||
{
|
||||
Data.WaitTime = Platform::GetTimeSeconds() - startTime;
|
||||
return WGPUWaitStatus_TimedOut;
|
||||
}
|
||||
return Data.Result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
|
||||
return WebGPUAsyncWait({ instance, &Data });
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user