Refactor WebGPU ASYNCIFY to use JSPI
Smaller build size and better performance. Also, link time goes down a lot
This commit is contained in:
@@ -31,10 +31,10 @@ void* GPUBufferWebGPU::Map(GPUResourceMapMode mode)
|
||||
userData.Call(status == WGPUMapAsyncStatus_Success, status, message);
|
||||
};
|
||||
wgpuBufferMapAsync(Buffer, mapMode, 0, _desc.Size, mapRequest.Info);
|
||||
auto mapRequestResult = mapRequest.Wait();
|
||||
auto mapRequestResult = mapRequest.Wait(_device->WebGPUInstance);
|
||||
if (mapRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Error, "WebGPU buffer map request has timed out after {}s", mapRequest.Data.WaitTime);
|
||||
LOG(Error, "WebGPU buffer map request has timed out after {}s", (int32)mapRequest.Data.WaitTime);
|
||||
return nullptr;
|
||||
}
|
||||
if (mapRequestResult == WGPUWaitStatus_Error)
|
||||
|
||||
@@ -618,10 +618,10 @@ bool GPUDeviceWebGPU::Init()
|
||||
userData.Call(status == WGPURequestDeviceStatus_Success, status, message);
|
||||
};
|
||||
wgpuAdapterRequestDevice(Adapter->Adapter, &deviceDesc, deviceRequest.Info);
|
||||
auto deviceRequestResult = deviceRequest.Wait();
|
||||
auto deviceRequestResult = deviceRequest.Wait(WebGPUInstance);
|
||||
if (deviceRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Fatal, "WebGPU device request has timed out after {}s", deviceRequest.Data.WaitTime);
|
||||
LOG(Fatal, "WebGPU device request has timed out after {}s", (int32)deviceRequest.Data.WaitTime);
|
||||
return true;
|
||||
}
|
||||
if (deviceRequestResult == WGPUWaitStatus_Error)
|
||||
@@ -701,6 +701,11 @@ GPUDevice* CreateGPUDeviceWebGPU()
|
||||
{
|
||||
// Create instance
|
||||
WGPUInstanceDescriptor instanceDesc = WGPU_INSTANCE_DESCRIPTOR_INIT;
|
||||
#if !WEBGPU_ASYNCIFY && 0
|
||||
WGPUInstanceFeatureName instanceFeatures[1] = { WGPUInstanceFeatureName_TimedWaitAny };
|
||||
instanceDesc.requiredFeatureCount = 1;
|
||||
instanceDesc.requiredFeatures = instanceFeatures;
|
||||
#endif
|
||||
WGPUInstance instance = wgpuCreateInstance(&instanceDesc);
|
||||
if (!instance)
|
||||
{
|
||||
@@ -727,10 +732,10 @@ GPUDevice* CreateGPUDeviceWebGPU()
|
||||
userData.Call(status == WGPURequestAdapterStatus_Success, status, message);
|
||||
};
|
||||
wgpuInstanceRequestAdapter(instance, &adapterOptions, adapterRequest.Info);
|
||||
auto adapterRequestResult = adapterRequest.Wait();
|
||||
auto adapterRequestResult = adapterRequest.Wait(instance);
|
||||
if (adapterRequestResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Fatal, "WebGPU adapter request has timed out after {}s", adapterRequest.Data.WaitTime);
|
||||
LOG(Fatal, "WebGPU adapter request has timed out after {}s", (int32)adapterRequest.Data.WaitTime);
|
||||
return nullptr;
|
||||
}
|
||||
if (adapterRequestResult == WGPUWaitStatus_Error)
|
||||
@@ -796,7 +801,7 @@ void GPUDeviceWebGPU::Dispose()
|
||||
|
||||
void GPUDeviceWebGPU::WaitForGPU()
|
||||
{
|
||||
if (QueueSubmits == 0)
|
||||
if (QueueSubmits == 0 || Engine::FatalError != FatalErrorType::None)
|
||||
return;
|
||||
QueueSubmits = 0;
|
||||
AsyncCallbackWebGPU<WGPUQueueWorkDoneCallbackInfo> workDone(WGPU_QUEUE_WORK_DONE_CALLBACK_INFO_INIT);
|
||||
@@ -806,10 +811,10 @@ void GPUDeviceWebGPU::WaitForGPU()
|
||||
userData.Call(status == WGPUQueueWorkDoneStatus_Success, status, message);
|
||||
};
|
||||
wgpuQueueOnSubmittedWorkDone(Queue, workDone.Info);
|
||||
auto workDoneResult = workDone.Wait();
|
||||
auto workDoneResult = workDone.Wait(WebGPUInstance);
|
||||
if (workDoneResult == WGPUWaitStatus_TimedOut)
|
||||
{
|
||||
LOG(Error, "WebGPU queue wait has timed out after {}s", workDone.Data.WaitTime);
|
||||
LOG(Error, "WebGPU queue wait has timed out after {}s", (int32)workDone.Data.WaitTime);
|
||||
return;
|
||||
}
|
||||
if (workDoneResult == WGPUWaitStatus_Error)
|
||||
|
||||
@@ -9,15 +9,37 @@ using Flax.Build.Platforms;
|
||||
/// </summary>
|
||||
public class GraphicsDeviceWebGPU : GraphicsDeviceBaseModule
|
||||
{
|
||||
/// <summary>
|
||||
/// Using ASYNCIFY leads to simple code by waiting on async WebGPU API callbacks with emscripten_sleep but doubles the code size and adds some overhead.
|
||||
/// https://emscripten.org/docs/porting/asyncify.html <br/>
|
||||
/// 0 - no async <br/>
|
||||
/// 1 - via Asyncify (causes the WASM to be much larger) <br/>
|
||||
/// 2 - via JSPI (experimental) <br/>
|
||||
/// </summary>
|
||||
public int WithAsyncify = 2;
|
||||
|
||||
/// <inheritdoc />
|
||||
public override void Setup(BuildOptions options)
|
||||
{
|
||||
base.Setup(options);
|
||||
|
||||
var port = "--use-port=emdawnwebgpu:cpp_bindings=false";
|
||||
options.CompileEnv.CustomArgs.Add(port);
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
|
||||
options.OutputFiles.Add(port);
|
||||
options.CompileEnv.CustomArgs.Add(port);
|
||||
if (WithAsyncify == 2)
|
||||
{
|
||||
options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY=2");
|
||||
options.LinkEnv.CustomArgs.Add("-sJSPI");
|
||||
options.LinkEnv.CustomArgs.Add("-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=$getWasmTableEntry");
|
||||
}
|
||||
else if (WithAsyncify == 1)
|
||||
{
|
||||
options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY");
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
|
||||
options.LinkEnv.CustomArgs.Add("-sASYNCIFY_STACK_SIZE=8192");
|
||||
//options.LinkEnv.CustomArgs.Add("-sASYNCIFY_ONLY=[\"main\",\"WebGPUAsyncWait(AsyncWaitParamsWebGPU)\"]"); // TODO: try indirect calls only to reduce the code size
|
||||
options.LinkEnv.CustomArgs.Add("-sEXPORT_ALL"); // This bloats JS but otherwise dynamic calls don't work properly
|
||||
}
|
||||
options.PublicDefinitions.Add("GRAPHICS_API_WEBGPU");
|
||||
options.PrivateIncludePaths.Add(Path.Combine(EmscriptenSdk.Instance.EmscriptenPath, "emscripten/cache/ports/emdawnwebgpu/emdawnwebgpu_pkg/webgpu/include"));
|
||||
options.PrivateDependencies.Add("lz4");
|
||||
|
||||
@@ -4,6 +4,36 @@
|
||||
|
||||
#include "RenderToolsWebGPU.h"
|
||||
#include "Engine/Graphics/PixelFormat.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
/// <summary>
/// Waits for the async WebGPU callback tracked by params.Data to publish a result, polling it every 10ms for up to about 5 seconds.
/// </summary>
/// <param name="params">The WebGPU instance (referenced only by the disabled wgpuInstanceWaitAny path) and the callback state to poll.</param>
/// <returns>Success when Result is 1, Error for any other non-zero Result, TimedOut when Result is still 0 after the wait. Always Error when WEBGPU_ASYNCIFY is not enabled.</returns>
WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params)
{
#if 0
    // This needs WGPUInstanceFeatureName_TimedWaitAny which works only with ASYNCIFY enabled
    // NOTE(review): 'future' is not declared in this function - it has to be supplied by the caller before this path can be enabled
    WGPUFutureWaitInfo futureWaitInfo;
    futureWaitInfo.future = future;
    futureWaitInfo.completed = WGPU_FALSE;
    uint64 timeoutNS = 5000000000ull; // Wait max 5 seconds
    return wgpuInstanceWaitAny(params.Instance, 1, &futureWaitInfo, timeoutNS);
#endif

#if WEBGPU_ASYNCIFY
    auto startTime = Platform::GetTimeSeconds();
    int32 ticksLeft = 500; // Wait max 5 seconds (500 sleeps of 10ms)
    while (Platform::AtomicRead(&params.Data->Result) == 0 && ticksLeft-- > 0)
        emscripten_sleep(10);

    // Decide on the published result rather than on the tick counter: a callback that completes during the very last sleep
    // leaves ticksLeft at 0 with a valid Result, which must not be reported as a timeout
    const auto result = Platform::AtomicRead(&params.Data->Result);
    if (result == 0)
    {
        params.Data->WaitTime = Platform::GetTimeSeconds() - startTime;
        return WGPUWaitStatus_TimedOut;
    }
    return result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
#else
    // Not possible to implement it here with stack preservation (need to go back with main thread to the browser)
    // Make GPU adapter/device requests register custom retry via emscripten_set_main_loop with coroutine or something like that to make it work without ASYNCIFY
    return WGPUWaitStatus_Error;
#endif
}
|
||||
|
||||
WGPUVertexFormat RenderToolsWebGPU::ToVertexFormat(PixelFormat format)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include "Engine/Core/Types/String.h"
|
||||
#include "IncludeWebGPU.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
enum class PixelFormat : unsigned;
|
||||
|
||||
@@ -29,6 +28,13 @@ struct AsyncCallbackDataWebGPU
|
||||
}
|
||||
};
|
||||
|
||||
/// <summary>
/// Arguments bundle passed by value to WebGPUAsyncWait: the WebGPU instance and a pointer to the async callback state to wait on.
/// Initialized positionally by callers, so the order of the fields must stay as declared.
/// </summary>
struct AsyncWaitParamsWebGPU
|
||||
{
|
||||
WGPUInstance Instance;
|
||||
AsyncCallbackDataWebGPU* Data;
|
||||
};
|
||||
WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params);
|
||||
|
||||
/// <summary>
|
||||
/// Helper utility to run WebGPU APIs that use async callback in sync by waiting on the spontaneous callback with an active-waiting loop.
|
||||
/// </summary>
|
||||
@@ -45,18 +51,9 @@ struct AsyncCallbackWebGPU
|
||||
Info.userdata1 = &Data;
|
||||
}
|
||||
|
||||
WGPUWaitStatus Wait()
|
||||
FORCE_INLINE WGPUWaitStatus Wait(WGPUInstance instance)
|
||||
{
|
||||
auto startTime = Platform::GetTimeSeconds();
|
||||
int32 ticksLeft = 500; // Wait max 5 second
|
||||
while (Platform::AtomicRead(&Data.Result) == 0 && ticksLeft-- > 0)
|
||||
emscripten_sleep(10);
|
||||
if (ticksLeft <= 0)
|
||||
{
|
||||
Data.WaitTime = Platform::GetTimeSeconds() - startTime;
|
||||
return WGPUWaitStatus_TimedOut;
|
||||
}
|
||||
return Data.Result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
|
||||
return WebGPUAsyncWait({ instance, &Data });
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -5,8 +5,33 @@
|
||||
#include "Engine/Engine/Engine.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
// Reference: https://github.com/kainino0x/webgpu-cross-platform-demo/blob/f5c69c6fccbb2584c1b6f9e559f9a41a38a9b5ad/main.cpp#L692-L704
|
||||
// Reference: https://github.com/kainino0x/webgpu-cross-platform-demo/blob/c26ea3e29ed9f73f9b39bddf7964b482ce3c6964/main.cpp#L737-L758
|
||||
#define WEB_LOOP_MODE 2 // 0 - default, 1 - Asyncify, 2 - JSPI
|
||||
#if WEB_LOOP_MODE != 0
|
||||
// Workaround for JSPI not working in emscripten_set_main_loop. Loosely based on this code:
|
||||
// https://github.com/emscripten-core/emscripten/issues/22493#issuecomment-2330275282
|
||||
// This code only works when JSPI is enabled.
|
||||
typedef bool (*FrameCallback)(); // If callback returns true, continues the loop.
|
||||
// Starts a requestAnimationFrame-driven loop on the JavaScript side. Each frame awaits the given callback and schedules
// the next frame only when the callback resolves to true, so frames never overlap while the callback is suspended.
// NOTE(review): the Asyncify branch passes the literal name "callback" to ccall instead of the function pointer argument - confirm that an export with this name exists before switching WEB_LOOP_MODE to 1.
EM_JS(void, requestAnimationFrameLoopWithJSPI, (FrameCallback callback), {
#if WEB_LOOP_MODE == 2
    var callback = WebAssembly.promising(getWasmTableEntry(callback));
#elif WEB_LOOP_MODE == 1
    var callback = () => globalThis['Module']['ccall']("callback", "boolean", [], [], { async: true });
#endif
    async function tick() {
        // Start the frame callback. 'await' means we won't call
        // requestAnimationFrame again until it completes.
        var keepLooping = await callback();
        if (keepLooping) requestAnimationFrame(tick);
    }
    requestAnimationFrame(tick);
})
|
||||
#endif
|
||||
|
||||
class PlatformMain
|
||||
{
|
||||
#if WEB_LOOP_MODE == 0
|
||||
static void Loop()
|
||||
{
|
||||
// Tick engine
|
||||
@@ -16,11 +41,30 @@ class PlatformMain
|
||||
{
|
||||
// Exit engine
|
||||
Engine::OnExit();
|
||||
emscripten_cancel_main_loop();
|
||||
emscripten_force_exit(Engine::ExitCode);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static bool Loop()
|
||||
{
|
||||
if (Engine::FatalError != FatalErrorType::None)
|
||||
return false;
|
||||
|
||||
// Tick engine
|
||||
Engine::OnLoop();
|
||||
|
||||
if (Engine::ShouldExit())
|
||||
{
|
||||
// Exit engine
|
||||
Engine::OnExit();
|
||||
emscripten_cancel_main_loop();
|
||||
emscripten_force_exit(Engine::ExitCode);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
static int32 Main()
|
||||
@@ -31,7 +75,11 @@ public:
|
||||
return result;
|
||||
|
||||
// Setup main loop to be called by Emscripten
|
||||
#if WEB_LOOP_MODE == 0
|
||||
emscripten_set_main_loop(Loop, -1, false);
|
||||
#else
|
||||
requestAnimationFrameLoopWithJSPI(Loop);
|
||||
#endif
|
||||
emscripten_set_main_loop_timing(EM_TIMING_RAF, 1); // Run main loop on each animation frame (vsync)
|
||||
|
||||
// Run the first loop
|
||||
|
||||
@@ -125,7 +125,6 @@ void WebPlatform::SetThreadAffinityMask(uint64 affinityMask)
|
||||
|
||||
void WebPlatform::Sleep(int32 milliseconds)
|
||||
{
|
||||
//emscripten_sleep(milliseconds);
|
||||
emscripten_thread_sleep(milliseconds);
|
||||
}
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ namespace Flax.Build.Platforms
|
||||
if (options.CompileEnv.FavorSizeOrSpeed == FavorSizeOrSpeed.SmallCode)
|
||||
args.Add("-Oz");
|
||||
if (options.CompileEnv.FavorSizeOrSpeed == FavorSizeOrSpeed.FastCode)
|
||||
args.Add("-O3");
|
||||
args.Add(debugInformation ? "-O2" : "-O3");
|
||||
else if (optimization && options.Configuration == TargetConfiguration.Release)
|
||||
args.Add("-O3");
|
||||
else if (optimization)
|
||||
@@ -290,7 +290,9 @@ namespace Flax.Build.Platforms
|
||||
{
|
||||
args.Add(string.Format("-o \"{0}\"", outputFilePath.Replace('\\', '/')));
|
||||
|
||||
// Debug options
|
||||
//args.Add("--minify=0");
|
||||
//args.Add("-sASSERTIONS=2");
|
||||
|
||||
AddSharedArgs(args, options, options.LinkEnv.DebugInformation, options.LinkEnv.Optimization);
|
||||
|
||||
@@ -307,7 +309,6 @@ namespace Flax.Build.Platforms
|
||||
initialMemory = Math.Max(initialMemory, 64); // Address Sanitizer needs more memory
|
||||
args.Add($"-sINITIAL_MEMORY={initialMemory}MB");
|
||||
args.Add("-sSTACK_SIZE=4MB");
|
||||
args.Add("-sASYNCIFY_STACK_SIZE=8192");
|
||||
args.Add("-sALLOW_MEMORY_GROWTH=1");
|
||||
//args.Add("-sSAFE_HEAP=1");
|
||||
args.Add("-sABORTING_MALLOC=0");
|
||||
|
||||
Reference in New Issue
Block a user