diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp
index 5274e3bff..5e1132a73 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp
@@ -31,10 +31,10 @@ void* GPUBufferWebGPU::Map(GPUResourceMapMode mode)
         userData.Call(status == WGPUMapAsyncStatus_Success, status, message);
     };
     wgpuBufferMapAsync(Buffer, mapMode, 0, _desc.Size, mapRequest.Info);
-    auto mapRequestResult = mapRequest.Wait();
+    auto mapRequestResult = mapRequest.Wait(_device->WebGPUInstance);
     if (mapRequestResult == WGPUWaitStatus_TimedOut)
     {
-        LOG(Error, "WebGPU buffer map request has timed out after {}s", mapRequest.Data.WaitTime);
+        LOG(Error, "WebGPU buffer map request has timed out after {}s", (int32)mapRequest.Data.WaitTime);
         return nullptr;
     }
     if (mapRequestResult == WGPUWaitStatus_Error)
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
index 1abf8a386..025db0d3f 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
@@ -618,10 +618,10 @@ bool GPUDeviceWebGPU::Init()
         userData.Call(status == WGPURequestDeviceStatus_Success, status, message);
     };
     wgpuAdapterRequestDevice(Adapter->Adapter, &deviceDesc, deviceRequest.Info);
-    auto deviceRequestResult = deviceRequest.Wait();
+    auto deviceRequestResult = deviceRequest.Wait(WebGPUInstance);
     if (deviceRequestResult == WGPUWaitStatus_TimedOut)
     {
-        LOG(Fatal, "WebGPU device request has timed out after {}s", deviceRequest.Data.WaitTime);
+        LOG(Fatal, "WebGPU device request has timed out after {}s", (int32)deviceRequest.Data.WaitTime);
         return true;
     }
     if (deviceRequestResult == WGPUWaitStatus_Error)
@@ -701,6 +701,11 @@ GPUDevice* CreateGPUDeviceWebGPU()
 {
     // Create instance
     WGPUInstanceDescriptor instanceDesc = WGPU_INSTANCE_DESCRIPTOR_INIT;
+#if !WEBGPU_ASYNCIFY && 0
+    WGPUInstanceFeatureName instanceFeatures[1] = { WGPUInstanceFeatureName_TimedWaitAny };
+    instanceDesc.requiredFeatureCount = 1;
+    instanceDesc.requiredFeatures = instanceFeatures;
+#endif
     WGPUInstance instance = wgpuCreateInstance(&instanceDesc);
     if (!instance)
     {
@@ -727,10 +732,10 @@ GPUDevice* CreateGPUDeviceWebGPU()
         userData.Call(status == WGPURequestAdapterStatus_Success, status, message);
     };
     wgpuInstanceRequestAdapter(instance, &adapterOptions, adapterRequest.Info);
-    auto adapterRequestResult = adapterRequest.Wait();
+    auto adapterRequestResult = adapterRequest.Wait(instance);
     if (adapterRequestResult == WGPUWaitStatus_TimedOut)
     {
-        LOG(Fatal, "WebGPU adapter request has timed out after {}s", adapterRequest.Data.WaitTime);
+        LOG(Fatal, "WebGPU adapter request has timed out after {}s", (int32)adapterRequest.Data.WaitTime);
         return nullptr;
     }
     if (adapterRequestResult == WGPUWaitStatus_Error)
@@ -796,7 +801,7 @@ void GPUDeviceWebGPU::Dispose()
 
 void GPUDeviceWebGPU::WaitForGPU()
 {
-    if (QueueSubmits == 0)
+    if (QueueSubmits == 0 || Engine::FatalError != FatalErrorType::None)
         return;
     QueueSubmits = 0;
     AsyncCallbackWebGPU workDone(WGPU_QUEUE_WORK_DONE_CALLBACK_INFO_INIT);
@@ -806,10 +811,10 @@ void GPUDeviceWebGPU::WaitForGPU()
         userData.Call(status == WGPUQueueWorkDoneStatus_Success, status, message);
     };
     wgpuQueueOnSubmittedWorkDone(Queue, workDone.Info);
-    auto workDoneResult = workDone.Wait();
+    auto workDoneResult = workDone.Wait(WebGPUInstance);
     if (workDoneResult == WGPUWaitStatus_TimedOut)
     {
-        LOG(Error, "WebGPU queue wait has timed out after {}s", workDone.Data.WaitTime);
+        LOG(Error, "WebGPU queue wait has timed out after {}s", (int32)workDone.Data.WaitTime);
         return;
     }
     if (workDoneResult == WGPUWaitStatus_Error)
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GraphicsDeviceWebGPU.Build.cs b/Source/Engine/GraphicsDevice/WebGPU/GraphicsDeviceWebGPU.Build.cs
index def5a7f8f..878dd4ecd 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GraphicsDeviceWebGPU.Build.cs
+++ b/Source/Engine/GraphicsDevice/WebGPU/GraphicsDeviceWebGPU.Build.cs
@@ -9,15 +9,37 @@ using Flax.Build.Platforms;
 /// </summary>
 public class GraphicsDeviceWebGPU : GraphicsDeviceBaseModule
 {
+    /// <summary>
+    /// Using ASYNCIFY leads to simple code by waiting on async WebGPU API callbacks with emscripten_sleep but doubles the code size and adds some overhead.
+    /// https://emscripten.org/docs/porting/asyncify.html
+    /// 0 - no async
+    /// 1 - via Asyncify (causes the WASM to be much larger)
+    /// 2 - via JSPI (experimental)
+    /// </summary>
+    public int WithAsyncify = 2;
+
     /// <inheritdoc />
     public override void Setup(BuildOptions options)
     {
         base.Setup(options);
 
         var port = "--use-port=emdawnwebgpu:cpp_bindings=false";
-        options.CompileEnv.CustomArgs.Add(port);
-        options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
         options.OutputFiles.Add(port);
+        options.CompileEnv.CustomArgs.Add(port);
+        if (WithAsyncify == 2)
+        {
+            options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY=2");
+            options.LinkEnv.CustomArgs.Add("-sJSPI");
+            options.LinkEnv.CustomArgs.Add("-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=$getWasmTableEntry");
+        }
+        else if (WithAsyncify == 1)
+        {
+            options.PrivateDefinitions.Add("WEBGPU_ASYNCIFY");
+            options.LinkEnv.CustomArgs.Add("-sASYNCIFY");
+            options.LinkEnv.CustomArgs.Add("-sASYNCIFY_STACK_SIZE=8192");
+            //options.LinkEnv.CustomArgs.Add("-sASYNCIFY_ONLY=[\"main\",\"WebGPUAsyncWait(AsyncWaitParamsWebGPU)\"]"); // TODO: try indirect calls only to reduce the code size
+            options.LinkEnv.CustomArgs.Add("-sEXPORT_ALL"); // This bloats JS but otherwise dynamic calls don't work properly
+        }
         options.PublicDefinitions.Add("GRAPHICS_API_WEBGPU");
         options.PrivateIncludePaths.Add(Path.Combine(EmscriptenSdk.Instance.EmscriptenPath, "emscripten/cache/ports/emdawnwebgpu/emdawnwebgpu_pkg/webgpu/include"));
         options.PrivateDependencies.Add("lz4");
diff --git a/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.cpp
index 3dbf3f489..02508e790 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.cpp
@@ -4,6 +4,36 @@
 
 #include "RenderToolsWebGPU.h"
 #include "Engine/Graphics/PixelFormat.h"
+#include <emscripten/emscripten.h>
+
+WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params)
+{
+#if 0
+    // This needs WGPUInstanceFeatureName_TimedWaitAny which works only with ASYNCIFY enabled
+    WGPUFutureWaitInfo futureWaitInfo;
+    futureWaitInfo.future = future;
+    futureWaitInfo.completed = WGPU_FALSE;
+    uint64 timeoutNS = 5000000000ull; // Wait max 5 seconds
+    return wgpuInstanceWaitAny(params.Instance, 1, &futureWaitInfo, timeoutNS);
+#endif
+
+#if WEBGPU_ASYNCIFY
+    auto startTime = Platform::GetTimeSeconds();
+    int32 ticksLeft = 500; // Wait max 5 seconds
+    while (Platform::AtomicRead(&params.Data->Result) == 0 && ticksLeft-- > 0)
+        emscripten_sleep(10);
+    if (Platform::AtomicRead(&params.Data->Result) == 0) // Timed out only when no result arrived (a result landing during the last sleep still counts)
+    {
+        params.Data->WaitTime = Platform::GetTimeSeconds() - startTime;
+        return WGPUWaitStatus_TimedOut;
+    }
+    return params.Data->Result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
+#else
+    // Not possible to implement it here with stack preservation (need to go back with main thread to the browser)
+    // Make GPU adapter/device requests register custom retry via emscripten_set_main_loop with coroutine or something like that to make it work without ASYNCIFY
+    return WGPUWaitStatus_Error;
+#endif
+}
 
 WGPUVertexFormat RenderToolsWebGPU::ToVertexFormat(PixelFormat format)
 {
diff --git a/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.h
index 88824af63..eca17a98c 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/RenderToolsWebGPU.h
@@ -6,7 +6,6 @@
 
 #include "Engine/Core/Types/String.h"
 #include "IncludeWebGPU.h"
-#include <emscripten/emscripten.h>
 
 enum class PixelFormat : unsigned;
 
@@ -29,6 +28,20 @@ struct AsyncCallbackDataWebGPU
     }
 };
 
+/// <summary>
+/// Arguments for WebGPUAsyncWait: the WebGPU instance and the async callback state to wait on.
+/// </summary>
+struct AsyncWaitParamsWebGPU
+{
+    WGPUInstance Instance;
+    AsyncCallbackDataWebGPU* Data;
+};
+
+/// <summary>
+/// Waits until the async callback sets Data->Result (polls in 10ms sleeps, up to 500 ticks) and stores the elapsed time in Data->WaitTime on timeout. Returns WGPUWaitStatus_Error when built without WEBGPU_ASYNCIFY.
+/// </summary>
+WGPUWaitStatus WebGPUAsyncWait(AsyncWaitParamsWebGPU params);
+
 /// <summary>
 /// Helper utility to run WebGPU APIs that use async callback in sync by waiting on the spontaneous call back with an active-waiting loop.
 /// </summary>
@@ -45,18 +58,9 @@ struct AsyncCallbackWebGPU
         Info.userdata1 = &Data;
     }
 
-    WGPUWaitStatus Wait()
+    FORCE_INLINE WGPUWaitStatus Wait(WGPUInstance instance)
     {
-        auto startTime = Platform::GetTimeSeconds();
-        int32 ticksLeft = 500; // Wait max 5 second
-        while (Platform::AtomicRead(&Data.Result) == 0 && ticksLeft-- > 0)
-            emscripten_sleep(10);
-        if (ticksLeft <= 0)
-        {
-            Data.WaitTime = Platform::GetTimeSeconds() - startTime;
-            return WGPUWaitStatus_TimedOut;
-        }
-        return Data.Result == 1 ? WGPUWaitStatus_Success : WGPUWaitStatus_Error;
+        return WebGPUAsyncWait({ instance, &Data });
     }
 };
 
diff --git a/Source/Engine/Main/Web/main.cpp b/Source/Engine/Main/Web/main.cpp
index dea543f24..53aef16f8 100644
--- a/Source/Engine/Main/Web/main.cpp
+++ b/Source/Engine/Main/Web/main.cpp
@@ -5,8 +5,33 @@
 #include "Engine/Engine/Engine.h"
 #include <emscripten/emscripten.h>
 
+// Reference: https://github.com/kainino0x/webgpu-cross-platform-demo/blob/f5c69c6fccbb2584c1b6f9e559f9a41a38a9b5ad/main.cpp#L692-L704
+// Reference: https://github.com/kainino0x/webgpu-cross-platform-demo/blob/c26ea3e29ed9f73f9b39bddf7964b482ce3c6964/main.cpp#L737-L758
+#define WEB_LOOP_MODE 2 // 0 - default, 1 - Asyncify, 2 - JSPI
+#if WEB_LOOP_MODE != 0
+// Workaround for JSPI not working in emscripten_set_main_loop. Loosely based on this code:
+// https://github.com/emscripten-core/emscripten/issues/22493#issuecomment-2330275282
+// This code only works when JSPI is enabled.
+typedef bool (*FrameCallback)(); // If callback returns true, continues the loop.
+EM_JS(void, requestAnimationFrameLoopWithJSPI, (FrameCallback callback), {
+#if WEB_LOOP_MODE == 2
+    var callback = WebAssembly.promising(getWasmTableEntry(callback));
+#elif WEB_LOOP_MODE == 1
+    var callback = () => globalThis['Module']['ccall']("callback", "boolean", [], [], { async: true });
+#endif
+    async function tick() {
+        // Start the frame callback. 'await' means we won't call
+        // requestAnimationFrame again until it completes.
+        var keepLooping = await callback();
+        if (keepLooping) requestAnimationFrame(tick);
+    }
+    requestAnimationFrame(tick);
+})
+#endif
+
 class PlatformMain
 {
+#if WEB_LOOP_MODE == 0
     static void Loop()
     {
         // Tick engine
@@ -16,11 +41,30 @@ class PlatformMain
         {
             // Exit engine
             Engine::OnExit();
-            emscripten_cancel_main_loop();
-            emscripten_force_exit(Engine::ExitCode);
             return;
         }
     }
+#else
+    static bool Loop()
+    {
+        if (Engine::FatalError != FatalErrorType::None)
+            return false;
+
+        // Tick engine
+        Engine::OnLoop();
+
+        if (Engine::ShouldExit())
+        {
+            // Exit engine
+            Engine::OnExit();
+            emscripten_cancel_main_loop();
+            emscripten_force_exit(Engine::ExitCode);
+            return false;
+        }
+
+        return true;
+    }
+#endif
 
 public:
     static int32 Main()
@@ -31,7 +75,11 @@ public:
             return result;
 
         // Setup main loop to be called by Emscripten
+#if WEB_LOOP_MODE == 0
         emscripten_set_main_loop(Loop, -1, false);
+#else
+        requestAnimationFrameLoopWithJSPI(Loop);
+#endif
         emscripten_set_main_loop_timing(EM_TIMING_RAF, 1); // Run main loop on each animation frame (vsync)
 
         // Run the first loop
diff --git a/Source/Engine/Platform/Web/WebPlatform.cpp b/Source/Engine/Platform/Web/WebPlatform.cpp
index eeb448e1e..20b8720d6 100644
--- a/Source/Engine/Platform/Web/WebPlatform.cpp
+++ b/Source/Engine/Platform/Web/WebPlatform.cpp
@@ -125,7 +125,6 @@ void WebPlatform::SetThreadAffinityMask(uint64 affinityMask)
 
 void WebPlatform::Sleep(int32 milliseconds)
 {
-    //emscripten_sleep(milliseconds);
     emscripten_thread_sleep(milliseconds);
 }
 
diff --git a/Source/Tools/Flax.Build/Platforms/Web/WebToolchain.cs b/Source/Tools/Flax.Build/Platforms/Web/WebToolchain.cs
index f686b280c..28afe7eb9 100644
--- a/Source/Tools/Flax.Build/Platforms/Web/WebToolchain.cs
+++ b/Source/Tools/Flax.Build/Platforms/Web/WebToolchain.cs
@@ -123,7 +123,7 @@ namespace Flax.Build.Platforms
             if (options.CompileEnv.FavorSizeOrSpeed == FavorSizeOrSpeed.SmallCode)
                 args.Add("-Oz");
             if (options.CompileEnv.FavorSizeOrSpeed == FavorSizeOrSpeed.FastCode)
-                args.Add("-O3");
+                args.Add(debugInformation ? "-O2" : "-O3");
             else if (optimization && options.Configuration == TargetConfiguration.Release)
                 args.Add("-O3");
             else if (optimization)
@@ -290,7 +290,9 @@ namespace Flax.Build.Platforms
             {
                 args.Add(string.Format("-o \"{0}\"", outputFilePath.Replace('\\', '/')));
 
+                // Debug options
                 //args.Add("--minify=0");
+                //args.Add("-sASSERTIONS=2");
 
                 AddSharedArgs(args, options, options.LinkEnv.DebugInformation, options.LinkEnv.Optimization);
 
@@ -307,7 +309,6 @@ namespace Flax.Build.Platforms
                     initialMemory = Math.Max(initialMemory, 64); // Address Sanitizer needs more memory
                 args.Add($"-sINITIAL_MEMORY={initialMemory}MB");
                 args.Add("-sSTACK_SIZE=4MB");
-                args.Add("-sASYNCIFY_STACK_SIZE=8192");
                 args.Add("-sALLOW_MEMORY_GROWTH=1");
                 //args.Add("-sSAFE_HEAP=1");
                 args.Add("-sABORTING_MALLOC=0");