9#ifndef __STDC_FORMAT_MACROS
10#define __STDC_FORMAT_MACROS
15#include "vulkan/vulkan.h"
16#include "glslang_c_interface.h"
17#elif(VKFFT_BACKEND==1)
19#include <cuda_runtime.h>
21#include <cuda_runtime_api.h>
23#elif(VKFFT_BACKEND==2)
24#ifndef __HIP_PLATFORM_HCC__
25#define __HIP_PLATFORM_HCC__
27#include <hip/hip_runtime.h>
28#include <hip/hiprtc.h>
29#include <hip/hip_runtime_api.h>
30#include <hip/hip_complex.h>
31#elif(VKFFT_BACKEND==3)
32#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS
33#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
36#include <OpenCL/opencl.h>
44VkResult
CreateDebugUtilsMessengerEXT(
VkGPU* vkGPU,
const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) {
46 PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->
instance,
"vkCreateDebugUtilsMessengerEXT");
48 return func(vkGPU->
instance, pCreateInfo, pAllocator, pDebugMessenger);
51 return VK_ERROR_EXTENSION_NOT_PRESENT;
56 PFN_vkDestroyDebugUtilsMessengerEXT func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->
instance,
"vkDestroyDebugUtilsMessengerEXT");
61static VKAPI_ATTR VkBool32 VKAPI_CALL
debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
void* pUserData) {
62 printf(
"validation layer: %s\n", pCallbackData->pMessage);
71 VkDebugUtilsMessengerCreateInfoEXT createInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
72 createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
73 createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
77 return VK_ERROR_INITIALIZATION_FAILED;
84 vkEnumerateInstanceLayerProperties(&layerCount, NULL);
86 VkLayerProperties* availableLayers = (VkLayerProperties*)malloc(
sizeof(VkLayerProperties) * layerCount);
87 if (!availableLayers)
return VK_INCOMPLETE;
88 vkEnumerateInstanceLayerProperties(&layerCount, availableLayers);
89 if (availableLayers) {
90 for (uint64_t i = 0; i < layerCount; i++) {
91 if (strcmp(
"VK_LAYER_KHRONOS_validation", availableLayers[i].layerName) == 0) {
92 free(availableLayers);
96 free(availableLayers);
101 return VK_ERROR_LAYER_NOT_PRESENT;
105 std::vector<const char*> extensions;
108 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
111#if (VK_API_VERSION>10)
113 extensions.push_back(
"VK_KHR_get_physical_device_properties2");
126 VkResult res = VK_SUCCESS;
130 if (res != VK_SUCCESS)
return res;
133 VkApplicationInfo applicationInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO };
134 applicationInfo.pApplicationName =
"VkFFT";
136 applicationInfo.pEngineName =
"VkFFT";
137 applicationInfo.engineVersion = 1;
138#if (VK_API_VERSION>=12)
139 applicationInfo.apiVersion = VK_API_VERSION_1_2;
140#elif (VK_API_VERSION==11)
141 applicationInfo.apiVersion = VK_API_VERSION_1_1;
143 applicationInfo.apiVersion = VK_API_VERSION_1_0;
146 VkInstanceCreateInfo createInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
147 createInfo.flags = 0;
148 createInfo.pApplicationInfo = &applicationInfo;
151 createInfo.enabledExtensionCount = (uint32_t)(extensions.size());
152 createInfo.ppEnabledExtensionNames = extensions.data();
154 VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
157 createInfo.enabledLayerCount = 1;
158 const char* validationLayers =
"VK_LAYER_KHRONOS_validation";
159 createInfo.ppEnabledLayerNames = &validationLayers;
160 debugCreateInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
161 debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
163 createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo;
166 createInfo.enabledLayerCount = 0;
168 createInfo.pNext =
nullptr;
171 res = vkCreateInstance(&createInfo, NULL, &vkGPU->
instance);
172 if (res != VK_SUCCESS)
return res;
179 VkResult res = VK_SUCCESS;
180 uint32_t deviceCount;
181 res = vkEnumeratePhysicalDevices(vkGPU->
instance, &deviceCount, NULL);
182 if (res != VK_SUCCESS)
return res;
183 if (deviceCount == 0) {
184 return VK_ERROR_DEVICE_LOST;
187 VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(
sizeof(VkPhysicalDevice) * deviceCount);
188 if (!devices)
return VK_INCOMPLETE;
189 res = vkEnumeratePhysicalDevices(vkGPU->
instance, &deviceCount, devices);
190 if (res != VK_SUCCESS)
return res;
197 return VK_INCOMPLETE;
201 uint32_t queueFamilyCount;
204 VkQueueFamilyProperties* queueFamilies = (VkQueueFamilyProperties*)malloc(
sizeof(VkQueueFamilyProperties) * queueFamilyCount);
205 if (!queueFamilies)
return VK_INCOMPLETE;
209 for (; i < queueFamilyCount; i++) {
210 VkQueueFamilyProperties props = queueFamilies[i];
212 if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
217 if (i == queueFamilyCount) {
218 return VK_ERROR_INITIALIZATION_FAILED;
224 return VK_INCOMPLETE;
229 VkResult res = VK_SUCCESS;
230 VkDeviceQueueCreateInfo queueCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO };
232 if (res != VK_SUCCESS)
return res;
234 queueCreateInfo.queueCount = 1;
235 float queuePriorities = 1.0;
236 queueCreateInfo.pQueuePriorities = &queuePriorities;
237 VkDeviceCreateInfo deviceCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
238 VkPhysicalDeviceFeatures deviceFeatures = {};
240 case 1:
case 12:
case 17:
case 18:
case 101:
case 201:
case 1001: {
241 deviceFeatures.shaderFloat64 =
true;
244 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
245 deviceCreateInfo.queueCreateInfoCount = 1;
246 deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
248 if (res != VK_SUCCESS)
return res;
252#if (VK_API_VERSION>10)
254 VkPhysicalDeviceFeatures2 deviceFeatures2 = {};
255 VkPhysicalDevice16BitStorageFeatures shaderFloat16 = {};
256 shaderFloat16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
257 shaderFloat16.storageBuffer16BitAccess =
true;
262 deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
263 deviceFeatures2.pNext = &shaderFloat16;
264 deviceFeatures2.features = deviceFeatures;
265 vkGetPhysicalDeviceFeatures2(vkGPU->
physicalDevice, &deviceFeatures2);
266 deviceCreateInfo.pNext = &deviceFeatures2;
270 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
271 deviceCreateInfo.queueCreateInfoCount = 1;
272 deviceCreateInfo.pEnabledFeatures = NULL;
274 if (res != VK_SUCCESS)
return res;
282 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
283 deviceCreateInfo.queueCreateInfoCount = 1;
284 deviceCreateInfo.pEnabledFeatures = NULL;
285 deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
287 if (res != VK_SUCCESS)
return res;
296 VkResult res = VK_SUCCESS;
297 VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
298 fenceCreateInfo.flags = 0;
299 res = vkCreateFence(vkGPU->
device, &fenceCreateInfo, NULL, &vkGPU->
fence);
304 VkResult res = VK_SUCCESS;
305 VkCommandPoolCreateInfo commandPoolCreateInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
306 commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
308 res = vkCreateCommandPool(vkGPU->
device, &commandPoolCreateInfo, NULL, &vkGPU->
commandPool);
313 VkPhysicalDeviceMemoryProperties memoryProperties = { 0 };
315 vkGetPhysicalDeviceMemoryProperties(vkGPU->
physicalDevice, &memoryProperties);
317 for (uint64_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
318 if ((memoryTypeBits & ((uint64_t)1 << i)) && ((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) && (memoryProperties.memoryHeaps[memoryProperties.memoryTypes[i].heapIndex].size >= memorySize))
320 memoryTypeIndex[0] = (uint32_t)i;
327VkFFTResult allocateBuffer(
VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size) {
330 VkResult res = VK_SUCCESS;
331 uint32_t queueFamilyIndices;
332 VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
333 bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
334 bufferCreateInfo.queueFamilyIndexCount = 1;
335 bufferCreateInfo.pQueueFamilyIndices = &queueFamilyIndices;
336 bufferCreateInfo.size = size;
337 bufferCreateInfo.usage = usageFlags;
338 res = vkCreateBuffer(vkGPU->
device, &bufferCreateInfo, NULL, buffer);
340 VkMemoryRequirements memoryRequirements = { 0 };
341 vkGetBufferMemoryRequirements(vkGPU->
device, buffer[0], &memoryRequirements);
342 VkMemoryAllocateInfo memoryAllocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
343 memoryAllocateInfo.allocationSize = memoryRequirements.size;
344 resFFT =
findMemoryType(vkGPU, memoryRequirements.memoryTypeBits, memoryRequirements.size, propertyFlags, &memoryAllocateInfo.memoryTypeIndex);
346 res = vkAllocateMemory(vkGPU->
device, &memoryAllocateInfo, NULL, deviceMemory);
348 res = vkBindBufferMemory(vkGPU->
device, buffer[0], deviceMemory[0], 0);
355 VkResult res = VK_SUCCESS;
356 uint64_t stagingBufferSize = bufferSize;
357 VkBuffer stagingBuffer = { 0 };
358 VkDeviceMemory stagingBufferMemory = { 0 };
359 resFFT =
allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
362 res = vkMapMemory(vkGPU->
device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
364 memcpy(data, arr, stagingBufferSize);
365 vkUnmapMemory(vkGPU->
device, stagingBufferMemory);
366 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
367 commandBufferAllocateInfo.commandPool = vkGPU->
commandPool;
368 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
369 commandBufferAllocateInfo.commandBufferCount = 1;
370 VkCommandBuffer commandBuffer = { 0 };
371 res = vkAllocateCommandBuffers(vkGPU->
device, &commandBufferAllocateInfo, &commandBuffer);
373 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
374 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
375 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
377 VkBufferCopy copyRegion = { 0 };
378 copyRegion.srcOffset = 0;
379 copyRegion.dstOffset = 0;
380 copyRegion.size = stagingBufferSize;
381 vkCmdCopyBuffer(commandBuffer, stagingBuffer, buffer[0], 1, &copyRegion);
382 res = vkEndCommandBuffer(commandBuffer);
384 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
385 submitInfo.commandBufferCount = 1;
386 submitInfo.pCommandBuffers = &commandBuffer;
387 res = vkQueueSubmit(vkGPU->
queue, 1, &submitInfo, vkGPU->
fence);
389 res = vkWaitForFences(vkGPU->
device, 1, &vkGPU->
fence, VK_TRUE, 100000000000);
391 res = vkResetFences(vkGPU->
device, 1, &vkGPU->
fence);
394 vkDestroyBuffer(vkGPU->
device, stagingBuffer, NULL);
395 vkFreeMemory(vkGPU->
device, stagingBufferMemory, NULL);
401 VkResult res = VK_SUCCESS;
402 uint64_t stagingBufferSize = bufferSize;
403 VkBuffer stagingBuffer = { 0 };
404 VkDeviceMemory stagingBufferMemory = { 0 };
405 resFFT =
allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
407 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
408 commandBufferAllocateInfo.commandPool = vkGPU->
commandPool;
409 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
410 commandBufferAllocateInfo.commandBufferCount = 1;
411 VkCommandBuffer commandBuffer = { 0 };
412 res = vkAllocateCommandBuffers(vkGPU->
device, &commandBufferAllocateInfo, &commandBuffer);
414 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
415 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
416 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
418 VkBufferCopy copyRegion = { 0 };
419 copyRegion.srcOffset = 0;
420 copyRegion.dstOffset = 0;
421 copyRegion.size = stagingBufferSize;
422 vkCmdCopyBuffer(commandBuffer, buffer[0], stagingBuffer, 1, &copyRegion);
423 res = vkEndCommandBuffer(commandBuffer);
425 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
426 submitInfo.commandBufferCount = 1;
427 submitInfo.pCommandBuffers = &commandBuffer;
428 res = vkQueueSubmit(vkGPU->
queue, 1, &submitInfo, vkGPU->
fence);
430 res = vkWaitForFences(vkGPU->
device, 1, &vkGPU->
fence, VK_TRUE, 100000000000);
432 res = vkResetFences(vkGPU->
device, 1, &vkGPU->
fence);
436 res = vkMapMemory(vkGPU->
device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
438 memcpy(arr, data, stagingBufferSize);
439 vkUnmapMemory(vkGPU->
device, stagingBufferMemory);
440 vkDestroyBuffer(vkGPU->
device, stagingBuffer, NULL);
441 vkFreeMemory(vkGPU->
device, stagingBufferMemory, NULL);
448 VkResult res = VK_SUCCESS;
449 VkInstance local_instance = { 0 };
450 VkInstanceCreateInfo createInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
451 createInfo.flags = 0;
452 createInfo.pApplicationInfo = NULL;
453 VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
454 createInfo.enabledLayerCount = 0;
455 createInfo.enabledExtensionCount = 0;
456 createInfo.pNext = NULL;
457 res = vkCreateInstance(&createInfo, NULL, &local_instance);
460 uint32_t deviceCount;
461 res = vkEnumeratePhysicalDevices(local_instance, &deviceCount, NULL);
464 VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(
sizeof(VkPhysicalDevice) * deviceCount);
467 res = vkEnumeratePhysicalDevices(local_instance, &deviceCount, devices);
469 for (uint64_t i = 0; i < deviceCount; i++) {
470 VkPhysicalDeviceProperties device_properties;
471 vkGetPhysicalDeviceProperties(devices[i], &device_properties);
472 printf(
"Device id: %" PRIu64
" name: %s API:%d.%d.%d\n", i, device_properties.deviceName, (device_properties.apiVersion >> 22), ((device_properties.apiVersion >> 12) & 0x3ff), (device_properties.apiVersion & 0xfff));
478 vkDestroyInstance(local_instance, NULL);
479#elif(VKFFT_BACKEND==1)
480 CUresult res = CUDA_SUCCESS;
484 res = cuDeviceGetCount(&numDevices);
486 for (uint64_t i = 0; i < numDevices; i++) {
487 char deviceName[256];
488 CUdevice device = {};
489 res = cuDeviceGet(&device, (
int)i);
491 res = cuDeviceGetName(deviceName, 256, device);
493 printf(
"Device id: %" PRIu64
" name: %s\n", i, deviceName);
495#elif(VKFFT_BACKEND==2)
496 hipError_t res = hipSuccess;
500 res = hipGetDeviceCount(&numDevices);
502 for (uint64_t i = 0; i < numDevices; i++) {
503 char deviceName[256];
504 hipDevice_t device = {};
505 res = hipDeviceGet(&device, i);
507 res = hipDeviceGetName(deviceName, 256, device);
509 printf(
"Device id: %" PRIu64
" name: %s\n", i, deviceName);
511#elif(VKFFT_BACKEND==3)
512 cl_int res = CL_SUCCESS;
513 cl_uint numPlatforms;
514 res = clGetPlatformIDs(0, 0, &numPlatforms);
516 cl_platform_id* platforms = (cl_platform_id*)malloc(
sizeof(cl_platform_id) * numPlatforms);
518 res = clGetPlatformIDs(numPlatforms, platforms, 0);
521 for (uint64_t j = 0; j < numPlatforms; j++) {
523 res = clGetDeviceIDs(platforms[j], CL_DEVICE_TYPE_ALL, 0, 0, &numDevices);
524 cl_device_id* deviceList = (cl_device_id*)malloc(
sizeof(cl_device_id) * numDevices);
526 res = clGetDeviceIDs(platforms[j], CL_DEVICE_TYPE_ALL, numDevices, deviceList, 0);
528 for (uint64_t i = 0; i < numDevices; i++) {
529 char deviceName[256];
530 char apiVersion[256];
531 res = clGetDeviceInfo(deviceList[i], CL_DEVICE_NAME, 256 *
sizeof(
char), deviceName, 0);
533 res = clGetDeviceInfo(deviceList[i], CL_DEVICE_VERSION, 256 *
sizeof(
char), apiVersion, 0);
535 printf(
"Platform id: %" PRIu64
" Device id: %" PRIu64
" name: %s API:%s\n", j, k, deviceName, apiVersion);
547 VkResult res = VK_SUCCESS;
548 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
549 commandBufferAllocateInfo.commandPool = vkGPU->
commandPool;
550 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
551 commandBufferAllocateInfo.commandBufferCount = 1;
552 VkCommandBuffer commandBuffer = {};
553 res = vkAllocateCommandBuffers(vkGPU->
device, &commandBufferAllocateInfo, &commandBuffer);
555 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
556 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
557 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
561 for (uint64_t i = 0; i < num_iter; i++) {
565 res = vkEndCommandBuffer(commandBuffer);
567 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
568 submitInfo.commandBufferCount = 1;
569 submitInfo.pCommandBuffers = &commandBuffer;
570 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
571 res = vkQueueSubmit(vkGPU->
queue, 1, &submitInfo, vkGPU->
fence);
573 res = vkWaitForFences(vkGPU->
device, 1, &vkGPU->
fence, VK_TRUE, 100000000000);
575 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
576 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
578 res = vkResetFences(vkGPU->
device, 1, &vkGPU->
fence);
581#elif(VKFFT_BACKEND==1)
582 cudaError_t res = cudaSuccess;
583 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
584 for (uint64_t i = 0; i < num_iter; i++) {
588 res = cudaDeviceSynchronize();
590 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
591 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
592#elif(VKFFT_BACKEND==2)
593 hipError_t res = hipSuccess;
594 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
595 for (uint64_t i = 0; i < num_iter; i++) {
599 res = hipDeviceSynchronize();
601 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
602 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
603#elif(VKFFT_BACKEND==3)
604 cl_int res = CL_SUCCESS;
605 launchParams->commandQueue = &vkGPU->commandQueue;
606 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
607 for (uint64_t i = 0; i < num_iter; i++) {
611 res = clFinish(vkGPU->commandQueue);
613 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
614 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
621 VkResult res = VK_SUCCESS;
622 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
623 commandBufferAllocateInfo.commandPool = vkGPU->
commandPool;
624 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
625 commandBufferAllocateInfo.commandBufferCount = 1;
626 VkCommandBuffer commandBuffer = {};
627 res = vkAllocateCommandBuffers(vkGPU->
device, &commandBufferAllocateInfo, &commandBuffer);
629 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
630 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
631 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
634 for (uint64_t i = 0; i < num_iter; i++) {
640 res = vkEndCommandBuffer(commandBuffer);
642 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
643 submitInfo.commandBufferCount = 1;
644 submitInfo.pCommandBuffers = &commandBuffer;
645 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
646 res = vkQueueSubmit(vkGPU->
queue, 1, &submitInfo, vkGPU->
fence);
648 res = vkWaitForFences(vkGPU->
device, 1, &vkGPU->
fence, VK_TRUE, 100000000000);
650 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
651 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
652 time_result[0] = totTime / num_iter;
653 res = vkResetFences(vkGPU->
device, 1, &vkGPU->
fence);
656#elif(VKFFT_BACKEND==1)
657 cudaError_t res = cudaSuccess;
658 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
659 for (uint64_t i = 0; i < num_iter; i++) {
665 res = cudaDeviceSynchronize();
667 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
668 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
669 time_result[0] = totTime / num_iter;
670#elif(VKFFT_BACKEND==2)
671 hipError_t res = hipSuccess;
672 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
673 for (uint64_t i = 0; i < num_iter; i++) {
679 res = hipDeviceSynchronize();
681 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
682 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
683 time_result[0] = totTime / num_iter;
684#elif(VKFFT_BACKEND==3)
685 cl_int res = CL_SUCCESS;
686 launchParams->commandQueue = &vkGPU->commandQueue;
687 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
688 for (uint64_t i = 0; i < num_iter; i++) {
694 res = clFinish(vkGPU->commandQueue);
696 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
697 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
698 time_result[0] = totTime / num_iter;
static int VkFFTGetVersion()
static VkFFTResult VkFFTAppend(VkFFTApplication *app, int inverse, VkFFTLaunchParams *launchParams)
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY
@ VKFFT_ERROR_FAILED_TO_RESET_FENCES
@ VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER
@ VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER
@ VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES
@ VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY
@ VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES
@ VKFFT_ERROR_FAILED_TO_CREATE_BUFFER
@ VKFFT_ERROR_FAILED_TO_INITIALIZE
@ VKFFT_ERROR_FAILED_TO_GET_DEVICE
@ VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE
@ VKFFT_ERROR_FAILED_TO_SYNCHRONIZE
@ VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE
@ VKFFT_ERROR_FAILED_TO_FIND_MEMORY
@ VKFFT_ERROR_MALLOC_FAILED
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS
@ VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID
VkResult createFence(VkGPU *vkGPU)
VkFFTResult transferDataToCPU(VkGPU *vkGPU, void *arr, VkBuffer *buffer, uint64_t bufferSize)
VkResult CreateDebugUtilsMessengerEXT(VkGPU *vkGPU, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDebugUtilsMessengerEXT *pDebugMessenger)
VkFFTResult findMemoryType(VkGPU *vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t *memoryTypeIndex)
VkFFTResult performVulkanFFTiFFT(VkGPU *vkGPU, VkFFTApplication *app, VkFFTLaunchParams *launchParams, uint64_t num_iter, double *time_result)
VkResult setupDebugMessenger(VkGPU *vkGPU)
VkResult createCommandPool(VkGPU *vkGPU)
VkFFTResult transferDataFromCPU(VkGPU *vkGPU, void *arr, VkBuffer *buffer, uint64_t bufferSize)
VkResult checkValidationLayerSupport()
VkResult createInstance(VkGPU *vkGPU, uint64_t sample_id)
VkFFTResult devices_list()
VkResult createDevice(VkGPU *vkGPU, uint64_t sample_id)
VkResult getComputeQueueFamilyIndex(VkGPU *vkGPU)
void DestroyDebugUtilsMessengerEXT(VkGPU *vkGPU, const VkAllocationCallbacks *pAllocator)
static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData)
VkFFTResult performVulkanFFT(VkGPU *vkGPU, VkFFTApplication *app, VkFFTLaunchParams *launchParams, int inverse, uint64_t num_iter)
VkFFTResult allocateBuffer(VkGPU *vkGPU, VkBuffer *buffer, VkDeviceMemory *deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size)
std::vector< const char * > getRequiredExtensions(VkGPU *vkGPU, uint64_t sample_id)
VkResult findPhysicalDevice(VkGPU *vkGPU)
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueCount, NULL)
VkCommandBuffer * commandBuffer
uint64_t enableValidationLayers
VkCommandPool commandPool
uint64_t queueFamilyIndex
VkPhysicalDevice physicalDevice
VkDebugUtilsMessengerEXT debugMessenger
std::vector< const char * > enabledDeviceExtensions