PeriDyno 1.0.0
Loading...
Searching...
No Matches
VkFFT_Utils.cpp
Go to the documentation of this file.
1//general parts
2#include <stdio.h>
3#include <vector>
4#include <memory>
5#include <string.h>
6#include <chrono>
7#include <thread>
8#include <iostream>
9#ifndef __STDC_FORMAT_MACROS
10#define __STDC_FORMAT_MACROS
11#endif
12#include <inttypes.h>
13
14#if(VKFFT_BACKEND==0)
15#include "vulkan/vulkan.h"
16#include "glslang_c_interface.h"
17#elif(VKFFT_BACKEND==1)
18#include <cuda.h>
19#include <cuda_runtime.h>
20#include <nvrtc.h>
21#include <cuda_runtime_api.h>
22#include <cuComplex.h>
23#elif(VKFFT_BACKEND==2)
24#ifndef __HIP_PLATFORM_HCC__
25#define __HIP_PLATFORM_HCC__
26#endif
27#include <hip/hip_runtime.h>
28#include <hip/hiprtc.h>
29#include <hip/hip_runtime_api.h>
30#include <hip/hip_complex.h>
31#elif(VKFFT_BACKEND==3)
32#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS
33#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
34#endif
35#ifdef __APPLE__
36#include <OpenCL/opencl.h>
37#else
38#include <CL/cl.h>
39#endif
40#endif
41#include "VkFFT_Utils.h"
42#if(VKFFT_BACKEND==0)
43
44VkResult CreateDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) {
45 //pointer to the function, as it is not part of the core. Function creates debugging messenger
46 PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->instance, "vkCreateDebugUtilsMessengerEXT");
47 if (func != NULL) {
48 return func(vkGPU->instance, pCreateInfo, pAllocator, pDebugMessenger);
49 }
50 else {
51 return VK_ERROR_EXTENSION_NOT_PRESENT;
52 }
53}
54void DestroyDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkAllocationCallbacks* pAllocator) {
55 //pointer to the function, as it is not part of the core. Function destroys debugging messenger
56 PFN_vkDestroyDebugUtilsMessengerEXT func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->instance, "vkDestroyDebugUtilsMessengerEXT");
57 if (func != NULL) {
58 func(vkGPU->instance, vkGPU->debugMessenger, pAllocator);
59 }
60}
61static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) {
62 printf("validation layer: %s\n", pCallbackData->pMessage);
63 return VK_FALSE;
64}
65
66
67VkResult setupDebugMessenger(VkGPU* vkGPU) {
68 //function that sets up the debugging messenger
69 if (vkGPU->enableValidationLayers == 0) return VK_SUCCESS;
70
71 VkDebugUtilsMessengerCreateInfoEXT createInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
72 createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
73 createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
74 createInfo.pfnUserCallback = debugCallback;
75
76 if (CreateDebugUtilsMessengerEXT(vkGPU, &createInfo, NULL, &vkGPU->debugMessenger) != VK_SUCCESS) {
77 return VK_ERROR_INITIALIZATION_FAILED;
78 }
79 return VK_SUCCESS;
80}
82 //check if validation layers are supported when an instance is created
83 uint32_t layerCount;
84 vkEnumerateInstanceLayerProperties(&layerCount, NULL);
85
86 VkLayerProperties* availableLayers = (VkLayerProperties*)malloc(sizeof(VkLayerProperties) * layerCount);
87 if (!availableLayers) return VK_INCOMPLETE;
88 vkEnumerateInstanceLayerProperties(&layerCount, availableLayers);
89 if (availableLayers) {
90 for (uint64_t i = 0; i < layerCount; i++) {
91 if (strcmp("VK_LAYER_KHRONOS_validation", availableLayers[i].layerName) == 0) {
92 free(availableLayers);
93 return VK_SUCCESS;
94 }
95 }
96 free(availableLayers);
97 }
98 else {
99 return VK_INCOMPLETE;
100 }
101 return VK_ERROR_LAYER_NOT_PRESENT;
102}
103
104std::vector<const char*> getRequiredExtensions(VkGPU* vkGPU, uint64_t sample_id) {
105 std::vector<const char*> extensions;
106
107 if (vkGPU->enableValidationLayers) {
108 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
109 }
110 switch (sample_id) {
111#if (VK_API_VERSION>10)
112 case 2: case 102:
113 extensions.push_back("VK_KHR_get_physical_device_properties2");
114 break;
115#endif
116 default:
117 break;
118 }
119
120
121 return extensions;
122}
123
VkResult createInstance(VkGPU* vkGPU, uint64_t sample_id) {
	//create instance - a connection between the application and the Vulkan library
	VkResult res = VK_SUCCESS;
	//check if validation layers are supported
	if (vkGPU->enableValidationLayers == 1) {
		// NOTE(review): `res` is never assigned between its initialization to
		// VK_SUCCESS above and this test, so this early return can never fire.
		// A line appears to have been lost from this listing (likely a call that
		// stores a validation-layer support query into `res`) - confirm against
		// the upstream VkFFT utility sources.
		if (res != VK_SUCCESS) return res;
	}

	VkApplicationInfo applicationInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO };
	applicationInfo.pApplicationName = "VkFFT";
	applicationInfo.applicationVersion = (uint32_t)VkFFTGetVersion();
	applicationInfo.pEngineName = "VkFFT";
	applicationInfo.engineVersion = 1;
	//request the newest Vulkan API version this build was configured for
#if (VK_API_VERSION>=12)
	applicationInfo.apiVersion = VK_API_VERSION_1_2;
#elif (VK_API_VERSION==11)
	applicationInfo.apiVersion = VK_API_VERSION_1_1;
#else
	applicationInfo.apiVersion = VK_API_VERSION_1_0;
#endif

	VkInstanceCreateInfo createInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
	createInfo.flags = 0;
	createInfo.pApplicationInfo = &applicationInfo;

	//instance extensions depend on validation and on the selected sample
	auto extensions = getRequiredExtensions(vkGPU, sample_id);
	createInfo.enabledExtensionCount = (uint32_t)(extensions.size());
	createInfo.ppEnabledExtensionNames = extensions.data();

	VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
	if (vkGPU->enableValidationLayers) {
		//query for the validation layer support in the instance
		createInfo.enabledLayerCount = 1;
		const char* validationLayers = "VK_LAYER_KHRONOS_validation";
		createInfo.ppEnabledLayerNames = &validationLayers;
		//chain a messenger create-info on pNext so that instance creation and
		//destruction themselves are covered by the debug callback
		debugCreateInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
		debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
		debugCreateInfo.pfnUserCallback = debugCallback;
		createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo;
	}
	else {
		createInfo.enabledLayerCount = 0;

		createInfo.pNext = nullptr;
	}

	res = vkCreateInstance(&createInfo, NULL, &vkGPU->instance);
	if (res != VK_SUCCESS) return res;

	return res;
}
176
177VkResult findPhysicalDevice(VkGPU* vkGPU) {
178 //check if there are GPUs that support Vulkan and select one
179 VkResult res = VK_SUCCESS;
180 uint32_t deviceCount;
181 res = vkEnumeratePhysicalDevices(vkGPU->instance, &deviceCount, NULL);
182 if (res != VK_SUCCESS) return res;
183 if (deviceCount == 0) {
184 return VK_ERROR_DEVICE_LOST;
185 }
186
187 VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * deviceCount);
188 if (!devices) return VK_INCOMPLETE;
189 res = vkEnumeratePhysicalDevices(vkGPU->instance, &deviceCount, devices);
190 if (res != VK_SUCCESS) return res;
191 if (devices) {
192 vkGPU->physicalDevice = devices[vkGPU->device_id];
193 free(devices);
194 return VK_SUCCESS;
195 }
196 else
197 return VK_INCOMPLETE;
198}
200 //find a queue family for a selected GPU, select the first available for use
201 uint32_t queueFamilyCount;
202 vkGetPhysicalDeviceQueueFamilyProperties(vkGPU->physicalDevice, &queueFamilyCount, NULL);
203
204 VkQueueFamilyProperties* queueFamilies = (VkQueueFamilyProperties*)malloc(sizeof(VkQueueFamilyProperties) * queueFamilyCount);
205 if (!queueFamilies) return VK_INCOMPLETE;
206 if (queueFamilies) {
207 vkGetPhysicalDeviceQueueFamilyProperties(vkGPU->physicalDevice, &queueFamilyCount, queueFamilies);
208 uint64_t i = 0;
209 for (; i < queueFamilyCount; i++) {
210 VkQueueFamilyProperties props = queueFamilies[i];
211
212 if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
213 break;
214 }
215 }
216 free(queueFamilies);
217 if (i == queueFamilyCount) {
218 return VK_ERROR_INITIALIZATION_FAILED;
219 }
220 vkGPU->queueFamilyIndex = i;
221 return VK_SUCCESS;
222 }
223 else
224 return VK_INCOMPLETE;
225}
226
227VkResult createDevice(VkGPU* vkGPU, uint64_t sample_id) {
228 //create logical device representation
229 VkResult res = VK_SUCCESS;
230 VkDeviceQueueCreateInfo queueCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO };
231 res = getComputeQueueFamilyIndex(vkGPU);
232 if (res != VK_SUCCESS) return res;
233 queueCreateInfo.queueFamilyIndex = (uint32_t)vkGPU->queueFamilyIndex;
234 queueCreateInfo.queueCount = 1;
235 float queuePriorities = 1.0;
236 queueCreateInfo.pQueuePriorities = &queuePriorities;
237 VkDeviceCreateInfo deviceCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
238 VkPhysicalDeviceFeatures deviceFeatures = {};
239 switch (sample_id) {
240 case 1: case 12: case 17: case 18: case 101: case 201: case 1001: {
241 deviceFeatures.shaderFloat64 = true;
242 deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size();
243 deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data();
244 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
245 deviceCreateInfo.queueCreateInfoCount = 1;
246 deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
247 res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device);
248 if (res != VK_SUCCESS) return res;
249 vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue);
250 break;
251 }
252#if (VK_API_VERSION>10)
253 case 2: case 102: {
254 VkPhysicalDeviceFeatures2 deviceFeatures2 = {};
255 VkPhysicalDevice16BitStorageFeatures shaderFloat16 = {};
256 shaderFloat16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
257 shaderFloat16.storageBuffer16BitAccess = true;
258 /*VkPhysicalDeviceShaderFloat16Int8Features shaderFloat16 = {};
259 shaderFloat16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
260 shaderFloat16.shaderFloat16 = true;
261 shaderFloat16.shaderInt8 = true;*/
262 deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
263 deviceFeatures2.pNext = &shaderFloat16;
264 deviceFeatures2.features = deviceFeatures;
265 vkGetPhysicalDeviceFeatures2(vkGPU->physicalDevice, &deviceFeatures2);
266 deviceCreateInfo.pNext = &deviceFeatures2;
267 vkGPU->enabledDeviceExtensions.push_back("VK_KHR_16bit_storage");
268 deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size();
269 deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data();
270 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
271 deviceCreateInfo.queueCreateInfoCount = 1;
272 deviceCreateInfo.pEnabledFeatures = NULL;
273 res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device);
274 if (res != VK_SUCCESS) return res;
275 vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue);
276 break;
277 }
278#endif
279 default: {
280 deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size();
281 deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data();
282 deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
283 deviceCreateInfo.queueCreateInfoCount = 1;
284 deviceCreateInfo.pEnabledFeatures = NULL;
285 deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
286 res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device);
287 if (res != VK_SUCCESS) return res;
288 vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue);
289 break;
290 }
291 }
292 return res;
293}
294VkResult createFence(VkGPU* vkGPU) {
295 //create fence for synchronization
296 VkResult res = VK_SUCCESS;
297 VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
298 fenceCreateInfo.flags = 0;
299 res = vkCreateFence(vkGPU->device, &fenceCreateInfo, NULL, &vkGPU->fence);
300 return res;
301}
302VkResult createCommandPool(VkGPU* vkGPU) {
303 //create a place, command buffer memory is allocated from
304 VkResult res = VK_SUCCESS;
305 VkCommandPoolCreateInfo commandPoolCreateInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
306 commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
307 commandPoolCreateInfo.queueFamilyIndex = (uint32_t)vkGPU->queueFamilyIndex;
308 res = vkCreateCommandPool(vkGPU->device, &commandPoolCreateInfo, NULL, &vkGPU->commandPool);
309 return res;
310}
311
312VkFFTResult findMemoryType(VkGPU* vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t* memoryTypeIndex) {
313 VkPhysicalDeviceMemoryProperties memoryProperties = { 0 };
314
315 vkGetPhysicalDeviceMemoryProperties(vkGPU->physicalDevice, &memoryProperties);
316
317 for (uint64_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
318 if ((memoryTypeBits & ((uint64_t)1 << i)) && ((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) && (memoryProperties.memoryHeaps[memoryProperties.memoryTypes[i].heapIndex].size >= memorySize))
319 {
320 memoryTypeIndex[0] = (uint32_t)i;
321 return VKFFT_SUCCESS;
322 }
323 }
325}
326
327VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size) {
328 //allocate the buffer used by the GPU with specified properties
329 VkFFTResult resFFT = VKFFT_SUCCESS;
330 VkResult res = VK_SUCCESS;
331 uint32_t queueFamilyIndices;
332 VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
333 bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
334 bufferCreateInfo.queueFamilyIndexCount = 1;
335 bufferCreateInfo.pQueueFamilyIndices = &queueFamilyIndices;
336 bufferCreateInfo.size = size;
337 bufferCreateInfo.usage = usageFlags;
338 res = vkCreateBuffer(vkGPU->device, &bufferCreateInfo, NULL, buffer);
339 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_BUFFER;
340 VkMemoryRequirements memoryRequirements = { 0 };
341 vkGetBufferMemoryRequirements(vkGPU->device, buffer[0], &memoryRequirements);
342 VkMemoryAllocateInfo memoryAllocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
343 memoryAllocateInfo.allocationSize = memoryRequirements.size;
344 resFFT = findMemoryType(vkGPU, memoryRequirements.memoryTypeBits, memoryRequirements.size, propertyFlags, &memoryAllocateInfo.memoryTypeIndex);
345 if (resFFT != VKFFT_SUCCESS) return resFFT;
346 res = vkAllocateMemory(vkGPU->device, &memoryAllocateInfo, NULL, deviceMemory);
347 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY;
348 res = vkBindBufferMemory(vkGPU->device, buffer[0], deviceMemory[0], 0);
349 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY;
350 return resFFT;
351}
352VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize) {
353 //a function that transfers data from the CPU to the GPU using staging buffer, because the GPU memory is not host-coherent
354 VkFFTResult resFFT = VKFFT_SUCCESS;
355 VkResult res = VK_SUCCESS;
356 uint64_t stagingBufferSize = bufferSize;
357 VkBuffer stagingBuffer = { 0 };
358 VkDeviceMemory stagingBufferMemory = { 0 };
359 resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
360 if (resFFT != VKFFT_SUCCESS) return resFFT;
361 void* data;
362 res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
363 if (resFFT != VKFFT_SUCCESS) return resFFT;
364 memcpy(data, arr, stagingBufferSize);
365 vkUnmapMemory(vkGPU->device, stagingBufferMemory);
366 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
367 commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
368 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
369 commandBufferAllocateInfo.commandBufferCount = 1;
370 VkCommandBuffer commandBuffer = { 0 };
371 res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &commandBuffer);
372 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
373 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
374 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
375 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
376 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
377 VkBufferCopy copyRegion = { 0 };
378 copyRegion.srcOffset = 0;
379 copyRegion.dstOffset = 0;
380 copyRegion.size = stagingBufferSize;
381 vkCmdCopyBuffer(commandBuffer, stagingBuffer, buffer[0], 1, &copyRegion);
382 res = vkEndCommandBuffer(commandBuffer);
383 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
384 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
385 submitInfo.commandBufferCount = 1;
386 submitInfo.pCommandBuffers = &commandBuffer;
387 res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence);
388 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
389 res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000);
390 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
391 res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
392 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
393 vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
394 vkDestroyBuffer(vkGPU->device, stagingBuffer, NULL);
395 vkFreeMemory(vkGPU->device, stagingBufferMemory, NULL);
396 return resFFT;
397}
398VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize) {
399 //a function that transfers data from the GPU to the CPU using staging buffer, because the GPU memory is not host-coherent
400 VkFFTResult resFFT = VKFFT_SUCCESS;
401 VkResult res = VK_SUCCESS;
402 uint64_t stagingBufferSize = bufferSize;
403 VkBuffer stagingBuffer = { 0 };
404 VkDeviceMemory stagingBufferMemory = { 0 };
405 resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
406 if (resFFT != VKFFT_SUCCESS) return resFFT;
407 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
408 commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
409 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
410 commandBufferAllocateInfo.commandBufferCount = 1;
411 VkCommandBuffer commandBuffer = { 0 };
412 res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &commandBuffer);
413 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
414 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
415 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
416 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
417 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
418 VkBufferCopy copyRegion = { 0 };
419 copyRegion.srcOffset = 0;
420 copyRegion.dstOffset = 0;
421 copyRegion.size = stagingBufferSize;
422 vkCmdCopyBuffer(commandBuffer, buffer[0], stagingBuffer, 1, &copyRegion);
423 res = vkEndCommandBuffer(commandBuffer);
424 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
425 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
426 submitInfo.commandBufferCount = 1;
427 submitInfo.pCommandBuffers = &commandBuffer;
428 res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence);
429 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
430 res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000);
431 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
432 res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
433 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
434 vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
435 void* data;
436 res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
437 if (resFFT != VKFFT_SUCCESS) return resFFT;
438 memcpy(arr, data, stagingBufferSize);
439 vkUnmapMemory(vkGPU->device, stagingBufferMemory);
440 vkDestroyBuffer(vkGPU->device, stagingBuffer, NULL);
441 vkFreeMemory(vkGPU->device, stagingBufferMemory, NULL);
442 return resFFT;
443}
444#endif
446 //this function creates an instance and prints the list of available devices
447#if(VKFFT_BACKEND==0)
448 VkResult res = VK_SUCCESS;
449 VkInstance local_instance = { 0 };
450 VkInstanceCreateInfo createInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
451 createInfo.flags = 0;
452 createInfo.pApplicationInfo = NULL;
453 VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT };
454 createInfo.enabledLayerCount = 0;
455 createInfo.enabledExtensionCount = 0;
456 createInfo.pNext = NULL;
457 res = vkCreateInstance(&createInfo, NULL, &local_instance);
458 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE;
459
460 uint32_t deviceCount;
461 res = vkEnumeratePhysicalDevices(local_instance, &deviceCount, NULL);
462 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES;
463
464 VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * deviceCount);
465 if (!devices) return VKFFT_ERROR_MALLOC_FAILED;
466 if (devices) {
467 res = vkEnumeratePhysicalDevices(local_instance, &deviceCount, devices);
468 if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES;
469 for (uint64_t i = 0; i < deviceCount; i++) {
470 VkPhysicalDeviceProperties device_properties;
471 vkGetPhysicalDeviceProperties(devices[i], &device_properties);
472 printf("Device id: %" PRIu64 " name: %s API:%d.%d.%d\n", i, device_properties.deviceName, (device_properties.apiVersion >> 22), ((device_properties.apiVersion >> 12) & 0x3ff), (device_properties.apiVersion & 0xfff));
473 }
474 free(devices);
475 }
476 else
478 vkDestroyInstance(local_instance, NULL);
479#elif(VKFFT_BACKEND==1)
480 CUresult res = CUDA_SUCCESS;
481 res = cuInit(0);
482 if (res != CUDA_SUCCESS) return VKFFT_ERROR_FAILED_TO_INITIALIZE;
483 int numDevices;
484 res = cuDeviceGetCount(&numDevices);
485 if (res != CUDA_SUCCESS) return VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID;
486 for (uint64_t i = 0; i < numDevices; i++) {
487 char deviceName[256];
488 CUdevice device = {};
489 res = cuDeviceGet(&device, (int)i);
490 if (res != CUDA_SUCCESS) return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
491 res = cuDeviceGetName(deviceName, 256, device);
492 if (res != CUDA_SUCCESS) return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
493 printf("Device id: %" PRIu64 " name: %s\n", i, deviceName);
494 }
495#elif(VKFFT_BACKEND==2)
496 hipError_t res = hipSuccess;
497 res = hipInit(0);
498 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_INITIALIZE;
499 int numDevices;
500 res = hipGetDeviceCount(&numDevices);
501 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID;
502 for (uint64_t i = 0; i < numDevices; i++) {
503 char deviceName[256];
504 hipDevice_t device = {};
505 res = hipDeviceGet(&device, i);
506 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
507 res = hipDeviceGetName(deviceName, 256, device);
508 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
509 printf("Device id: %" PRIu64 " name: %s\n", i, deviceName);
510 }
511#elif(VKFFT_BACKEND==3)
512 cl_int res = CL_SUCCESS;
513 cl_uint numPlatforms;
514 res = clGetPlatformIDs(0, 0, &numPlatforms);
515 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_INITIALIZE;
516 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
517 if (!platforms) return VKFFT_ERROR_MALLOC_FAILED;
518 res = clGetPlatformIDs(numPlatforms, platforms, 0);
519 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_INITIALIZE;
520 uint64_t k = 0;
521 for (uint64_t j = 0; j < numPlatforms; j++) {
522 cl_uint numDevices;
523 res = clGetDeviceIDs(platforms[j], CL_DEVICE_TYPE_ALL, 0, 0, &numDevices);
524 cl_device_id* deviceList = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);
525 if (!deviceList) return VKFFT_ERROR_MALLOC_FAILED;
526 res = clGetDeviceIDs(platforms[j], CL_DEVICE_TYPE_ALL, numDevices, deviceList, 0);
527 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
528 for (uint64_t i = 0; i < numDevices; i++) {
529 char deviceName[256];
530 char apiVersion[256];
531 res = clGetDeviceInfo(deviceList[i], CL_DEVICE_NAME, 256 * sizeof(char), deviceName, 0);
532 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES;
533 res = clGetDeviceInfo(deviceList[i], CL_DEVICE_VERSION, 256 * sizeof(char), apiVersion, 0);
534 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES;
535 printf("Platform id: %" PRIu64 " Device id: %" PRIu64 " name: %s API:%s\n", j, k, deviceName, apiVersion);
536 k++;
537 }
538 free(deviceList);
539 }
540 free(platforms);
541#endif
542 return VKFFT_SUCCESS;
543}
544VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, int inverse, uint64_t num_iter) {
545 VkFFTResult resFFT = VKFFT_SUCCESS;
546#if(VKFFT_BACKEND==0)
547 VkResult res = VK_SUCCESS;
548 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
549 commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
550 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
551 commandBufferAllocateInfo.commandBufferCount = 1;
552 VkCommandBuffer commandBuffer = {};
553 res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &commandBuffer);
555 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
556 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
557 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
559 launchParams->commandBuffer = &commandBuffer;
560 //Record commands num_iter times. Allows to perform multiple convolutions/transforms in one submit.
561 for (uint64_t i = 0; i < num_iter; i++) {
562 resFFT = VkFFTAppend(app, inverse, launchParams);
563 if (resFFT != VKFFT_SUCCESS) return resFFT;
564 }
565 res = vkEndCommandBuffer(commandBuffer);
566 if (res != 0) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
567 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
568 submitInfo.commandBufferCount = 1;
569 submitInfo.pCommandBuffers = &commandBuffer;
570 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
571 res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence);
572 if (res != 0) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
573 res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000);
574 if (res != 0) return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
575 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
576 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
577 //printf("Pure submit execution time per num_iter: %.3f ms\n", totTime / num_iter);
578 res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
579 if (res != 0) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
580 vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
581#elif(VKFFT_BACKEND==1)
582 cudaError_t res = cudaSuccess;
583 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
584 for (uint64_t i = 0; i < num_iter; i++) {
585 resFFT = VkFFTAppend(app, inverse, launchParams);
586 if (resFFT != VKFFT_SUCCESS) return resFFT;
587 }
588 res = cudaDeviceSynchronize();
589 if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
590 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
591 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
592#elif(VKFFT_BACKEND==2)
593 hipError_t res = hipSuccess;
594 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
595 for (uint64_t i = 0; i < num_iter; i++) {
596 resFFT = VkFFTAppend(app, inverse, launchParams);
597 if (resFFT != VKFFT_SUCCESS) return resFFT;
598 }
599 res = hipDeviceSynchronize();
600 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
601 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
602 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
603#elif(VKFFT_BACKEND==3)
604 cl_int res = CL_SUCCESS;
605 launchParams->commandQueue = &vkGPU->commandQueue;
606 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
607 for (uint64_t i = 0; i < num_iter; i++) {
608 resFFT = VkFFTAppend(app, inverse, launchParams);
609 if (resFFT != VKFFT_SUCCESS) return resFFT;
610 }
611 res = clFinish(vkGPU->commandQueue);
612 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
613 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
614 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
615#endif
616 return resFFT;
617}
618VkFFTResult performVulkanFFTiFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, uint64_t num_iter, double* time_result) {
619 VkFFTResult resFFT = VKFFT_SUCCESS;
620#if(VKFFT_BACKEND==0)
621 VkResult res = VK_SUCCESS;
622 VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
623 commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
624 commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
625 commandBufferAllocateInfo.commandBufferCount = 1;
626 VkCommandBuffer commandBuffer = {};
627 res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &commandBuffer);
629 VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
630 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
631 res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
633 launchParams->commandBuffer = &commandBuffer;
634 for (uint64_t i = 0; i < num_iter; i++) {
635 resFFT = VkFFTAppend(app, -1, launchParams);
636 if (resFFT != VKFFT_SUCCESS) return resFFT;
637 resFFT = VkFFTAppend(app, 1, launchParams);
638 if (resFFT != VKFFT_SUCCESS) return resFFT;
639 }
640 res = vkEndCommandBuffer(commandBuffer);
641 if (res != 0) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
642 VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
643 submitInfo.commandBufferCount = 1;
644 submitInfo.pCommandBuffers = &commandBuffer;
645 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
646 res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence);
647 if (res != 0) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
648 res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000);
649 if (res != 0) return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
650 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
651 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
652 time_result[0] = totTime / num_iter;
653 res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
654 if (res != 0) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
655 vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
656#elif(VKFFT_BACKEND==1)
657 cudaError_t res = cudaSuccess;
658 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
659 for (uint64_t i = 0; i < num_iter; i++) {
660 resFFT = VkFFTAppend(app, -1, launchParams);
661 if (resFFT != VKFFT_SUCCESS) return resFFT;
662 resFFT = VkFFTAppend(app, 1, launchParams);
663 if (resFFT != VKFFT_SUCCESS) return resFFT;
664 }
665 res = cudaDeviceSynchronize();
666 if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
667 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
668 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
669 time_result[0] = totTime / num_iter;
670#elif(VKFFT_BACKEND==2)
671 hipError_t res = hipSuccess;
672 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
673 for (uint64_t i = 0; i < num_iter; i++) {
674 resFFT = VkFFTAppend(app, -1, launchParams);
675 if (resFFT != VKFFT_SUCCESS) return resFFT;
676 resFFT = VkFFTAppend(app, 1, launchParams);
677 if (resFFT != VKFFT_SUCCESS) return resFFT;
678 }
679 res = hipDeviceSynchronize();
680 if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
681 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
682 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
683 time_result[0] = totTime / num_iter;
684#elif(VKFFT_BACKEND==3)
685 cl_int res = CL_SUCCESS;
686 launchParams->commandQueue = &vkGPU->commandQueue;
687 std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
688 for (uint64_t i = 0; i < num_iter; i++) {
689 resFFT = VkFFTAppend(app, -1, launchParams);
690 if (resFFT != VKFFT_SUCCESS) return resFFT;
691 resFFT = VkFFTAppend(app, 1, launchParams);
692 if (resFFT != VKFFT_SUCCESS) return resFFT;
693 }
694 res = clFinish(vkGPU->commandQueue);
695 if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
696 std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
697 double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
698 time_result[0] = totTime / num_iter;
699#endif
700 return resFFT;
701}
static int VkFFTGetVersion()
static VkFFTResult VkFFTAppend(VkFFTApplication *app, int inverse, VkFFTLaunchParams *launchParams)
VkFFTResult
Definition VkFFT_Defs.h:232
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY
Definition VkFFT_Defs.h:289
@ VKFFT_ERROR_FAILED_TO_RESET_FENCES
Definition VkFFT_Defs.h:272
@ VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER
Definition VkFFT_Defs.h:268
@ VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER
Definition VkFFT_Defs.h:269
@ VKFFT_SUCCESS
Definition VkFFT_Defs.h:233
@ VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES
Definition VkFFT_Defs.h:314
@ VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY
Definition VkFFT_Defs.h:290
@ VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES
Definition VkFFT_Defs.h:271
@ VKFFT_ERROR_FAILED_TO_CREATE_BUFFER
Definition VkFFT_Defs.h:288
@ VKFFT_ERROR_FAILED_TO_INITIALIZE
Definition VkFFT_Defs.h:306
@ VKFFT_ERROR_FAILED_TO_GET_DEVICE
Definition VkFFT_Defs.h:308
@ VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE
Definition VkFFT_Defs.h:282
@ VKFFT_ERROR_FAILED_TO_SYNCHRONIZE
Definition VkFFT_Defs.h:292
@ VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE
Definition VkFFT_Defs.h:270
@ VKFFT_ERROR_FAILED_TO_FIND_MEMORY
Definition VkFFT_Defs.h:291
@ VKFFT_ERROR_MALLOC_FAILED
Definition VkFFT_Defs.h:234
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS
Definition VkFFT_Defs.h:267
@ VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID
Definition VkFFT_Defs.h:307
VkResult createFence(VkGPU *vkGPU)
VkFFTResult transferDataToCPU(VkGPU *vkGPU, void *arr, VkBuffer *buffer, uint64_t bufferSize)
VkResult CreateDebugUtilsMessengerEXT(VkGPU *vkGPU, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDebugUtilsMessengerEXT *pDebugMessenger)
VkFFTResult findMemoryType(VkGPU *vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t *memoryTypeIndex)
VkFFTResult performVulkanFFTiFFT(VkGPU *vkGPU, VkFFTApplication *app, VkFFTLaunchParams *launchParams, uint64_t num_iter, double *time_result)
VkResult setupDebugMessenger(VkGPU *vkGPU)
VkResult createCommandPool(VkGPU *vkGPU)
VkFFTResult transferDataFromCPU(VkGPU *vkGPU, void *arr, VkBuffer *buffer, uint64_t bufferSize)
VkResult checkValidationLayerSupport()
VkResult createInstance(VkGPU *vkGPU, uint64_t sample_id)
VkFFTResult devices_list()
VkResult createDevice(VkGPU *vkGPU, uint64_t sample_id)
VkResult getComputeQueueFamilyIndex(VkGPU *vkGPU)
void DestroyDebugUtilsMessengerEXT(VkGPU *vkGPU, const VkAllocationCallbacks *pAllocator)
static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData)
VkFFTResult performVulkanFFT(VkGPU *vkGPU, VkFFTApplication *app, VkFFTLaunchParams *launchParams, int inverse, uint64_t num_iter)
VkFFTResult allocateBuffer(VkGPU *vkGPU, VkBuffer *buffer, VkDeviceMemory *deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size)
std::vector< const char * > getRequiredExtensions(VkGPU *vkGPU, uint64_t sample_id)
VkResult findPhysicalDevice(VkGPU *vkGPU)
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueCount, NULL)
VkCommandBuffer * commandBuffer
Definition VkFFT_Defs.h:203
uint64_t enableValidationLayers
Definition VkFFT_Defs.h:17
VkCommandPool commandPool
Definition VkFFT_Defs.h:14
uint64_t device_id
Definition VkFFT_Defs.h:30
uint64_t queueFamilyIndex
Definition VkFFT_Defs.h:12
VkPhysicalDevice physicalDevice
Definition VkFFT_Defs.h:7
VkDebugUtilsMessengerEXT debugMessenger
Definition VkFFT_Defs.h:11
VkDevice device
Definition VkFFT_Defs.h:10
VkFence fence
Definition VkFFT_Defs.h:15
VkInstance instance
Definition VkFFT_Defs.h:6
VkQueue queue
Definition VkFFT_Defs.h:13
std::vector< const char * > enabledDeviceExtensions
Definition VkFFT_Defs.h:16