PeriDyno 1.0.0
Loading...
Searching...
No Matches
VkFFT_Defs.h
Go to the documentation of this file.
1#ifndef VKFFT_DEFS_H
2#define VKFFT_DEFS_H
3#include <vulkan/vulkan.h>
// VkGPU: bundle of device handles for the backend selected by VKFFT_BACKEND.
// Judging by the handle types in each branch: 0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL.
// NOTE(review): the Vulkan branch uses std::vector, but only <vulkan/vulkan.h> is included in the visible part of this header - confirm <vector> is supplied by the includer.
4typedef struct {
5#if(VKFFT_BACKEND==0)
6 VkInstance instance;//a connection between the application and the Vulkan library
7 VkPhysicalDevice physicalDevice;//a handle for the graphics card used in the application
8 VkPhysicalDeviceProperties physicalDeviceProperties;//basic device properties
9 VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties;//basic memory properties of the device
10 VkDevice device;//a logical device, interacting with physical device
11 VkDebugUtilsMessengerEXT debugMessenger;//extension for debugging
12 uint64_t queueFamilyIndex;//if multiple queues are available, specify the used one
13 VkQueue queue;//a place, where all operations are submitted
14 VkCommandPool commandPool;//an opaque object that command buffer memory is allocated from
15 VkFence fence;//a vkGPU->fence used to synchronize dispatches
16 std::vector<const char*> enabledDeviceExtensions;//presumably the names of the device extensions to enable - confirm against the device-creation code
18#elif(VKFFT_BACKEND==1)
19 CUdevice device;//CUDA device handle
20 CUcontext context;//CUDA context handle
21#elif(VKFFT_BACKEND==2)
22 hipDevice_t device;//HIP device handle
23 hipCtx_t context;//HIP context handle
24#elif(VKFFT_BACKEND==3)
25 cl_platform_id platform;//OpenCL platform handle
26 cl_device_id device;//OpenCL device handle
27 cl_context context;//OpenCL context handle
28 cl_command_queue commandQueue;//OpenCL command queue handle
29#endif
30 uint64_t device_id;//an id of a device, reported by Vulkan device list
31} VkGPU;//an example structure containing Vulkan primitives
32
// VkFFTConfiguration: parameters specified at plan creation.
// Members are grouped by the section comments below: required parameters, backend handles,
// buffer descriptions, optional tuning/feature switches, and values filled in automatically.
33typedef struct {
34 //WHDCN layout
35
36 //required parameters:
37 uint64_t FFTdim; //FFT dimensionality (1, 2 or 3)
38 uint64_t size[3]; // WHD -system dimensions
39
40#if(VKFFT_BACKEND==0)
41 VkPhysicalDevice* physicalDevice;//pointer to Vulkan physical device, obtained from vkEnumeratePhysicalDevices
42 VkDevice* device;//pointer to Vulkan device, created with vkCreateDevice
43 VkQueue* queue;//pointer to Vulkan queue, created with vkGetDeviceQueue
44 VkCommandPool* commandPool;//pointer to Vulkan command pool, created with vkCreateCommandPool
45 VkFence* fence;//pointer to Vulkan fence, created with vkCreateFence
46 uint64_t isCompilerInitialized;//specify if glslang compiler has been initialized before (0 - off, 1 - on). Default 0
47#elif(VKFFT_BACKEND==1)
48 CUdevice* device;//pointer to CUDA device, obtained from cuDeviceGet
49 //CUcontext* context;//pointer to CUDA context, obtained from cuDeviceGet
50 cudaStream_t* stream;//pointer to streams (can be more than 1), where to execute the kernels
51 uint64_t num_streams;//try to submit CUDA kernels in multiple streams for asynchronous execution. Default 1
52#elif(VKFFT_BACKEND==2)
53 hipDevice_t* device;//pointer to HIP device, obtained from hipDeviceGet
54 //hipCtx_t* context;//pointer to HIP context, obtained from hipDeviceGet
55 hipStream_t* stream;//pointer to streams (can be more than 1), where to execute the kernels
56 uint64_t num_streams;//try to submit HIP kernels in multiple streams for asynchronous execution. Default 1
57#elif(VKFFT_BACKEND==3)
58 cl_platform_id* platform;//pointer to OpenCL platform handle
59 cl_device_id* device;//pointer to OpenCL device handle
60 cl_context* context;//pointer to OpenCL context handle
61#endif
62
63 //data parameters:
64 uint64_t userTempBuffer; //buffer allocated by app automatically if needed to reorder Four step algorithm. Setting to non zero value enables manual user allocation (0 - off, 1 - on)
65
66 uint64_t bufferNum;//multiple buffer sequence storage is Vulkan only. Default 1
67 uint64_t tempBufferNum;//multiple buffer sequence storage is Vulkan only. Default 1, buffer allocated by app automatically if needed to reorder Four step algorithm. Setting to non zero value enables manual user allocation
68 uint64_t inputBufferNum;//multiple buffer sequence storage is Vulkan only. Default 1, if isInputFormatted is enabled
69 uint64_t outputBufferNum;//multiple buffer sequence storage is Vulkan only. Default 1, if isOutputFormatted is enabled
70 uint64_t kernelNum;//multiple buffer sequence storage is Vulkan only. Default 1, if performConvolution is enabled
71
72 //sizes are obligatory in Vulkan backend, optional in others
73 uint64_t* bufferSize;//array of buffers sizes in bytes
74 uint64_t* tempBufferSize;//array of temp buffers sizes in bytes. Default set to bufferSize sum, buffer allocated by app automatically if needed to reorder Four step algorithm. Setting to non zero value enables manual user allocation
75 uint64_t* inputBufferSize;//array of input buffers sizes in bytes, if isInputFormatted is enabled
76 uint64_t* outputBufferSize;//array of output buffers sizes in bytes, if isOutputFormatted is enabled
77 uint64_t* kernelSize;//array of kernel buffers sizes in bytes, if performConvolution is enabled
78
79#if(VKFFT_BACKEND==0)
80 VkBuffer* buffer;//pointer to array of buffers (or one buffer) used for computations
81 VkBuffer* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same sum size or bigger as buffer (can be split in multiple). Default 0. Setting to non zero value enables manual user allocation
82 VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled
83 VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used to write data to if isOutputFormatted is enabled
84 VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used to read kernel data from if performConvolution is enabled
85#elif(VKFFT_BACKEND==1)
86 void** buffer;//pointer to device buffer used for computations
87 void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
88 void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
89 void** outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
90 void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
91#elif(VKFFT_BACKEND==2)
92 void** buffer;//pointer to device buffer used for computations
93 void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
94 void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
95 void** outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
96 void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
97#elif(VKFFT_BACKEND==3)
98 cl_mem* buffer;//pointer to device buffer used for computations
99 cl_mem* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
100 cl_mem* inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
101 cl_mem* outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
102 cl_mem* kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
103#endif
104 uint64_t bufferOffset;//specify if VkFFT has to offset the first element position inside the buffer. In bytes. Default 0
105 uint64_t tempBufferOffset;//specify if VkFFT has to offset the first element position inside the temp buffer. In bytes. Default 0
106 uint64_t inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0
107 uint64_t outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0
108 uint64_t kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0
109
110 //optional: (default 0 if not stated otherwise)
111 uint64_t coalescedMemory;//in bytes, for Nvidia and AMD is equal to 32, Intel is equal 64, scaled for half precision. Works regardless, but if specified correctly by the user, the performance will be higher.
112 uint64_t aimThreads;//aim at this many threads per block. Default 128
113 uint64_t numSharedBanks;//how many banks shared memory has. Default 32
114 uint64_t inverseReturnToInputBuffer;//return data to the input buffer in inverse transform (0 - off, 1 - on). isInputFormatted must be enabled
115 uint64_t numberBatches;// N - used to perform multiple batches of initial data. Default 1
116 uint64_t useUint64;//use 64-bit addressing mode in generated kernels
117 uint64_t omitDimension[3];//disable FFT for this dimension (0 - FFT enabled, 1 - FFT disabled). Default 0. Doesn't work for R2C dimension 0 for now. Doesn't work with convolutions.
118 uint64_t fixMaxRadixBluestein;//controls the padding of sequences in Bluestein convolution. If specified, padded sequence will be made of up to fixMaxRadixBluestein primes. Default: 2 for CUDA and Vulkan/OpenCL/HIP up to 1048576 combined dimension FFT system, 7 for Vulkan/OpenCL/HIP beyond that. Min = 2, Max = 13.
119 uint64_t performBandwidthBoost;//try to reduce coalesced number by a factor of X to get bigger sequence in one upload for strided axes. Default: -1 for DCT, 2 for Bluestein's algorithm (or -1 if DCT), 0 otherwise. NOTE(review): a default of -1 does not fit the unsigned type declared here - confirm the intended signedness
120
121 uint64_t doublePrecision; //perform calculations in double precision (0 - off, 1 - on).
122 uint64_t halfPrecision; //perform calculations in half precision (0 - off, 1 - on)
123 uint64_t halfPrecisionMemoryOnly; //use half precision only as input/output buffer. Input/Output have to be allocated as half, buffer/tempBuffer have to be allocated as float (out of place mode only). Specify isInputFormatted and isOutputFormatted to use (0 - off, 1 - on)
124 uint64_t doublePrecisionFloatMemory; //use FP64 precision for all calculations, while all memory storage is done in FP32.
125
126 uint64_t performR2C; //perform R2C/C2R decomposition (0 - off, 1 - on)
127 uint64_t performDCT; //perform DCT transformation (X - DCT type, 1-4)
128 uint64_t disableMergeSequencesR2C; //disable merging of two real sequences to reduce calculations (0 - off, 1 - on)
129 uint64_t normalize; //normalize inverse transform (0 - off, 1 - on)
130 uint64_t disableReorderFourStep; // disables unshuffling of Four step algorithm. Requires tempbuffer allocation (0 - off, 1 - on)
131 uint64_t useLUT; //switches from calculating sincos to using precomputed LUT tables (0 - off, 1 - on). Configured by initialization routine
132 uint64_t makeForwardPlanOnly; //generate code only for forward FFT (0 - off, 1 - on)
133 uint64_t makeInversePlanOnly; //generate code only for inverse FFT (0 - off, 1 - on)
134
135 uint64_t bufferStride[3];//buffer strides - default set to x - x*y - x*y*z values
136 uint64_t isInputFormatted; //specify if input buffer is padded - 0 - padded, 1 - not padded. For example if it is not padded for R2C if out-of-place mode is selected (only if numberBatches==1 and numberKernels==1)
137 uint64_t isOutputFormatted; //specify if output buffer is padded - 0 - padded, 1 - not padded. For example if it is not padded for R2C if out-of-place mode is selected (only if numberBatches==1 and numberKernels==1)
138 uint64_t inputBufferStride[3];//input buffer strides. Used if isInputFormatted is enabled. Default set to bufferStride values
139 uint64_t outputBufferStride[3];//output buffer strides. Used if isOutputFormatted is enabled. Default set to bufferStride values
140
141 uint64_t considerAllAxesStrided;//will create plan for nonstrided axis similar as a strided axis - used with disableReorderFourStep to get the same layout for Bluestein kernel (0 - off, 1 - on)
142 uint64_t keepShaderCode;//will keep shader code and print all executed shaders during the plan execution in order (0 - off, 1 - on)
143 uint64_t printMemoryLayout;//will print order of buffers used in shaders (0 - off, 1 - on)
144
145 //optional zero padding control parameters: (default 0 if not stated otherwise)
146 uint64_t performZeropadding[3]; // don't read some data/perform computations if some input sequences are zeropadded for each axis (0 - off, 1 - on)
147 uint64_t fft_zeropad_left[3];//specify start boundary of zero block in the system for each axis
148 uint64_t fft_zeropad_right[3];//specify end boundary of zero block in the system for each axis
149 uint64_t frequencyZeroPadding; //set to 1 if zeropadding of frequency domain, default 0 - spatial zeropadding
150
151 //optional convolution control parameters: (default 0 if not stated otherwise)
152 uint64_t performConvolution; //perform convolution in this application (0 - off, 1 - on). Disables reorderFourStep parameter
153 uint64_t conjugateConvolution;//0 off, 1 - conjugation of the sequence FFT is currently done on, 2 - conjugation of the convolution kernel
154 uint64_t crossPowerSpectrumNormalization;//normalize the FFT x kernel multiplication in frequency domain
155 uint64_t coordinateFeatures; // C - coordinate, or dimension of features vector. In matrix convolution - size of vector
156 uint64_t matrixConvolution; //if equal to 2 perform 2x2, if equal to 3 perform 3x3 matrix-vector convolution. Overrides coordinateFeatures
157 uint64_t symmetricKernel; //specify if kernel in 2x2 or 3x3 matrix convolution is symmetric
158 uint64_t numberKernels;// N - only used in convolution step - specify how many kernels were initialized before. Expands one input to multiple (batched) output
159 uint64_t kernelConvolution;// specify if this application is used to create kernel for convolution, so it has the same properties. performConvolution has to be set to 0 for kernel creation
160
161 //register overutilization (experimental): (default 0 if not stated otherwise)
162 uint64_t registerBoost; //specify if register file size is bigger than shared memory and can be used to extend it X times (on Nvidia 256KB register file can be used instead of 32KB of shared memory, set this constant to 4 to emulate 128KB of shared memory). Default 1
163 uint64_t registerBoostNonPow2; //specify if register overutilization should be used on non power of 2 sequences (0 - off, 1 - on)
164 uint64_t registerBoost4Step; //specify if register file overutilization should be used in big sequences (>2^14), same definition as registerBoost. Default 1
165
166 //not used techniques:
167 uint64_t swapTo3Stage4Step; //specify at which power of 2 to switch from 2 upload to 3 upload 4-step FFT, in case if making max sequence size lower than coalesced sequence helps to combat TLB misses. Default 0 - disabled. Must be at least 17
168 uint64_t devicePageSize;//in KB, the size of a page on the GPU. Setting to 0 disables local buffer split in pages
169 uint64_t localPageSize;//in KB, the size to split page into if sequence spans multiple devicePageSize pages
170
171 //automatically filled based on device info (still can be reconfigured by user):
172 uint64_t maxComputeWorkGroupCount[3]; // maxComputeWorkGroupCount from VkPhysicalDeviceLimits
173 uint64_t maxComputeWorkGroupSize[3]; // maxComputeWorkGroupSize from VkPhysicalDeviceLimits
174 uint64_t maxThreadsNum; //max number of threads from VkPhysicalDeviceLimits
175 uint64_t sharedMemorySizeStatic; //available for static allocation shared memory size, in bytes
176 uint64_t sharedMemorySize; //available for allocation shared memory size, in bytes
177 uint64_t sharedMemorySizePow2; //power of 2 which is less or equal to sharedMemorySize, in bytes
178 uint64_t warpSize; //number of threads per warp/wavefront.
179 uint64_t halfThreads;//Intel fix
180 uint64_t allocateTempBuffer; //buffer allocated by app automatically if needed to reorder Four step algorithm. Parameter to check if it has been allocated
181 uint64_t reorderFourStep; // unshuffle Four step algorithm. Requires tempbuffer allocation (0 - off, 1 - on). Default 1.
182 int64_t maxCodeLength; //specify how big the buffer used for code generation can be (in char). Default 1000000 chars.
183 int64_t maxTempLength; //specify how big the buffer used for intermediate string sprintfs can be (in char). Default 5000 chars. If code segfaults for some reason - try increasing this number.
184#if(VKFFT_BACKEND==0)
185 VkDeviceMemory tempBufferDeviceMemory;//Filled at app creation
186 VkCommandBuffer* commandBuffer;//Filled at app execution
187 VkMemoryBarrier* memory_barrier;//Filled at app creation
188#elif(VKFFT_BACKEND==1)
189 cudaEvent_t* stream_event;//Filled at app creation
190 uint64_t streamCounter;//Filled at app creation
191 uint64_t streamID;//Filled at app creation
192#elif(VKFFT_BACKEND==2)
193 hipEvent_t* stream_event;//Filled at app creation
194 uint64_t streamCounter;//Filled at app creation
195 uint64_t streamID;//Filled at app creation
196#elif(VKFFT_BACKEND==3)
197 cl_command_queue* commandQueue;//pointer to OpenCL command queue; NOTE(review): presumably filled at app execution like the Vulkan commandBuffer above - confirm
198#endif
199} VkFFTConfiguration;//parameters specified at plan creation
200
// VkFFTLaunchParams: parameters specified at plan execution.
// Each backend branch carries the same five buffer pointers (buffer, tempBuffer, inputBuffer,
// outputBuffer, kernel); the Vulkan and OpenCL branches additionally carry the command
// buffer / command queue the FFT is appended to.
201typedef struct {
202#if(VKFFT_BACKEND==0)
203 VkCommandBuffer* commandBuffer;//commandBuffer to which FFT is appended
204
205 VkBuffer* buffer;//pointer to array of buffers (or one buffer) used for computations
206 VkBuffer* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same sum size or bigger as buffer (can be split in multiple). Default 0. Setting to non zero value enables manual user allocation
207 VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled
208 VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used to write data to if isOutputFormatted is enabled
209 VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used to read kernel data from if performConvolution is enabled
210#elif(VKFFT_BACKEND==1)
211 void** buffer;//pointer to device buffer used for computations
212 void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
213 void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
214 void** outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
215 void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
216#elif(VKFFT_BACKEND==2)
217 void** buffer;//pointer to device buffer used for computations
218 void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
219 void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
220 void** outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
221 void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
222#elif(VKFFT_BACKEND==3)
223 cl_command_queue* commandQueue;//command queue to which FFT is appended
224
225 cl_mem* buffer;//pointer to device buffer used for computations
226 cl_mem* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
227 cl_mem* inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
228 cl_mem* outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
229 cl_mem* kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
230#endif
231} VkFFTLaunchParams;//parameters specified at plan execution
232typedef enum VkFFTResult {
318typedef struct {
319 uint64_t size[3];
320 uint64_t localSize[3];
322 uint64_t fftDim;
323 uint64_t inverse;
326 uint64_t zeropad[2];
327 uint64_t zeropadBluestein[2];
328 uint64_t axis_id;
336 uint64_t LUT;
342 uint64_t startDCT3LUT;
343 uint64_t startDCT4LUT;
344 uint64_t performR2C;
346 uint64_t performDCT;
349 uint64_t performZeropaddingFull[3]; // don't do read/write if full sequence is omitted
350 uint64_t performZeropaddingInput[3]; // don't read if input is zeropadded (0 - off, 1 - on)
351 uint64_t performZeropaddingOutput[3]; // don't write if output is zeropadded (0 - off, 1 - on)
362 uint64_t inputStride[5];
363 uint64_t outputStride[5];
364 uint64_t fft_dim_full;
367 uint64_t fft_dim_x;
369 uint64_t numStages;
370 uint64_t stageRadix[20];
371 uint64_t inputOffset;
372 uint64_t kernelOffset;
373 uint64_t outputOffset;
383 uint64_t matrixConvolution; //if equal to 2 perform 2x2, if equal to 3 perform 3x3 matrix-vector convolution. Overrides coordinateFeatures
384 uint64_t numBatches;
385 uint64_t numKernels;
391 uint64_t normalize;
392 uint64_t complexSize;
397 uint64_t unroll;
400 uint64_t supportAxis;
401 uint64_t cacheShuffle;
403 uint64_t warpSize;
409 uint64_t axisSwapped;
411
412 uint64_t numBuffersBound[6];
414 uint64_t LUTBindingID;
417
419 uint64_t useUint64;
420 char** regIDs;
423 char sdataID[50];
424 char inoutID[50];
425 char combinedID[50];
432 char tshuffle[50];
433 char sharedStride[50];
440 char tempReg[50];
443 char temp[50];
444 char w[50];
445 char iw[50];
446 char locID[13][40];
447 char* code0;
448 char* output;
449 char* tempStr;
450 int64_t tempLen;
451 int64_t currentLen;
455typedef struct {
456 uint32_t workGroupShift[3];
458typedef struct {
459 uint64_t workGroupShift[3];
// VkFFTAxis: state kept for one FFT axis, as far as this view shows: binding count, block and
// batch sizes, and the backend-specific input/output buffer pointers, compiled program or
// pipeline handles, and LUT buffer.
// NOTE(review): the listing numbers jump (464 -> 469, 478 -> 483, 509 -> 511), so some members
// of this struct are not shown here - consult the full header before relying on this layout.
461typedef struct {
462 uint64_t numBindings;
463 uint64_t axisBlock[4];
464 uint64_t groupedBatch;
469#if(VKFFT_BACKEND==0)
470 VkBuffer* inputBuffer;
471 VkBuffer* outputBuffer;
472 VkDescriptorPool descriptorPool;
473 VkDescriptorSetLayout descriptorSetLayout;
474 VkDescriptorSet descriptorSet;
475 VkPipelineLayout pipelineLayout;
476 VkPipeline pipeline;
477 VkDeviceMemory bufferLUTDeviceMemory;
478 VkBuffer bufferLUT;
483#elif(VKFFT_BACKEND==1)
484 void** inputBuffer;
485 void** outputBuffer;
486 CUmodule VkFFTModule;
487 CUfunction VkFFTKernel;
488 void* bufferLUT;
489 CUdeviceptr consts_addr;
490 void** bufferBluestein;
491 void** bufferBluesteinFFT;
492#elif(VKFFT_BACKEND==2)
493 void** inputBuffer;
494 void** outputBuffer;
495 hipModule_t VkFFTModule;
496 hipFunction_t VkFFTKernel;
497 void* bufferLUT;
498 hipDeviceptr_t consts_addr;
499 void** bufferBluestein;
500 void** bufferBluesteinFFT;
501#elif(VKFFT_BACKEND==3)
502 cl_mem* inputBuffer;
503 cl_mem* outputBuffer;
504 cl_program program;
505 cl_kernel kernel;
506 cl_mem bufferLUT;
507 cl_mem* bufferBluestein;
508 cl_mem* bufferBluesteinFFT;
509#endif
511 uint64_t referenceLUT;
512} VkFFTAxis;
513
// VkFFTPlan: per-axis plan bookkeeping; each array below has a leading dimension of 3.
// NOTE(review): listing numbers 518, 520 and 522-523 are absent from this view, so further
// members may exist - consult the full header before relying on this layout.
514typedef struct {
515 uint64_t actualFFTSizePerAxis[3][3];
516 uint64_t numAxisUploads[3];
517 uint64_t axisSplit[3][4];
519
521 uint64_t actualPerformR2CPerAxis[3]; // automatically specified, shows if R2C is actually performed or inside FFT or as a separate step
524} VkFFTPlan;
525typedef struct {
528 VkFFTPlan* localFFTPlan_inverse; //additional inverse plan
529
531 uint64_t firstAxis;
532 uint64_t lastAxis;
533 //Bluestein buffers reused among plans
534 uint64_t useBluesteinFFT[3];
535#if(VKFFT_BACKEND==0)
539 VkBuffer bufferBluestein[3];
542#elif(VKFFT_BACKEND==1)
543 void* bufferBluestein[3];
544 void* bufferBluesteinFFT[3];
545 void* bufferBluesteinIFFT[3];
546#elif(VKFFT_BACKEND==2)
547 void* bufferBluestein[3];
548 void* bufferBluesteinFFT[3];
549 void* bufferBluesteinIFFT[3];
550#elif(VKFFT_BACKEND==3)
551 cl_mem bufferBluestein[3];
552 cl_mem bufferBluesteinFFT[3];
553 cl_mem bufferBluesteinIFFT[3];
554#endif
557
558#endif
VkFFTResult
Definition VkFFT_Defs.h:232
@ VKFFT_ERROR_EMPTY_inputBuffer
Definition VkFFT_Defs.h:255
@ VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM
Definition VkFFT_Defs.h:294
@ VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE
Definition VkFFT_Defs.h:313
@ VKFFT_ERROR_FAILED_TO_GET_FUNCTION
Definition VkFFT_Defs.h:300
@ VKFFT_ERROR_INVALID_FENCE
Definition VkFFT_Defs.h:243
@ VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH
Definition VkFFT_Defs.h:261
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY
Definition VkFFT_Defs.h:289
@ VKFFT_ERROR_INVALID_PLATFORM
Definition VkFFT_Defs.h:247
@ VKFFT_ERROR_FAILED_TO_RESET_FENCES
Definition VkFFT_Defs.h:272
@ VKFFT_ERROR_UNSUPPORTED_RADIX
Definition VkFFT_Defs.h:260
@ VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT
Definition VkFFT_Defs.h:309
@ VKFFT_ERROR_FAILED_TO_COPY
Definition VkFFT_Defs.h:293
@ VKFFT_ERROR_FAILED_SHADER_PREPROCESS
Definition VkFFT_Defs.h:277
@ VKFFT_ERROR_EMPTY_FFTdim
Definition VkFFT_Defs.h:248
@ VKFFT_ERROR_EMPTY_kernel
Definition VkFFT_Defs.h:259
@ VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER
Definition VkFFT_Defs.h:235
@ VKFFT_ERROR_EMPTY_size
Definition VkFFT_Defs.h:249
@ VKFFT_ERROR_FAILED_TO_SET_KERNEL_ARG
Definition VkFFT_Defs.h:311
@ VKFFT_ERROR_FAILED_TO_GET_CODE_SIZE
Definition VkFFT_Defs.h:296
@ VKFFT_ERROR_EMPTY_inputBufferSize
Definition VkFFT_Defs.h:254
@ VKFFT_ERROR_EMPTY_tempBuffer
Definition VkFFT_Defs.h:253
@ VKFFT_ERROR_FAILED_TO_CREATE_SHADER_MODULE
Definition VkFFT_Defs.h:281
@ VKFFT_ERROR_FAILED_TO_ADD_NAME_EXPRESSION
Definition VkFFT_Defs.h:305
@ VKFFT_ERROR_EMPTY_outputBuffer
Definition VkFFT_Defs.h:257
@ VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER
Definition VkFFT_Defs.h:268
@ VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER
Definition VkFFT_Defs.h:269
@ VKFFT_SUCCESS
Definition VkFFT_Defs.h:233
@ VKFFT_ERROR_INSUFFICIENT_TEMP_BUFFER
Definition VkFFT_Defs.h:236
@ VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES
Definition VkFFT_Defs.h:314
@ VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE
Definition VkFFT_Defs.h:312
@ VKFFT_ERROR_FAILED_TO_MODULE_GET_GLOBAL
Definition VkFFT_Defs.h:302
@ VKFFT_ERROR_FAILED_TO_DESTROY_PROGRAM
Definition VkFFT_Defs.h:298
@ VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY
Definition VkFFT_Defs.h:290
@ VKFFT_ERROR_FAILED_TO_GET_CODE
Definition VkFFT_Defs.h:297
@ VKFFT_ERROR_PLAN_NOT_INITIALIZED
Definition VkFFT_Defs.h:237
@ VKFFT_ERROR_INVALID_COMMAND_POOL
Definition VkFFT_Defs.h:242
@ VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES
Definition VkFFT_Defs.h:271
@ VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_POOL
Definition VkFFT_Defs.h:273
@ VKFFT_ERROR_EMPTY_bufferSize
Definition VkFFT_Defs.h:250
@ VKFFT_ERROR_EMPTY_tempBufferSize
Definition VkFFT_Defs.h:252
@ VKFFT_ERROR_FAILED_TO_CREATE_DEVICE
Definition VkFFT_Defs.h:285
@ VKFFT_ERROR_FAILED_TO_FIND_PHYSICAL_DEVICE
Definition VkFFT_Defs.h:284
@ VKFFT_ERROR_FAILED_TO_CREATE_BUFFER
Definition VkFFT_Defs.h:288
@ VKFFT_ERROR_INVALID_CONTEXT
Definition VkFFT_Defs.h:246
@ VKFFT_ERROR_FAILED_TO_INITIALIZE
Definition VkFFT_Defs.h:306
@ VKFFT_ERROR_FAILED_TO_GET_DEVICE
Definition VkFFT_Defs.h:308
@ VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL
Definition VkFFT_Defs.h:303
@ VKFFT_ERROR_ONLY_INVERSE_FFT_INITIALIZED
Definition VkFFT_Defs.h:245
@ VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE
Definition VkFFT_Defs.h:282
@ VKFFT_ERROR_FAILED_TO_SETUP_DEBUG_MESSENGER
Definition VkFFT_Defs.h:283
@ VKFFT_ERROR_EMPTY_buffer
Definition VkFFT_Defs.h:251
@ VKFFT_ERROR_INVALID_DEVICE
Definition VkFFT_Defs.h:240
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_DESCRIPTOR_SETS
Definition VkFFT_Defs.h:275
@ VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_POOL
Definition VkFFT_Defs.h:287
@ VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE
Definition VkFFT_Defs.h:315
@ VKFFT_ERROR_FAILED_TO_CREATE_EVENT
Definition VkFFT_Defs.h:316
@ VKFFT_ERROR_FAILED_TO_SYNCHRONIZE
Definition VkFFT_Defs.h:292
@ VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE_LAYOUT
Definition VkFFT_Defs.h:276
@ VKFFT_ERROR_UNSUPPORTED_FFT_OMIT
Definition VkFFT_Defs.h:264
@ VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE
Definition VkFFT_Defs.h:310
@ VKFFT_ERROR_FAILED_TO_LOAD_MODULE
Definition VkFFT_Defs.h:299
@ VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_SET_LAYOUT
Definition VkFFT_Defs.h:274
@ VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT
Definition VkFFT_Defs.h:263
@ VKFFT_ERROR_FAILED_TO_ALLOCATE
Definition VkFFT_Defs.h:265
@ VKFFT_ERROR_EMPTY_kernelSize
Definition VkFFT_Defs.h:258
@ VKFFT_ERROR_FAILED_TO_MAP_MEMORY
Definition VkFFT_Defs.h:266
@ VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2C
Definition VkFFT_Defs.h:262
@ VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE
Definition VkFFT_Defs.h:270
@ VKFFT_ERROR_FAILED_TO_FIND_MEMORY
Definition VkFFT_Defs.h:291
@ VKFFT_ERROR_FAILED_SHADER_LINK
Definition VkFFT_Defs.h:279
@ VKFFT_ERROR_INVALID_QUEUE
Definition VkFFT_Defs.h:241
@ VKFFT_ERROR_MALLOC_FAILED
Definition VkFFT_Defs.h:234
@ VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS
Definition VkFFT_Defs.h:267
@ VKFFT_ERROR_ONLY_FORWARD_FFT_INITIALIZED
Definition VkFFT_Defs.h:244
@ VKFFT_ERROR_FAILED_TO_CREATE_FENCE
Definition VkFFT_Defs.h:286
@ VKFFT_ERROR_FAILED_TO_EVENT_RECORD
Definition VkFFT_Defs.h:304
@ VKFFT_ERROR_FAILED_SHADER_PARSE
Definition VkFFT_Defs.h:278
@ VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID
Definition VkFFT_Defs.h:307
@ VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM
Definition VkFFT_Defs.h:295
@ VKFFT_ERROR_FAILED_TO_SET_DYNAMIC_SHARED_MEMORY
Definition VkFFT_Defs.h:301
@ VKFFT_ERROR_INVALID_PHYSICAL_DEVICE
Definition VkFFT_Defs.h:239
@ VKFFT_ERROR_FAILED_SPIRV_GENERATE
Definition VkFFT_Defs.h:280
@ VKFFT_ERROR_NULL_TEMP_PASSED
Definition VkFFT_Defs.h:238
@ VKFFT_ERROR_EMPTY_outputBufferSize
Definition VkFFT_Defs.h:256
VkFFTConfiguration configuration
Definition VkFFT_Defs.h:526
VkBuffer bufferBluesteinFFT[3]
Definition VkFFT_Defs.h:540
uint64_t lastAxis
Definition VkFFT_Defs.h:532
uint64_t useBluesteinFFT[3]
Definition VkFFT_Defs.h:534
VkBuffer bufferBluestein[3]
Definition VkFFT_Defs.h:539
VkBuffer bufferBluesteinIFFT[3]
Definition VkFFT_Defs.h:541
uint64_t actualNumBatches
Definition VkFFT_Defs.h:530
VkDeviceMemory bufferBluesteinIFFTDeviceMemory[3]
Definition VkFFT_Defs.h:538
VkDeviceMemory bufferBluesteinFFTDeviceMemory[3]
Definition VkFFT_Defs.h:537
uint64_t bufferBluesteinSize[3]
Definition VkFFT_Defs.h:555
VkFFTPlan * localFFTPlan_inverse
Definition VkFFT_Defs.h:528
VkFFTPlan * localFFTPlan
Definition VkFFT_Defs.h:527
VkDeviceMemory bufferBluesteinDeviceMemory[3]
Definition VkFFT_Defs.h:536
uint64_t firstAxis
Definition VkFFT_Defs.h:531
VkFFTPushConstantsLayoutUint32 pushConstantsUint32
Definition VkFFT_Defs.h:466
VkPipelineLayout pipelineLayout
Definition VkFFT_Defs.h:475
uint64_t referenceLUT
Definition VkFFT_Defs.h:511
VkDeviceMemory * bufferBluesteinDeviceMemory
Definition VkFFT_Defs.h:479
VkDeviceMemory bufferLUTDeviceMemory
Definition VkFFT_Defs.h:477
uint64_t bufferLUTSize
Definition VkFFT_Defs.h:510
VkDeviceMemory * bufferBluesteinFFTDeviceMemory
Definition VkFFT_Defs.h:480
VkDescriptorPool descriptorPool
Definition VkFFT_Defs.h:472
VkDescriptorSetLayout descriptorSetLayout
Definition VkFFT_Defs.h:473
uint64_t groupedBatch
Definition VkFFT_Defs.h:464
uint64_t numBindings
Definition VkFFT_Defs.h:462
VkBuffer * inputBuffer
Definition VkFFT_Defs.h:470
VkBuffer * outputBuffer
Definition VkFFT_Defs.h:471
VkBuffer bufferLUT
Definition VkFFT_Defs.h:478
VkBuffer * bufferBluesteinFFT
Definition VkFFT_Defs.h:482
VkFFTPushConstantsLayoutUint64 pushConstants
Definition VkFFT_Defs.h:467
VkDescriptorSet descriptorSet
Definition VkFFT_Defs.h:474
VkFFTSpecializationConstantsLayout specializationConstants
Definition VkFFT_Defs.h:465
uint64_t axisBlock[4]
Definition VkFFT_Defs.h:463
VkPipeline pipeline
Definition VkFFT_Defs.h:476
uint64_t updatePushConstants
Definition VkFFT_Defs.h:468
VkBuffer * bufferBluestein
Definition VkFFT_Defs.h:481
uint64_t inputBufferOffset
Definition VkFFT_Defs.h:106
uint64_t printMemoryLayout
Definition VkFFT_Defs.h:143
uint64_t numberBatches
Definition VkFFT_Defs.h:115
uint64_t outputBufferOffset
Definition VkFFT_Defs.h:107
uint64_t disableMergeSequencesR2C
Definition VkFFT_Defs.h:128
uint64_t registerBoost4Step
Definition VkFFT_Defs.h:164
uint64_t sharedMemorySize
Definition VkFFT_Defs.h:176
uint64_t makeForwardPlanOnly
Definition VkFFT_Defs.h:132
uint64_t isCompilerInitialized
Definition VkFFT_Defs.h:46
VkBuffer * outputBuffer
Definition VkFFT_Defs.h:83
VkBuffer * inputBuffer
Definition VkFFT_Defs.h:82
uint64_t devicePageSize
Definition VkFFT_Defs.h:168
VkBuffer * kernel
Definition VkFFT_Defs.h:84
uint64_t fixMaxRadixBluestein
Definition VkFFT_Defs.h:118
uint64_t coordinateFeatures
Definition VkFFT_Defs.h:155
uint64_t isOutputFormatted
Definition VkFFT_Defs.h:137
VkMemoryBarrier * memory_barrier
Definition VkFFT_Defs.h:187
uint64_t maxComputeWorkGroupSize[3]
Definition VkFFT_Defs.h:173
uint64_t * bufferSize
Definition VkFFT_Defs.h:73
uint64_t doublePrecisionFloatMemory
Definition VkFFT_Defs.h:124
uint64_t makeInversePlanOnly
Definition VkFFT_Defs.h:133
uint64_t inputBufferNum
Definition VkFFT_Defs.h:68
uint64_t * inputBufferSize
Definition VkFFT_Defs.h:75
uint64_t localPageSize
Definition VkFFT_Defs.h:169
VkCommandPool * commandPool
Definition VkFFT_Defs.h:44
uint64_t numberKernels
Definition VkFFT_Defs.h:158
uint64_t halfPrecisionMemoryOnly
Definition VkFFT_Defs.h:123
uint64_t keepShaderCode
Definition VkFFT_Defs.h:142
uint64_t symmetricKernel
Definition VkFFT_Defs.h:157
VkPhysicalDevice * physicalDevice
Definition VkFFT_Defs.h:41
uint64_t swapTo3Stage4Step
Definition VkFFT_Defs.h:167
uint64_t isInputFormatted
Definition VkFFT_Defs.h:136
uint64_t matrixConvolution
Definition VkFFT_Defs.h:156
uint64_t coalescedMemory
Definition VkFFT_Defs.h:111
uint64_t size[3]
Definition VkFFT_Defs.h:38
uint64_t outputBufferNum
Definition VkFFT_Defs.h:69
uint64_t performConvolution
Definition VkFFT_Defs.h:152
uint64_t frequencyZeroPadding
Definition VkFFT_Defs.h:149
uint64_t registerBoostNonPow2
Definition VkFFT_Defs.h:163
uint64_t tempBufferNum
Definition VkFFT_Defs.h:67
uint64_t tempBufferOffset
Definition VkFFT_Defs.h:105
uint64_t considerAllAxesStrided
Definition VkFFT_Defs.h:141
uint64_t doublePrecision
Definition VkFFT_Defs.h:121
uint64_t bufferStride[3]
Definition VkFFT_Defs.h:135
uint64_t omitDimension[3]
Definition VkFFT_Defs.h:117
uint64_t sharedMemorySizePow2
Definition VkFFT_Defs.h:177
uint64_t performBandwidthBoost
Definition VkFFT_Defs.h:119
uint64_t fft_zeropad_left[3]
Definition VkFFT_Defs.h:147
uint64_t registerBoost
Definition VkFFT_Defs.h:162
uint64_t halfPrecision
Definition VkFFT_Defs.h:122
VkDeviceMemory tempBufferDeviceMemory
Definition VkFFT_Defs.h:185
uint64_t numSharedBanks
Definition VkFFT_Defs.h:113
uint64_t * tempBufferSize
Definition VkFFT_Defs.h:74
VkBuffer * tempBuffer
Definition VkFFT_Defs.h:81
VkCommandBuffer * commandBuffer
Definition VkFFT_Defs.h:186
uint64_t allocateTempBuffer
Definition VkFFT_Defs.h:180
uint64_t performZeropadding[3]
Definition VkFFT_Defs.h:146
uint64_t bufferOffset
Definition VkFFT_Defs.h:104
uint64_t sharedMemorySizeStatic
Definition VkFFT_Defs.h:175
uint64_t kernelOffset
Definition VkFFT_Defs.h:108
uint64_t inverseReturnToInputBuffer
Definition VkFFT_Defs.h:114
VkBuffer * buffer
Definition VkFFT_Defs.h:80
uint64_t maxThreadsNum
Definition VkFFT_Defs.h:174
uint64_t userTempBuffer
Definition VkFFT_Defs.h:64
uint64_t disableReorderFourStep
Definition VkFFT_Defs.h:130
uint64_t inputBufferStride[3]
Definition VkFFT_Defs.h:138
uint64_t kernelConvolution
Definition VkFFT_Defs.h:159
uint64_t crossPowerSpectrumNormalization
Definition VkFFT_Defs.h:154
uint64_t outputBufferStride[3]
Definition VkFFT_Defs.h:139
uint64_t * outputBufferSize
Definition VkFFT_Defs.h:76
uint64_t * kernelSize
Definition VkFFT_Defs.h:77
uint64_t conjugateConvolution
Definition VkFFT_Defs.h:153
uint64_t maxComputeWorkGroupCount[3]
Definition VkFFT_Defs.h:172
uint64_t fft_zeropad_right[3]
Definition VkFFT_Defs.h:148
uint64_t reorderFourStep
Definition VkFFT_Defs.h:181
VkDevice * device
Definition VkFFT_Defs.h:42
VkBuffer * outputBuffer
Definition VkFFT_Defs.h:208
VkBuffer * inputBuffer
Definition VkFFT_Defs.h:207
VkBuffer * kernel
Definition VkFFT_Defs.h:209
VkCommandBuffer * commandBuffer
Definition VkFFT_Defs.h:203
VkBuffer * buffer
Definition VkFFT_Defs.h:205
VkBuffer * tempBuffer
Definition VkFFT_Defs.h:206
VkFFTAxis axes[3][4]
Definition VkFFT_Defs.h:518
uint64_t numAxisUploads[3]
Definition VkFFT_Defs.h:516
VkFFTAxis R2Cdecomposition
Definition VkFFT_Defs.h:522
uint64_t axisSplit[3][4]
Definition VkFFT_Defs.h:517
VkFFTAxis inverseBluesteinAxes[3][4]
Definition VkFFT_Defs.h:523
uint64_t actualFFTSizePerAxis[3][3]
Definition VkFFT_Defs.h:515
uint64_t multiUploadR2C
Definition VkFFT_Defs.h:520
uint64_t actualPerformR2CPerAxis[3]
Definition VkFFT_Defs.h:521
VkPhysicalDeviceProperties physicalDeviceProperties
Definition VkFFT_Defs.h:8
uint64_t enableValidationLayers
Definition VkFFT_Defs.h:17
VkCommandPool commandPool
Definition VkFFT_Defs.h:14
uint64_t device_id
Definition VkFFT_Defs.h:30
uint64_t queueFamilyIndex
Definition VkFFT_Defs.h:12
VkPhysicalDevice physicalDevice
Definition VkFFT_Defs.h:7
VkDebugUtilsMessengerEXT debugMessenger
Definition VkFFT_Defs.h:11
VkDevice device
Definition VkFFT_Defs.h:10
VkFence fence
Definition VkFFT_Defs.h:15
VkInstance instance
Definition VkFFT_Defs.h:6
VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties
Definition VkFFT_Defs.h:9
VkQueue queue
Definition VkFFT_Defs.h:13
std::vector< const char * > enabledDeviceExtensions
Definition VkFFT_Defs.h:16