PeriDyno 1.0.0
Loading...
Searching...
No Matches
GPUBuffer.cpp
Go to the documentation of this file.
1#include "GPUBuffer.h"
2#include "Shader.h"
3
4#include <glad/glad.h>
5#include <iostream>
6
7#ifdef CUDA_BACKEND
8#include <cuda_gl_interop.h>
9#endif
10
11#ifdef VK_BACKEND
12#include <VkSystem.h>
13#include <VkContext.h>
14
15#ifdef WIN32
16#include <handleapi.h>
17#else
18#include <unistd.h>
19#endif // WIN32
20
21#endif // VK_BACKEND
22
23namespace dyno
24{
25 class BufferCopy {
26 public:
27 static BufferCopy* instance() {
28 static BufferCopy inst;
29 return &inst;
30 }
31
32 void proc(GLuint src, GLuint dst,
33 int src_pitch,
34 int dst_pitch,
35 int count)
36 {
37 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, src);
38 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, dst);
39
40 int pitch = src_pitch < dst_pitch ? src_pitch : dst_pitch;
41 program.use();
42 program.setInt("uSrcPitch", src_pitch);
43 program.setInt("uDstPitch", dst_pitch);
45 glDispatchCompute(count, pitch, 1);
47 }
48
49 private:
51 const char* src = R"===(
52#version 430
53layout(local_size_x=1,local_size_y=1) in;
54layout(binding=1,std430) buffer BufferSrc { int vSrc[]; };
55layout(binding=2,std430) buffer BufferDst { int vDst[]; };
56uniform int uSrcPitch = 1;
57uniform int uDstPitch = 1;
58void main() { vDst[uDstPitch * gl_GlobalInvocationID.x + gl_GlobalInvocationID.y]
59 = vSrc[uSrcPitch * gl_GlobalInvocationID.x + gl_GlobalInvocationID.y]; }
60)===";
61 Shader shader;
62 shader.createFromSource(GL_COMPUTE_SHADER, src);
63 program.create();
64 program.attachShader(shader);
65 program.link();
66 shader.release();
67 }
68
70 };
71
72#ifdef VK_BACKEND
73 template<typename T>
74 void XBuffer<T>::allocateVkBuffer(int size) {
75
77 auto device = ctx->deviceHandle();
78
79 // free current buffer
80 vkDestroyBuffer(device, buffer, nullptr);
81 vkFreeMemory(device, memory, nullptr);
82
83 VkExternalMemoryHandleTypeFlags type;
84 // OS platforms
85#ifdef WIN32
86 type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
87#else
88 type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
89#endif
90
91 // create vulkan buffer
92 {
93 VkBufferCreateInfo bufferInfo{};
94 bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
95 bufferInfo.size = size;
96 bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
97 bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
98
99 VkExternalMemoryBufferCreateInfo externalInfo{};
100 externalInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
101 externalInfo.handleTypes = type;
102 bufferInfo.pNext = &externalInfo;
103
104 if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
105 throw std::runtime_error("failed to create buffer!");
106 }
107 }
108
109 // create memory
110 {
111 VkMemoryRequirements memRequirements;
112 vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
113
114 VkMemoryAllocateInfo allocInfo{};
115 allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
116 allocInfo.allocationSize = memRequirements.size;
117 allocInfo.memoryTypeIndex = ctx->getMemoryType(memRequirements.memoryTypeBits,
118 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
119
120 // enable export memory
121 VkExportMemoryAllocateInfo memoryHandleEx{};
122 memoryHandleEx.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
123 memoryHandleEx.handleTypes = type;
124 allocInfo.pNext = &memoryHandleEx; // <-- Enabling Export
125
126 if (vkAllocateMemory(device, &allocInfo, nullptr, &memory) != VK_SUCCESS) {
127 throw std::runtime_error("failed to allocate buffer memory!");
128 }
129 }
130
131 vkBindBufferMemory(device, buffer, memory, 0);
132
133 // get the real allocated size of the buffer
134 VkMemoryRequirements req;
135 vkGetBufferMemoryRequirements(device, buffer, &req);
136 this->allocatedSize = size;
137
138 // get memory handle for import
139#ifdef WIN32
140 VkMemoryGetWin32HandleInfoKHR info{};
141 info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
142 info.memory = memory;
143 info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
144
145 auto vkGetMemoryWin32HandleKHR =
146 PFN_vkGetMemoryWin32HandleKHR(vkGetDeviceProcAddr(device, "vkGetMemoryWin32HandleKHR"));
147 vkGetMemoryWin32HandleKHR(device, &info, &handle);
148#else
149 // TODO: for linux and other OS
150#endif
151 // memory handle changed
152 resized = true;
153 printf("Buffer allocated %d bytes\n", allocatedSize);
154 }
155
156 template<typename T>
157 void XBuffer<T>::loadVkBuffer(VkBuffer src, int size) {
158
159 srcBufferSize = size;
160 if (src == nullptr || size <= 0) return;
161
162 // simple strategy to reduce frequently memory allocation
163 if (size > this->allocatedSize || size < (this->allocatedSize / 4)) {
164 //if (size != allocatedSize) {
165 this->allocateVkBuffer(size * 2);
166 }
167
168 // copy data
169 {
170 dyno::VkContext* vkCtx = dyno::VkSystem::instance()->currentContext();
171
172 if (copyCmd == VK_NULL_HANDLE) {
173 copyCmd = vkCtx->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY);
174 }
175
176 // begin
177 VkCommandBufferBeginInfo beginInfo{};
178 beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
179 beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
180 VK_CHECK_RESULT(vkBeginCommandBuffer(copyCmd, &beginInfo));
181
182 VkBufferCopy copyRegion{};
183 copyRegion.srcOffset = 0; // Optional
184 copyRegion.dstOffset = 0; // Optional
185 copyRegion.size = size;
186 vkCmdCopyBuffer(copyCmd, src, buffer, 1, &copyRegion);
187
188 // end and flush
189 vkCtx->flushCommandBuffer(copyCmd, vkCtx->transferQueue, false);
190 }
191 }
192
193#endif // VK_BACKEND
194
195
196 template<typename T>
198 {
199#ifdef CUDA_BACKEND
200 int size = buffer.size() * sizeof(T);
201 if (size == 0)
202 return;
203
204 int newSize = this->size;
205
206 // shrink
207 if (size < (this->size / 2))
208 newSize = size;
209 // expand
210 if (size > this->size)
211 newSize = size * 1.5;
212
213 // resized
214 if(newSize != this->size) {
215 allocate(newSize);
216 // need re-register resource
217 if(resource != 0)
218 cuSafeCall(cudaGraphicsUnregisterResource(resource));
219 cuSafeCall(cudaGraphicsGLRegisterBuffer(&resource, id, cudaGraphicsRegisterFlagsWriteDiscard));
220 }
221
222 size_t size0;
223 void* devicePtr = 0;
224 cuSafeCall(cudaGraphicsMapResources(1, &resource));
225 cuSafeCall(cudaGraphicsResourceGetMappedPointer(&devicePtr, &size0, resource));
226 cuSafeCall(cudaMemcpy(devicePtr, buffer.begin(), size, cudaMemcpyDeviceToDevice));
227 cuSafeCall(cudaGraphicsUnmapResources(1, &resource));
228
229#endif // CUDA_BACKEND
230
231#ifdef VK_BACKEND
232 // we need to re-create buffer and memory object when buffer is resized...
233 if (resized)
234 {
235 resized = false;
236 // re-import memory object
237 if (memoryObject)
238 glDeleteMemoryObjectsEXT(1, &memoryObject);
239 glCreateMemoryObjectsEXT(1, &memoryObject);
240#ifdef WIN32
241 glImportMemoryWin32HandleEXT(memoryObject, allocatedSize, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, handle);
242#else
243 //glImportMemoryFdEXT(bufGl.memoryObject, size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, bufGl.fd);
244 // fd got consumed
245 //bufGl.fd = -1;
246#endif
247 // named buffer
248 if (tempBuffer != GL_INVALID_INDEX)
249 glDeleteBuffers(1, &tempBuffer);
250 glGenBuffers(1, &tempBuffer);
251 glNamedBufferStorageMemEXT(tempBuffer, allocatedSize, memoryObject, 0);
252 glCheckError();
253
254 // allocate target buffer size
255 this->allocate(allocatedSize);
256 }
257
258 // copy data with stride...
260 int src_pitch = sizeof(T) / sizeof(int);
261 int dst_pitch = sizeof(T) / sizeof(int);
262 if (typeid(T) == typeid(dyno::Vec3f) || typeid(T) == typeid(dyno::Vec3i)) {
263 dst_pitch = 3;
264 }
265 copy->proc(tempBuffer, this->id, src_pitch, dst_pitch, count());
266#endif // VK_BACKEND
267 }
268
269 template<typename T>
271 {
272#ifdef VK_BACKEND
273 return srcBufferSize / sizeof(T);
274#endif
275
276#ifdef CUDA_BACKEND
277 return buffer.size();
278#endif
279 }
280
281}
#define glCheckError()
#define VK_CHECK_RESULT(f)
Definition VulkanTools.h:55
static BufferCopy * instance()
Definition GPUBuffer.cpp:27
void proc(GLuint src, GLuint dst, int src_pitch, int dst_pitch, int count)
Definition GPUBuffer.cpp:32
virtual void allocate(int size)
Definition Buffer.cpp:43
void release()
Definition Shader.cpp:75
bool createFromSource(unsigned int type, const std::string &src)
Definition Shader.cpp:8
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, VkCommandPool pool, bool free=true)
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, VkCommandPool pool, bool begin=false)
VkQueue transferQueue
Definition VkContext.h:75
VkDevice deviceHandle()
Definition VkContext.h:26
VkContext * currentContext()
Definition VkSystem.h:21
static VkSystem * instance()
Definition VkSystem.cpp:10
#define T(t)
This is an implementation of AdditiveCCD based on peridyno.
Definition Array.h:25
Vector< int, 3 > Vec3i
Definition Vector3D.h:95
Vector< float, 3 > Vec3f
Definition Vector3D.h:93