// Several CUDA source code examples:
//   1. CUDA matrix example (.cu)
//   2. Matrix multiplication implementation (.cu)
//   3. ...

/*****************************************************************************************/
/* Core code: implements the matrix multiplication.                                      */
/*****************************************************************************************/

// Matrix multiplication kernel - thread specification.
//
// Each thread computes a single element of P as the product of one row of M
// and one column of N:
//     P[ty][tx] = sum over k in [0, M.width) of M[ty][k] * N[k][tx]
//
// Parameters:
//   M, N - input matrices; read through .elements, indexed as row * pitch + column.
//   P    - output matrix; each thread writes exactly one entry of P.elements.
//
// Indexing uses threadIdx only (blockIdx is never read), so the (tx, ty) pair
// identifies the output element within a single block's thread layout.
//
// NOTE(review): there is no bounds check on tx / ty. The launch presumably uses
// one block whose dimensions match P - confirm against the caller.
// NOTE(review): pitch is used here as a stride counted in elements, not bytes -
// confirm against the Matrix definition and its allocation code.
__global__ void MatrixMulKernel(Matrix M, Matrix N, Matrix P)
{
    // 2D thread ID
    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Pvalue accumulates the element of the result matrix
    // that is computed by this thread.
    float Pvalue = 0.0f;

    // This is where the parallelism shows: many threads run this loop
    // concurrently, each one multiplying one row of M by one column of N.
    for (int k = 0; k < M.width; ++k) {
        const float Melement = M.elements[ty * M.pitch + k];
        // Fixed: the original read from the undeclared identifier `Nd`;
        // the kernel parameter is `N` (as already used for N.pitch).
        const float Nelement = N.elements[k * N.pitch + tx];
        Pvalue += Melement * Nelement;
    }

    // Write the result to device memory;
    // each thread writes one element.
    P.elements[ty * P.pitch + tx] = Pvalue;
}