Cuda code generation using codegen command

Question

Hi, I am generating CUDA code for the following function:
function [out] = My_Fcn(A,B,C,D,X,U)  %#codegen
% My_Fcn  Evaluate a discrete-time state-space recursion over the columns of U.
%
%   out = My_Fcn(A,B,C,D,X,U) runs, with n = size(U,2),
%       X(:,i+1) = A*X(:,i) + B*U(:,i)    for i = 1..n
%   and returns
%       out = C*X(:,1:n) + D*U(:,1:n)
%
%   X(:,1) is read as the starting column; columns 2..n+1 of X are
%   overwritten by the loop before being used.
n = size(U,2);
% The D*U term does not depend on the recursion, so it is computed up front.
DU = D*U(:,1:n);
% Declares out with the size of U without filling it; every element is
% replaced by the final assignment below.
out = coder.nullcopy(zeros(size(U)));
coder.gpu.kernelfun();
% Each column depends on the previous one, so the iterations of this loop
% must run in order.
for i=1:1:n
    X(:,i+1) = A*X(:,i) + B*U(:,i);
end

out = C*X(:,1:n) + DU;

end

I am generating the code with the following commands:
% Example inputs: codegen uses only their size and class (double) to
% specialize My_Fcn. X has one more column than U.
A = zeros(3, 3);
B = zeros(3, 3);
C = zeros(3, 3);
D = zeros(3, 3);
X = zeros(3, 4);
U = zeros(3, 3);

% GPU Coder configuration object for a 'lib' build.
cfg = coder.gpuConfig('lib');
codegen -args {A,B,C,D,X,U} -config cfg My_Fcn -report

When I generate code with 3*3 matrices, I get the correct kernels, as follows:
#include "My_Fcn.h"
#include "MWCudaDimUtility.hpp"

// Function Declarations
static __global__ void My_Fcn_kernel1(const double U[9], const double B[9],
  const double X[12], const int i, const double A[9], double b_A[3], double b_B
  [3]);
static __global__ void My_Fcn_kernel2(const double B[3], const double A[3],
  const int i, double X[12]);
static __global__ void My_Fcn_kernel3(const double U[9], const double D[9],
  const double X[12], const double C[9], double b_C[9], double b_D[9]);
static __global__ void My_Fcn_kernel4(const double D[9], const double C[9],
  double out[9]);
But when I increase the size of the matrices, for example to 100*100, the kernels created are as follows:
// Include Files
#include "My_Fcn.h"
#include "MWCudaDimUtility.hpp"
#include "My_Fcn_data.h"
#include "My_Fcn_initialize.h"

// Type Definitions
#include "cublas_v2.h"

// Function Declarations
static __global__ void My_Fcn_kernel1(double DU[10000]);
static __global__ void My_Fcn_kernel2(double C[100]);
static __global__ void My_Fcn_kernel3(double C[100]);
static __global__ void My_Fcn_kernel4(const int i, double C[100], double b_C[100],
  double X[10100]);
static __global__ void My_Fcn_kernel5(double out[10000]);
static __global__ void My_Fcn_kernel6(double DU[10000], double out[10000]);
static cublasHandle_t getCublasGlobalHandle();

Here I cannot pass the A, B, C, D, X, and U matrices. I want to use these kernels to run a PTX file on the GPU, but because the kernels are not being generated properly, I am not able to proceed.
Can you please help me with a solution or workaround?
Thank you

Cuda code generation using codegen command

0 Comments
Show -2 older comments Hide -2 older comments

Answers (0)

Categories

Products

Release

Tags

Community Treasure Hunt

Cuda code generation using codegen command

0 Comments Show -2 older comments Hide -2 older comments

Answers (0)

Categories

Products

Release

Tags

See Also

Community Treasure Hunt

0 Comments
Show -2 older comments Hide -2 older comments