% Timing script: for every cell of a batch of 2-D overlap maps, flag whether
% the cell's value is >= the maximum of its (2*Radius+1)-by-(2*Radius+1)
% neighbourhood (window clipped at the map borders). Rows are processed in
% parallel with parfor; the elapsed time is reported in time5.
%
% Workspace outputs:
%   test   - SP_Size(1) x SP_Size(2) x BatchSize gpuArray of 0/1 flags
%   time5  - elapsed seconds for the parfor section
clear all
BatchSize = 100;
SP_Size = [32 32];
InputOverlap = rand(SP_Size(1)*SP_Size(2),BatchSize,'gpuArray');
Radius = 4;

% One SP_Size(1)-by-SP_Size(2) map per batch entry (third dimension).
InputOverlap = reshape(InputOverlap,SP_Size(1),SP_Size(2),size(InputOverlap,2));

% Query each dimension explicitly so that a batch of one (where size()
% would drop the trailing singleton dimension) still yields three entries.
test_size2 = [size(InputOverlap,1) size(InputOverlap,2) size(InputOverlap,3)];
test_size = test_size2(1:2);
test_Overlap = InputOverlap;
test = zeros(test_size2,'gpuArray');

% Pad every border with Radius rows/columns copied from the adjacent edge
% region. The copied values already lie inside the clipped window of any
% border cell, so the window maximum equals the maximum over the clipped
% neighbourhood.
InputOverlap = [InputOverlap(:,1:Radius,:) InputOverlap InputOverlap(:,end+1-Radius:end,:)];
InputOverlap = [InputOverlap(1:Radius,:,:) ; InputOverlap ; InputOverlap(end+1-Radius:end,:,:)];

numRows  = test_size(1);
numCols  = test_size(2);
numBatch = test_size2(3);

t5 = tic;
parfor r = 1:numRows
    % Per-row result buffer; sized by the column count so that non-square
    % maps are handled correctly.
    temp = zeros(1,numCols,numBatch,'gpuArray');
    Kmax = zeros(1,1,numBatch,'gpuArray');
    for c = 1:numCols
        % In padded coordinates the window centred on (r,c) starts at (r,c).
        Neighbour = InputOverlap(r:r+2*Radius,c:c+2*Radius,:);
        % Reduce over the two spatial dimensions explicitly; a bare
        % max(max(...)) would collapse the batch dimension when the window
        % is 1-by-1 (Radius == 0).
        Kmax(1,1,:) = max(max(Neighbour,[],1),[],2);
        temp(1,c,:) = (test_Overlap(r,c,:) >= Kmax);
    end
    test(r,:,:) = temp;
end
time5 = toc(t5)