Input: Output: 1. Sub String Program
Input: Output: 1. Sub String Program
// Host entry point of what appears to be the dot-product example.
// NOTE(review): the listing is truncated after the declarations below; the
// device allocation, kernel launch, result copy and closing brace are missing.
int main()
{
// Two host input vectors of SIZE elements. SIZE is not defined in the visible
// part of the listing; the initialisers supply four values each.
int a[SIZE] = {1,2,3,4};
int b[SIZE] = {1,2,3,4};
// Host scalar initialised to zero, presumably the accumulator that receives
// the result; confirm against the full program.
int c = 0;
Input:
Output:
Dot Product: 30
1. Sub String
Program:
// NOTE(review): this listing appears to be two fragments fused together: the
// head of the subString kernel and the tail of a host main(). It does not
// compile as written (c is declared twice, and the body ends with `return 0;`
// inside a void function). Recover the missing kernel body and the host setup
// from the original program before using it.
//
// Parameters as declared: da and db are char buffers, lena and lenb are ints
// (presumably the lengths of the text and of the pattern; confirm), and count
// is an int output pointer.
__global__ void subString(char *da, char *db, int lena, int lenb, int *count)
{
// One index per thread, taken from threadIdx.x only (no blockIdx term).
int i = threadIdx.x, c = 0;
// Second declaration of c; from here on the code reads like host code.
int c = 0, *dc;
// Allocate one int and initialise it from the host value of c (0) with a
// host-to-device copy. The return codes of both calls are ignored.
cudaMalloc((void **)&dc, sizeof(int));
cudaMemcpy(dc, &c, sizeof(int), cudaMemcpyHostToDevice);
// Launch a single block of (lena - lenb) threads.
// NOTE(review): a pattern of length lenb has lena - lenb + 1 candidate start
// positions, so the last position may go unchecked, and lena == lenb would
// request zero threads. Verify against the kernel body.
subString << <1, lena - lenb >> >(da, db, lena, lenb, dc);
// Block the host until the launched work has finished.
cudaDeviceSynchronize();
return 0;
}
Input:
Output:
HaiHelloHowru 13 H 1
Count : 3
2. Matrix Multiplication
Program:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
#define SIZE 20
#define TILESIZE 2
#define WINDOW 2
// NOTE(review): block comments do not nest. The comment opened on the next
// line therefore runs until the terminator that follows printMatrix(d), so as
// written the launch configuration, the matMul launch and the device-to-host
// copy are all commented out. The opener looks like an extraction artifact;
// confirm against the original listing. The start of this function is not
// visible here.
/* dim3 block(TILESIZE);
dim3 grid(SIZE/TILESIZE+1);
matMul<<<grid,block>>>(da, db, dc);
cudaMemcpy(c, dc, size, cudaMemcpyDeviceToHost);
/* cout<<endl;
printMatrix(c);
cout<<endl;
printMatrix(d);*/
}
Input:
Matrix A
2 2 2 2 2
2 2 2 2 2
2 2 2 2 2
2 2 2 2 2
2 2 2 2 2
Matrix B
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
Output:
Matrix C
10 10 10 10 10
10 10 10 10 10
10 10 10 10 10
10 10 10 10 10
10 10 10 10 10
3. Quick Sort
Program:
#define N 5
// Partition kernel for the quick-sort example: each thread partitions one
// sub-range of x in place around the element at the start of that range.
//
//   x      - array being sorted (modified in place)
//   dfirst - per-thread start index of the range to partition
//   dlast  - per-thread end index (inclusive) of the range to partition
//   list   - per-thread output slot, reset to 0 at the top of the kernel
//
// Threads are indexed by threadIdx.x only, so the launch is expected to use a
// single block.
// NOTE(review): the listing is truncated after the pivot swap; whatever writes
// a non-zero value to list[idx], and the closing braces, are not visible here.
__global__ void quickSort(int *x, int *dfirst, int *dlast, int *list)
{
int idx = threadIdx.x;
// Bounds of the sub-range owned by this thread.
int first = dfirst[idx];
int last = dlast[idx];
// Clear this thread's output slot before doing any work.
list[idx] = 0;
// Ranges with fewer than two elements need no partitioning.
if(first<last)
{
int pivot, j, temp, i;
// Use the first element of the range as the pivot.
pivot = first;
i = first;
j = last;
while(i<j)
{
// Advance i past elements that are <= the pivot value, stopping at last.
while(x[i]<=x[pivot] && i<last)
i++;
// Move j left past elements that are > the pivot value.
while(x[j] > x[pivot])
j--;
// Both scans stopped on out-of-place elements: exchange them.
if(i<j)
{
temp = x[i];
x[i] = x[j];
x[j] = temp;
}
}
// Move the pivot value into position j, where the scans crossed.
temp = x[pivot];
x[pivot] = x[j];
x[j] = temp;
// Host driver for the quickSort kernel.
// NOTE(review): the listing is truncated. The kernel launch, the copy of list
// back to the host, the printing of the result and the closing brace of main
// are not visible here.
int main()
{
// a is the host input array and da is the pointer for its device copy.
// size starts as the byte size of a and is reused inside the loop.
int a[N] = {1, 5, 9, 3, 6}, *da, i, size = N*sizeof(int), len = 0;
int *list, *dlist, *dfirst, *dlast;
// Copy the input array to the device. Return codes are not checked.
cudaMalloc(&da, size);
cudaMemcpy(da, a, size, cudaMemcpyHostToDevice);
// NOTE(review): vector needs <vector> and a std:: qualifier or using-directive;
// neither is visible in this fragment.
vector<int> v;
while(true)
{
// Increase the range count by one and recompute the byte size of the
// per-range arrays.
size = (++len)*sizeof(int);
// NOTE(review): first and last are not declared in the visible code;
// presumably they are host arrays of range bounds. Confirm.
first[0] = 0;
last[len-1] = N-1;
// Upload the range bounds and allocate the per-range output buffer.
// NOTE(review): these buffers are allocated on every iteration and no
// cudaFree is visible, which looks like a leak. Return codes are unchecked.
cudaMalloc(&dfirst, size);
cudaMemcpy(dfirst, first, size, cudaMemcpyHostToDevice);
cudaMalloc(&dlast, size);
cudaMemcpy(dlast, last, size, cudaMemcpyHostToDevice);
cudaMalloc(&dlist, size);
// NOTE(review): list is read below, but no allocation of it and no
// device-to-host copy from dlist is visible in this fragment.
v.clear();
// Collect every non-zero entry of list, shifted down by one.
for(i=0; i<len; i++)
if(list[i] != 0)
v.push_back(list[i]-1);
// len becomes the number of collected entries; stop when there are none.
len = v.size();
if(len == 0)
break;
}
Input:
1 5 9 3 6
Output:
1 3 5 6 9
4. Gauss Elimination
Program:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
// Number of unknowns / rows of the system.
#define N 4
// Row length of the N x (N + 1) array A declared in main (presumably the
// augmented matrix; confirm). The replacement list is parenthesized so that M
// behaves as a single value inside larger expressions: with a bare `N+1`,
// `i * M` would expand to `i * N+1` and index the wrong element.
#define M (N + 1)
int main()
{
float A[N*(N + 1)] = { 2, 1, -1, 2, 5, 4, 5, -3, 6, 9, -2, 5, -2, 6,
4, 4, 11, -4, 8, 2 };
float *dev_a;
int size = N*(N + 1)*sizeof(float), i, j;
print(A);
cudaMalloc(&dev_a, size);
cudaMemcpy(dev_a, A, size, cudaMemcpyHostToDevice);