Commit f0252c98 authored by marco.lourenco's avatar marco.lourenco
Browse files

Merge branch 'cleanup'

Conflicts:
	src/lib/cpu/MatrixGenerator.cpp
	src/lib/gpu/ADMMGscon.cu
parents f91af112 3a9dc272
......@@ -9,6 +9,10 @@
#define Vector3D Eigen::Matrix<SOURCE_PRECISION, 1, 3>
/**
* Convert 2D array to eigen compatible matrix
*/
EIGEN_MATRIX * convertMatrix(SOURCE_PRECISION **table, const int rows, const int cols){
EIGEN_MATRIX *temp = new EIGEN_MATRIX(rows, cols);
......@@ -21,9 +25,6 @@ EIGEN_MATRIX * convertMatrix(SOURCE_PRECISION **table, const int rows, const int
return temp;
}
/**
* Debug function used for profiling
**/
double timeDifferenceMS(struct timespec &start, struct timespec &end){
timespec temp;
if ((end.tv_nsec-start.tv_nsec)<0) {
......@@ -37,6 +38,9 @@ double timeDifferenceMS(struct timespec &start, struct timespec &end){
return temp.tv_sec * 1000 + temp.tv_nsec / 1000000.;
}
/**
* Sorter comparison function used to sort sparse matrix values by col first
*/
bool sorter(const result &a, const result&b){
if(a.y == b.y){
return a.x < b.x;
......
......@@ -11,16 +11,24 @@
#define EIGEN_MATRIX Eigen::Matrix<SOURCE_PRECISION, -1, -1, Eigen::RowMajor>
/**
* This struct represents one value of the A matrix.
* It contains its coordinates and the value associated
*/
struct result {
int x;
int y;
OPTIMIZATION_PRECISION value;
};
/**
* Debug function used for profiling
**/
double timeDifferenceMS(struct timespec &start, struct timespec &end);
class MatrixGenerator {
public:
MatrixGenerator();
void setBeamletPointsInPatientCoords(SOURCE_PRECISION **table, const int rows, const int cols);
......@@ -29,23 +37,42 @@ public:
void setPerpendicularPlaneVectors(SOURCE_PRECISION **table, const int rows, const int cols);
/**
* Set the kernel (beamlet doses) to be used for the A matrix and the dosemap
*/
void setKernel(SOURCE_PRECISION **table, const int rows, const int cols);
/**
* Set the labelmap to be used for (patient/optimization specific)
*/
void setLabelmap(SOURCE_PRECISION *pOffset, SOURCE_PRECISION *pPixelDimensions, int* pLabelMapDimensions, int *pLabelMap);
/**
* Sort the resulting A matrix values by row/col
* If set to true, the matrix created by generateMatrx() will be sorted by row/col.
* Defaults to false
*/
void setSortAMatrix(bool sort);
/**
* Generate the A matrix used for the optimization
*/
std::vector<result> * generateMatrix();
/**
* Generate the dosemap based on a particular solution.
*/
SOLUTION_PRECISION * generateDosemap(OPTIMIZATION_PRECISION *xSol, const int xSolLength);
void setThreads(const int threads);
/**
* Return the number of rows of the A matrix that has been generated
*/
int getRows();
/**
* Return the number of columns of the A matrix that has been generated
*/
int getCols();
private:
......@@ -63,7 +90,6 @@ private:
*/
EIGEN_MATRIX* generateRealCoordinates(const int threshold);
//TODO: change all to float (not double)
EIGEN_MATRIX * beamletPointsInPatientCoords;
EIGEN_MATRIX * perpPlaneVectorsInPatientCoords;
EIGEN_MATRIX * sourcePositionsInPatientCoords;
......
This diff is collapsed.
/*
* ADMMGscon.h
* ADMMGscon.h
*
* Created on: Jan 4, 2015
* Author: localuser
* Author: Marco Lourenço, Beat Wolf
* Company: HEIA-FR
*/
#ifndef ADMMGSCON_H_
......@@ -44,14 +45,48 @@ private:
int * csrRowHostPtrA;
float * csrValHostPtrA;
int * csrRowDevicePtrA;
int * csrColDevicePtrA;
float * csrValDevicePtrA;
int * csrColHostPtrB;
int * csrRowHostPtrB;
float * csrValHostPtrB;
int * csrColDevicePtrB;
int * csrRowDevicePtrB;
float * csrValDevicePtrB;
int * csrRowDevicePtrSmallB;
int * csrColDevicePtrSmallB;
float * csrValDevicePtrSmallB;
int * cscRowDevicePtrSmallB;
int * cscColDevicePtrSmallB;
float * cscValDevicePtrSmallB;
int** csrRowDevicePtrSplittedSmallB;
int** csrColDevicePtrSplittedSmallB;
float** csrValDevicePtrSplittedSmallB;
int** cscRowDevicePtrSplitSB;
int** cscColDevicePtrSplitSB;
float** cscValDevicePtrSplitSB;
float** xsolDevicePtr;
float** smallXsolDevicePtr;
float* cDevicePtr;
float* zDevicePtr;
int Ns;
int Ms;
int Ms1;
int nnz;
int NG;
int NB;
int* nbOfNewLines;
OPTIMIZATION_PRECISION* xsolHostFinal;
int xsolHostFinalSize;
......@@ -77,6 +112,20 @@ private:
cusparseMatDescr_t descr;
cublasStatus_t blasStatus;
cublasHandle_t blasHandle;
cudaStream_t copyAStream;
// resizeMatrix
int newNNZ;
int Ns1;
int NG1;
int* normvecIndexHostPtr;
//createYFromLabelMap
float * yDevicePtr;
int * yLabelMapDevicePtr;
int* lowerBoundRegionIDs;
int lowerBoundRegions;
int** linesIDHostPtr;
/*
* Private methods
......@@ -113,6 +162,11 @@ private:
int* &csrRowDevicePtrB,int* &csrColDevicePtrB,float* &csrValDevicePtrB,
int* linesIDHostPtr,float augmentedSign, int &nbOfNewLines);
int nbOfEltOfCsrLine(int* csrRowDevicePtr, int lineID);
void resizeMatrix();
void createYFromLabelmap();
int clean();
void augmentY();
void createB();
/*
* Inline methods
......@@ -124,8 +178,6 @@ private:
* resultDevicePtr must be of size M and not useful for other methods (values will be erased)
*
* resultDevicePtr will contain result of operation : B*x(1:N) + x(N+1:N1)
*
* With actual problem size, N should be 400'000+ and M 11'000+
*/
cusparseStatus_t B2(int* csrRowDevicePtrB, int* csrColDevicePtrB, float* csrValDevicePtrB,
float* xDevicePtr, float* resultDevicePtr, int nnz, int M, int N, float* done, float* dzero,
......@@ -146,8 +198,6 @@ private:
* resultDevicePtr must be of size N1 and not useful for other methods (values will be erased)
*
* resultDevicePtr will contain result of operation : [B'*x;x]
*
* With actual problem size, M should be 400'000+ and N 11'000+
*/
cusparseStatus_t B2t(int* csrRowDevicePtrBT, int* csrColDevicePtrBT, float* csrValDevicePtrBT,
float* xDevicePtr, float* resultDevicePtr, int nnz, int M, int N, float* done,
......
......@@ -5,12 +5,9 @@ if( CUDA_FOUND )
set(FILES
${CMAKE_CURRENT_SOURCE_DIR}/time_utils.h
${CMAKE_CURRENT_SOURCE_DIR}/matrix_utils.h
${CMAKE_CURRENT_SOURCE_DIR}/cuda_utils.h
${CMAKE_CURRENT_SOURCE_DIR}/ADMMGscon.h
${CMAKE_CURRENT_SOURCE_DIR}/ADMMGscon.cu
${CMAKE_CURRENT_SOURCE_DIR}/mmio.h
${CMAKE_CURRENT_SOURCE_DIR}/mmio.c
)
message(STATUS ${FILES})
......
/*
* cuda_utils.h
* cuda_utils.h
*
* Created on: Jan 17, 2015
* Author: localuser
* Author: Marco Lourenço
* Company: HEIA-FR
*/
#ifndef CUDA_UTILS_H_
#define CUDA_UTILS_H_
/*
* Useful to debug CUDA methods
*/
static void HandleError( cudaError_t err,
const char *file,
int line ) {
......
/*
* MatrixGenerator.h
*
* Created on: Sep 19, 2014
* Author: bigmatrix
*/
#ifndef MATRIX_UTILS_H_
#define MATRIX_UTILS_H_
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include <boost/random/uniform_real_distribution.hpp>
extern "C"
{
#include "mmio.h"
}
#define DEFAULT_SEED 42u
void createCSRMatrix(int* csrRowHostPtr,int* csrColHostPtr,float* csrValHostPtr,
int matrixWidth,int matrixHeight,float nonZeroPercentage,long nnz,int isRandom, int writeMatrix){
if(isRandom)
srand(time(NULL));
else
srand(DEFAULT_SEED);
boost::random::mt19937 gen(DEFAULT_SEED);
if(isRandom)
gen.seed(time(NULL));
// choose between 0 and matriHeight to place y of a bloc
boost::random::uniform_int_distribution<> dist(1, 1000);
// generating random values
for(long int i = 0;i<nnz;i++)
{
csrValHostPtr[i] = ((float)dist(gen))/((float)dist(gen));
}
// generating position of the blocks of values
long int blockSizeH = matrixHeight*nonZeroPercentage;
long int blockSizeW = blockSizeH;
int numberOfBlocks = matrixWidth / blockSizeW;
long int xIndex = 0;
int* blockPositions = (int *) malloc(2*numberOfBlocks*sizeof(blockPositions[0]));
int* eltPerLine = (int *) malloc(matrixHeight*sizeof(eltPerLine[0]));
long int min = 0;
long int max = matrixHeight - blockSizeH;
boost::random::uniform_int_distribution<> distBlocks(min, max);
/* place blocs of data randomly
*
* ++ ++
* ++ ++ ++
* ++ ++
* ++
*
* blockPositions contains location (i,j) of the top left element of a bloc
*/
blockPositions[0] = 0;
blockPositions[1] = 0;
//printf("blocks : %d, %d\n",blockPositions[0],blockPositions[1]);
xIndex += blockSizeW;
for(int j = 0;j<blockSizeH;j++)
{
eltPerLine[blockPositions[1]+j] += blockSizeW;
}
for(long int i = 2;i<2*numberOfBlocks;i+=2)
{
blockPositions[i] = xIndex;
blockPositions[i+1] = distBlocks(gen);
//printf("blocks : %d, %d\n",blockPositions[i],blockPositions[i+1]);
// next blockSizeH lines will be filled with blockSizeW elts
for(int j = 0;j<blockSizeH;j++)
{
eltPerLine[blockPositions[i+1]+j] += blockSizeW;
}
xIndex += blockSizeW;
}
long int rowIndex = 0;
csrRowHostPtr[rowIndex++] = 0;
// parse array to create row data
for(int i = 0;i<matrixHeight;i++)
{
if(eltPerLine[i] != 0)
{
csrRowHostPtr[rowIndex] = csrRowHostPtr[rowIndex-1] + eltPerLine[i]; // row has some nnz
}
else
{
csrRowHostPtr[rowIndex] = csrRowHostPtr[rowIndex-1]; // empty row
csrRowHostPtr[rowIndex-1] = csrRowHostPtr[rowIndex-2];
}
rowIndex++;
}
long int crtColIndex = 0;
// parse array to create col data
for(long int i = 0;i<matrixHeight;i++)
{
for(int j = 0;j<2*numberOfBlocks;j+=2){
// if current block is on current line i
if(i>=blockPositions[j+1] && i<blockPositions[j+1]+blockSizeH)
{
for(int k = blockPositions[j];k<blockPositions[j]+blockSizeW;k++)
{
csrColHostPtr[crtColIndex++] = k;
}
}
}
}
// normally nnz + 0 because we use zero-based indexing
csrRowHostPtr[rowIndex] = nnz+csrRowHostPtr[0];
if(writeMatrix)
{
// writing matrix to file in COO MTX format, will be used in matlab or in other libs
std::cout << "Writing matrix to file" << std::endl;
FILE *f = fopen("output_matrix.mtx", "w");
if (f == NULL)
{
std::cout << "Error opening file!" << std::endl;
exit(1);
}
int crtLine = 0;
long int crtColIndex = 0;
long int crtCol = csrColHostPtr[crtColIndex++];
long int nnzPtr = 0;
long int addedElts = 0;
fprintf(f, "%%%%MatrixMarket matrix coordinate real general\n");
fprintf(f, "%d %d %ld\n",matrixHeight,matrixWidth,nnz);
for(int i = 0;i<matrixHeight;i++)
{
if(eltPerLine[i] == 0)
crtLine++;
for(int j = 0;j<matrixWidth;j++)
{
// value is nnz
if(i == crtLine && j == crtCol)
{
fprintf(f, "%d %d %lf\n", i+1, j+1,csrValHostPtr[nnzPtr++]);
addedElts++;
crtCol = csrColHostPtr[crtColIndex++];
}
// value is not nnz
// else
// {
// fprintf(f, "%d %d %lf\n", i+1, j+1,0.0);
// }
if(addedElts != 0 && addedElts == eltPerLine[crtLine])
{
crtLine++;
addedElts = 0;
}
}
}
fclose(f);
}
}
void readMTXMatrixHeader(int* m, int* n, int* nnz,const std::string filename)
{
FILE* f;
if((f = fopen(filename.c_str(),"r")) == NULL)
{
std::cout << "Can't open file " << filename << std::endl;
exit(1);
}
MM_typecode matcode;
if (mm_read_banner(f, &matcode) != 0)
{
std::cout << "readMTXMatrixToCSR could not process Matrix Market banner."<< std::endl;
exit(1);
}
if (mm_read_mtx_crd_size(f, m, n, nnz) !=0) {
std::cout << "readMTXMatrixToCSR matrix size error"<< std::endl;
exit(1);
}
fclose(f);
}
void readMTXMatrixToCSR(int* csrRowHostPtr,int* csrColHostPtr,float* csrValHostPtr,const std::string filename)
{
FILE* f;
if((f = fopen(filename.c_str(),"r")) == NULL)
{
std::cout << "Can't open file " << filename << std::endl;
exit(1);
}
MM_typecode matcode;
if (mm_read_banner(f, &matcode) != 0)
{
std::cout << "readMTXMatrixToCSR could not process Matrix Market banner."<< std::endl;
exit(1);
}
/* find out size of sparse matrix .... */
int M,N; // line, col
int nnz;
if (mm_read_mtx_crd_size(f, &M, &N, &nnz) !=0) {
std::cout << "readMTXMatrixToCSR matrix size error"<< std::endl;
exit(1);
}
int col, row;
float val;
int ret;
int crtValIndex = 0;
int crtRowIndex = 0;
int memoryCrtRow = 0;
int fileCrtRow = 0;
int nextLineStart = 0;
for (int i=0; i<nnz; i++) {
ret = fscanf(f,"%d %d %f",&row, &col, &val);
if(ret != 3)
{
printf("Warning, fscanf did not read 3 items from matrix mtx at line %d!\n",i);
}
fileCrtRow = row-1; // 1-based to 0-based (matlab is 1-based)
while(fileCrtRow >= memoryCrtRow)
{
csrRowHostPtr[crtRowIndex++] = nextLineStart;
memoryCrtRow++;
}
csrValHostPtr[crtValIndex] = val;
csrColHostPtr[crtValIndex++] = col-1; // 1 based to 0 based
nextLineStart = crtValIndex;
}
// if empty lines at the end
while(crtRowIndex < M)
{
csrRowHostPtr[crtRowIndex++] = nnz;
}
csrRowHostPtr[crtRowIndex] = nnz; // last index contains nnz (csr specs)
fclose(f);
//std::cout << "readMTXMatrixToCSR: filename="<< filename << "; done"<< std::endl;
}
void readMTXMatrixToCSR(int* csrRowHostPtr,int* csrColHostPtr,double* csrValHostPtr,const std::string filename)
{
FILE* f;
if((f = fopen(filename.c_str(),"r")) == NULL)
{
std::cout << "Can't open file " << filename << std::endl;
exit(1);
}
MM_typecode matcode;
if (mm_read_banner(f, &matcode) != 0)
{
std::cout << "readMTXMatrixToCSR could not process Matrix Market banner."<< std::endl;
exit(1);
}
/* find out size of sparse matrix .... */
int M,N; // line, col
int nnz;
if (mm_read_mtx_crd_size(f, &M, &N, &nnz) !=0) {
std::cout << "readMTXMatrixToCSR matrix size error"<< std::endl;
exit(1);
}
int col, row;
double val;
int ret;
int crtValIndex = 0;
int crtRowIndex = 0;
int memoryCrtRow = 0;
int fileCrtRow = 0;
int nextLineStart = 0;
for (int i=0; i<nnz; i++) {
// data is inversed because we use file with transpose of A (matlab is col major)
ret = fscanf(f,"%d %d %lf",&row, &col, &val);
if(ret != 3)
{
printf("Warning, fscanf did not read 3 items from matrix mtx at line %d!\n",i);
}
fileCrtRow = row-1; // 1-based to 0-based (matlab is 1-based)
while(fileCrtRow >= memoryCrtRow)
{
csrRowHostPtr[crtRowIndex++] = nextLineStart;
memoryCrtRow++;
}
csrValHostPtr[crtValIndex] = val;
csrColHostPtr[crtValIndex++] = col-1; // 1 based to 0 based
nextLineStart = crtValIndex;
}
// if empty lines at the end
while(crtRowIndex < M)
{
csrRowHostPtr[crtRowIndex++] = nnz;
}
csrRowHostPtr[crtRowIndex] = nnz; // last index contains nnz (csr specs)
fclose(f);
//std::cout << "readMTXMatrixToCSR: filename="<< filename << "; done"<< std::endl;
}
void saveDeviceVector(float* devicePtr, int vectorSize, const char* name)
{
float* hostPtr = 0;
hostPtr = (float*) malloc(vectorSize*sizeof(hostPtr[0]));
cudaMemcpy(hostPtr,devicePtr,vectorSize*sizeof(hostPtr[0]),cudaMemcpyDeviceToHost);
std::cout << "Writing "<< name <<" to file" << std::endl;
FILE *f = fopen(name, "w");
if (f == NULL)
{
std::cout << "Error opening file!" << std::endl;
exit(1);
}
for(int i = 0;i<vectorSize;i++)
{
fprintf(f, "%.16e\n", hostPtr[i]);
}
fclose(f);
free(hostPtr);
}
void saveDeviceVector(int* devicePtr, int vectorSize, const char* name)
{
int* hostPtr = 0;
hostPtr = (int*) malloc(vectorSize*sizeof(hostPtr[0]));
cudaMemcpy(hostPtr,devicePtr,vectorSize*sizeof(hostPtr[0]),cudaMemcpyDeviceToHost);
std::cout << "Writing "<< name <<" to file" << std::endl;
FILE *f = fopen(name, "w");
if (f == NULL)
{
std::cout << "Error opening file!" << std::endl;
exit(1);
}
for(int i = 0;i<vectorSize;i++)
{
fprintf(f, "%d\n", hostPtr[i]);
}
fclose(f);
free(hostPtr);
}
/**
* Write a CSR formatted matrix into an ASCII file with MTX format
*/
int writeCSRMatrixToMTX(int Ms, int Ns1, int newNNZ, int* csrRowDevicePtrSmallB, int* csrColDevicePtrSmallB,
float* csrValDevicePtrSmallB, cusparseHandle_t handle)
{
int* cooRowDevicePtrSmallB = 0;
cudaMalloc((void **)&cooRowDevicePtrSmallB,newNNZ*sizeof(cooRowDevicePtrSmallB[0]));
cusparseStatus_t status = cusparseXcsr2coo(handle, csrRowDevicePtrSmallB, newNNZ, Ms, cooRowDevicePtrSmallB,CUSPARSE_INDEX_BASE_ZERO);
if (status != CUSPARSE_STATUS_SUCCESS) {
printf("CSR 2 COO failed\n");
return 1;
}
int* cooRowHostPtrSmallB = (int*)malloc(newNNZ*sizeof(cooRowHostPtrSmallB[0]));
cudaMemcpy(cooRowHostPtrSmallB,cooRowDevicePtrSmallB,newNNZ*sizeof(cooRowHostPtrSmallB[0]),cudaMemcpyDeviceToHost);
int* cooColHostPtrSmallB = (int*)malloc(newNNZ*sizeof(cooColHostPtrSmallB[0]));
cudaMemcpy(cooColHostPtrSmallB,csrColDevicePtrSmallB,newNNZ*sizeof(cooColHostPtrSmallB[0]),cudaMemcpyDeviceToHost);
float* cooValHostPtrSmallB = (float*)malloc(newNNZ*sizeof(cooValHostPtrSmallB[0]));