5 #include PROTEUS_LAPACK_H
15 void jacobi_NR_solve(SuperMatrix *A,
double* M,
double*
R,
int *node_order,
double* dX)
36 NRformat *AStore = (NRformat*) A->Store;
40 nzval = (
double*)AStore->nzval;
41 colind = AStore->colind;
42 rowptr = AStore->rowptr;
43 memset(dX,0,
sizeof(
double)*N);
46 int cnode = node_order[i];
50 double val =
R[cnode];
51 for (j=rowptr[cnode]; j<rowptr[cnode+1]; j++)
54 if (colind[j] == cnode)
57 if (fabs(nzval[j]) >= tol)
62 dX[cnode] =
w*val/diag;
65 printf(
"D[%d] = %12.5e \n",cnode,diag);
66 ABORT(
"Diagonal element is 0 within tol in Jacobi or is not in sparse matrix.");
81 NRformat *AStore = (NRformat*) A->Store;
85 nzval = (
double*)AStore->nzval;
86 colind = AStore->colind;
87 rowptr = AStore->rowptr;
94 for (j=rowptr[i]; j<rowptr[i+1]; j++)
100 if (fabs(nzval[j]) >= tol)
108 printf(
"D[%d] = %12.5e \n",i,diag);
109 ABORT(
"Diagonal element is 0 within tol in Gauss-Seidel or is not in sparse matrix.");
123 NRformat *AStore = (NRformat*) A->Store;
127 nzval = (
double*)AStore->nzval;
128 colind = AStore->colind;
129 rowptr = AStore->rowptr;
130 memset(dX,0,
sizeof(
double)*N);
133 int cnode = node_order[i];
135 double val =
R[cnode];
136 for (j=rowptr[cnode]; j<rowptr[cnode+1]; j++)
137 val -= nzval[j]*dX[colind[j]];
138 dX[cnode] = M[cnode]*val;
152 NRformat *AStore = (NRformat*) A->Store;
156 nzval = (
double*)AStore->nzval;
157 colind = AStore->colind;
158 rowptr = AStore->rowptr;
159 memset(dX,0,
sizeof(
double)*N);
162 int cnode = node_order[i];
166 double val =
R[cnode];
168 for (j=rowptr[cnode]; j<rowptr[cnode+1]; j++)
171 if (colind[j] == cnode)
174 if (fabs(nzval[j]) >= tol)
177 val -= nzval[j]*dX[colind[j]];
180 dX[cnode] =
w*val/diag;
183 printf(
"D[%d] = %12.5e \n",cnode,diag);
184 ABORT(
"Diagonal element is 0 within tol in Gauss-Seidel or is not in sparse matrix.");
190 int** subdomain_dim_p,
192 double*** subdomain_L_p,
193 double*** subdomain_R_p,
194 double*** subdomain_dX_p,
195 PROTEUS_LAPACK_INTEGER*** subdomain_pivots_p)
200 double** subdomain_L;
201 double** subdomain_R;
202 double** subdomain_dX;
203 PROTEUS_LAPACK_INTEGER** subdomain_pivots;
204 NRformat* ANR = (NRformat*)A->Store;
206 *subdomain_dim_p = (
int*)malloc(N*
sizeof(
int));
207 *subdomain_pivots_p = (PROTEUS_LAPACK_INTEGER**)malloc(N*
sizeof(PROTEUS_LAPACK_INTEGER*));
208 *l2g_L_p = (
int**)malloc(N*
sizeof(
int*));
209 *subdomain_R_p = (
double**)malloc(N*
sizeof(
double*));
210 *subdomain_dX_p = (
double**)malloc(N*
sizeof(
double*));
211 *subdomain_L_p = (
double**)malloc(N*
sizeof(
double*));
212 if ( (*subdomain_dim_p == NULL) ||
213 (*l2g_L_p == NULL) ||
214 (*subdomain_R_p == NULL) ||
215 (*subdomain_dX_p == NULL) ||
216 (*subdomain_L_p == NULL) ||
217 (*subdomain_pivots_p == NULL) )
221 subdomain_dim = *subdomain_dim_p;
222 subdomain_pivots = *subdomain_pivots_p;
224 subdomain_R = *subdomain_R_p;
225 subdomain_dX =*subdomain_dX_p;
226 subdomain_L =*subdomain_L_p;
230 subdomain_dim[i] = ANR->rowptr[i+1]-ANR->rowptr[i];
231 subdomain_pivots[i] = (PROTEUS_LAPACK_INTEGER*)malloc(subdomain_dim[i]*
sizeof(PROTEUS_LAPACK_INTEGER));
232 l2g_L[i] = (
int*)malloc(subdomain_dim[i]*subdomain_dim[i]*
sizeof(
int));
233 subdomain_R[i] = (
double*)malloc(subdomain_dim[i]*
sizeof(
double));
234 subdomain_dX[i] = (
double*)malloc(subdomain_dim[i]*
sizeof(
double));
235 subdomain_L[i] = (
double*)malloc(subdomain_dim[i]*subdomain_dim[i]*
sizeof(
double));
236 if ((l2g_L[i] == NULL) ||
237 (subdomain_R[i] == NULL) ||
238 (subdomain_dX[i] == NULL) ||
239 (subdomain_L[i] == NULL))
248 for (kk=0; kk < subdomain_dim[i]; kk++)
250 k = ANR->colind[ANR->rowptr[i] + kk];
251 for (jj=0;jj<subdomain_dim[i]; jj++)
253 l2g_L[i][kk*subdomain_dim[i]+jj] = -1;
254 for (j=ANR->rowptr[k];j<ANR->rowptr[k+1];j++)
256 if (ANR->colind[j] == ANR->colind[ANR->rowptr[i] + jj])
258 l2g_L[i][kk*subdomain_dim[i]+jj] = j;
271 double** subdomain_L,
272 double** subdomain_R,
273 double** subdomain_dX,
274 PROTEUS_LAPACK_INTEGER** subdomain_pivots)
280 free(subdomain_pivots[i]);
282 free(subdomain_R[i]);
283 free(subdomain_dX[i]);
284 free(subdomain_L[i]);
286 free(subdomain_pivots);
297 PROTEUS_LAPACK_INTEGER** subdomainPivots)
302 NRformat *ANR = (NRformat*) A->Store;
303 double *nzval = (
double*) ANR->nzval;
304 for (i=0; i<A->nrow; i++)
307 for (ii=0;ii<subdomain_dim[i];ii++)
308 for (jj=0;jj<subdomain_dim[i];jj++)
310 int index = ii*subdomain_dim[i] + jj;
311 if (l2g_L[i][index] != -1)
313 subdomainL[i][ii + jj*subdomain_dim[i]] =
314 nzval[l2g_L[i][index]];
317 subdomainL[i][ii+jj*subdomain_dim[i]] = 0.0;
319 PROTEUS_LAPACK_INTEGER La_N=((PROTEUS_LAPACK_INTEGER)subdomain_dim[i]),INFO=0;
320 dgetrf_(&La_N,&La_N,subdomainL[i],&La_N,subdomainPivots[i],&INFO);
332 double** subdomain_dX,
334 PROTEUS_LAPACK_INTEGER** subdomainPivots)
343 NRformat *AStore = (NRformat*) A->Store;
346 PROTEUS_LAPACK_INTEGER La_N,INFO=0,NRHS=1;
350 nzval = (
double*)AStore->nzval;
351 colind = AStore->colind;
352 rowptr = AStore->rowptr;
353 memset(dX,0,
sizeof(
double)*N);
356 int cnode = node_order[i];
359 for (jj = 0;jj<subdomain_dim[cnode];jj++)
361 subdomainR[cnode][jj] =
R[colind[rowptr[cnode] + jj]];
363 for (ii=0;ii<subdomain_dim[cnode];ii++)
365 k = colind[rowptr[cnode]+ii];
366 for (j=rowptr[k];j<rowptr[k+1];j++)
367 subdomainR[cnode][ii] -= nzval[j]*dX[colind[j]];
370 for (jj = 0;jj<subdomain_dim[cnode];jj++)
372 subdomain_dX[cnode][jj] = subdomainR[cnode][jj];
375 La_N = (PROTEUS_LAPACK_INTEGER) subdomain_dim[cnode];
381 subdomainPivots[cnode],
386 for (jj = 0;jj<subdomain_dim[cnode];jj++)
387 dX[colind[rowptr[cnode] + jj]] +=
w*subdomain_dX[cnode][jj];
393 int** subdomain_dim_p,
395 double*** subdomain_L_p,
396 double*** subdomain_R_p,
397 double*** subdomain_dX_p,
398 PROTEUS_LAPACK_INTEGER*** subdomain_pivots_p,
399 PROTEUS_LAPACK_INTEGER*** subdomain_col_pivots_p)
404 double** subdomain_L;
405 double** subdomain_R;
406 double** subdomain_dX;
407 PROTEUS_LAPACK_INTEGER** subdomain_pivots;
408 PROTEUS_LAPACK_INTEGER** subdomain_col_pivots;
409 NRformat* ANR = (NRformat*)A->Store;
412 int local_nonzero_entries = 0;
413 int n_unique_local_dofs=0;
415 int *unique_local_dofs;
417 assert((A->nrow % rowBlocks) == 0);
418 N = A->nrow/rowBlocks;
420 for (i=0; i < N; i++)
422 local_nonzero_entries = ANR->rowptr[(i+1)*rowBlocks]-ANR->rowptr[i*rowBlocks];
423 if (local_nonzero_entries > max_local_dim)
424 max_local_dim = local_nonzero_entries;
426 unique_local_dofs = (
int*) malloc(max_local_dim*
sizeof(
int));
427 for (i=0; i < max_local_dim; i++)
428 unique_local_dofs[i]=-1;
430 *subdomain_dim_p = (
int*)malloc(N*
sizeof(
int));
431 *subdomain_pivots_p = (PROTEUS_LAPACK_INTEGER**)malloc(N*
sizeof(PROTEUS_LAPACK_INTEGER*));
432 *subdomain_col_pivots_p = (PROTEUS_LAPACK_INTEGER**)malloc(N*
sizeof(PROTEUS_LAPACK_INTEGER*));
433 *l2g_L_p = (
int**)malloc(N*
sizeof(
int*));
434 *subdomain_R_p = (
double**)malloc(N*
sizeof(
double*));
435 *subdomain_dX_p = (
double**)malloc(N*
sizeof(
double*));
436 *subdomain_L_p = (
double**)malloc(N*
sizeof(
double*));
437 if ( (*subdomain_dim_p == NULL) ||
438 (*l2g_L_p == NULL) ||
439 (*subdomain_R_p == NULL) ||
440 (*subdomain_dX_p == NULL) ||
441 (*subdomain_L_p == NULL) ||
442 (*subdomain_pivots_p == NULL) ||
443 (*subdomain_col_pivots_p == NULL))
446 free(unique_local_dofs);
449 subdomain_dim = *subdomain_dim_p;
450 subdomain_pivots = *subdomain_pivots_p;
451 subdomain_col_pivots = *subdomain_col_pivots_p;
453 subdomain_R = *subdomain_R_p;
454 subdomain_dX =*subdomain_dX_p;
455 subdomain_L =*subdomain_L_p;
461 local_nonzero_entries = ANR->rowptr[(i+1)*rowBlocks]-ANR->rowptr[i*rowBlocks];
462 for (jj=0; jj < local_nonzero_entries; jj++)
463 unique_local_dofs[jj] = -12345;
464 n_unique_local_dofs = 0;
465 for (jj=0; jj < local_nonzero_entries; jj++)
468 j = ANR->colind[ANR->rowptr[i*rowBlocks]+jj];
471 for (kk=0; kk < n_unique_local_dofs; kk++)
473 if (unique_local_dofs[kk] == j)
481 unique_local_dofs[n_unique_local_dofs] = j;
482 n_unique_local_dofs++;
484 assert(n_unique_local_dofs <= local_nonzero_entries);
487 subdomain_dim[i] = n_unique_local_dofs;
488 subdomain_pivots[i] = (PROTEUS_LAPACK_INTEGER*)malloc(subdomain_dim[i]*
sizeof(PROTEUS_LAPACK_INTEGER));
489 subdomain_col_pivots[i] = (PROTEUS_LAPACK_INTEGER*)malloc(subdomain_dim[i]*
sizeof(PROTEUS_LAPACK_INTEGER));
490 l2g_L[i] = (
int*)malloc(subdomain_dim[i]*subdomain_dim[i]*
sizeof(
int));
491 subdomain_R[i] = (
double*)malloc(subdomain_dim[i]*
sizeof(
double));
492 subdomain_dX[i] = (
double*)malloc(subdomain_dim[i]*
sizeof(
double));
493 subdomain_L[i] = (
double*)malloc(subdomain_dim[i]*subdomain_dim[i]*
sizeof(
double));
494 if ((l2g_L[i] == NULL) ||
495 (subdomain_R[i] == NULL) ||
496 (subdomain_dX[i] == NULL) ||
497 (subdomain_L[i] == NULL))
500 free(unique_local_dofs);
507 int j, jj, k, kk, found_j;
509 local_nonzero_entries = ANR->rowptr[(i+1)*rowBlocks]-ANR->rowptr[i*rowBlocks];
510 for (jj=0; jj < local_nonzero_entries; jj++)
511 unique_local_dofs[jj] = -12345;
512 n_unique_local_dofs = 0;
513 for (jj=0; jj < local_nonzero_entries; jj++)
516 j = ANR->colind[ANR->rowptr[i*rowBlocks]+jj];
519 for (kk=0; kk < n_unique_local_dofs; kk++)
521 if (unique_local_dofs[kk] == j)
529 unique_local_dofs[n_unique_local_dofs] = j;
530 n_unique_local_dofs++;
532 assert(n_unique_local_dofs <= local_nonzero_entries);
534 assert(subdomain_dim[i] == n_unique_local_dofs);
535 for (kk=0; kk < subdomain_dim[i]; kk++)
537 k = unique_local_dofs[kk];
538 for (jj=0;jj<subdomain_dim[i]; jj++)
540 l2g_L[i][kk*subdomain_dim[i]+jj] = -1;
541 for (j=ANR->rowptr[k];j<ANR->rowptr[k+1];j++)
543 if (ANR->colind[j] == unique_local_dofs[jj])
545 l2g_L[i][kk*subdomain_dim[i]+jj] = j;
553 printf(
"BASM_init N= %d rowBlocks=%d \n",N,rowBlocks);
554 for (i=0; i < N; i++)
557 printf(
"l2g_L[%d] dim= %d\n",i,subdomain_dim[i]);
558 for (kk=0; kk < subdomain_dim[i]; kk++)
561 for (jj=0; jj < subdomain_dim[i]; jj++)
563 printf(
" %d ",l2g_L[i][kk*subdomain_dim[i] + jj]);
569 free(unique_local_dofs);
577 double** subdomain_L,
578 double** subdomain_R,
579 double** subdomain_dX,
580 PROTEUS_LAPACK_INTEGER** subdomain_pivots,
581 PROTEUS_LAPACK_INTEGER** subdomain_col_pivots)
587 free(subdomain_pivots[i]);
588 free(subdomain_col_pivots[i]);
590 free(subdomain_R[i]);
591 free(subdomain_dX[i]);
592 free(subdomain_L[i]);
594 free(subdomain_pivots);
595 free(subdomain_col_pivots);
608 PROTEUS_LAPACK_INTEGER** subdomainPivots,
609 PROTEUS_LAPACK_INTEGER** subdomainColPivots)
614 NRformat *ANR = (NRformat*) A->Store;
615 double *nzval = (
double*) ANR->nzval;
616 assert (N*rowBlocks == A->nrow);
621 for (ii=0; ii<subdomain_dim[i]; ii++)
623 subdomainPivots[i][ii] = 0; subdomainColPivots[i][ii] = 0;
624 for (jj=0; jj<subdomain_dim[i]; jj++)
625 subdomainL[i][jj*subdomain_dim[i]+ii] = 0.0;
631 for (ii=0;ii<subdomain_dim[i];ii++)
632 for (jj=0;jj<subdomain_dim[i];jj++)
634 int index = ii*subdomain_dim[i] + jj;
635 if (l2g_L[i][index] != -1)
637 subdomainL[i][ii + jj*subdomain_dim[i]] =
638 nzval[l2g_L[i][index]];
641 subdomainL[i][ii+jj*subdomain_dim[i]] = 0.0;
643 PROTEUS_LAPACK_INTEGER La_N=((PROTEUS_LAPACK_INTEGER)subdomain_dim[i]),INFO=0;
644 dgetc2_(&La_N,subdomainL[i],&La_N,subdomainPivots[i],subdomainColPivots[i],&INFO);
649 printf(
"basm prepare jac dgetc2 INFO=%d nN=%d \n",(
int)(INFO),subdomain_dim[i]);
650 for (ii=0;ii<subdomain_dim[i];ii++)
652 for(jj=0;jj<subdomain_dim[i];jj++)
655 printf(
"%12.5e \t",subdomainL[i][ii*subdomain_dim[i] + jj]);
659 for (ii=0; ii<subdomain_dim[i];ii++)
661 printf(
"colPivot[%d]= %dl \t",ii,subdomainColPivots[i][ii]);
662 printf(
"pivot[%d]= %dl \t",ii,subdomainPivots[i][ii]);
678 double** subdomain_dX,
680 PROTEUS_LAPACK_INTEGER** subdomainPivots,
681 PROTEUS_LAPACK_INTEGER** subdomainColPivots)
690 NRformat *AStore = (NRformat*) A->Store;
691 assert(N*rowBlocks == A->nrow);
693 PROTEUS_LAPACK_INTEGER La_N;
696 int local_nonzero_entries = 0;
697 int n_unique_local_dofs=0;
699 int *unique_local_dofs;
703 for (i=0; i < N; i++)
705 local_nonzero_entries = AStore->rowptr[(i+1)*rowBlocks]-AStore->rowptr[i*rowBlocks];
706 if (local_nonzero_entries > max_local_dim)
707 max_local_dim = local_nonzero_entries;
709 unique_local_dofs = (
int*) malloc(max_local_dim*
sizeof(
int));
710 for (i=0; i < max_local_dim; i++)
711 unique_local_dofs[i]=-1;
714 nzval = (
double*)AStore->nzval;
715 colind = AStore->colind;
716 rowptr = AStore->rowptr;
717 memset(dX,0,
sizeof(
double)*N*rowBlocks);
720 int cnode = node_order[i];
721 int j, k,kk, jj, ii, found_j=-1;
723 local_nonzero_entries = AStore->rowptr[(cnode+1)*rowBlocks]-AStore->rowptr[cnode*rowBlocks];
724 for (jj=0; jj < local_nonzero_entries; jj++)
725 unique_local_dofs[jj] = -12345;
726 n_unique_local_dofs = 0;
727 for (jj=0; jj < local_nonzero_entries; jj++)
730 j = AStore->colind[AStore->rowptr[cnode*rowBlocks]+jj];
733 for (kk=0; kk < n_unique_local_dofs; kk++)
735 if (unique_local_dofs[kk] == j)
743 unique_local_dofs[n_unique_local_dofs] = j;
744 n_unique_local_dofs++;
746 assert(n_unique_local_dofs <= local_nonzero_entries);
748 assert(subdomain_dim[cnode] == n_unique_local_dofs);
750 for (jj = 0;jj<subdomain_dim[cnode];jj++)
752 subdomainR[cnode][jj] =
R[unique_local_dofs[jj]];
754 for (ii=0;ii<subdomain_dim[cnode];ii++)
756 k = unique_local_dofs[ii];
757 for (j=rowptr[k];j<rowptr[k+1];j++)
758 subdomainR[cnode][ii] -= nzval[j]*dX[colind[j]];
761 for (jj = 0;jj<subdomain_dim[cnode];jj++)
763 subdomain_dX[cnode][jj] = subdomainR[cnode][jj];
766 La_N = (PROTEUS_LAPACK_INTEGER) subdomain_dim[cnode];
768 if (La_N == 1 && fabs(subdomainL[cnode][0]) <= 1.0e-64)
769 subdomain_dX[cnode][0] = 0.0;
776 subdomainPivots[cnode],
777 subdomainColPivots[cnode],
781 for (jj = 0;jj<subdomain_dim[cnode];jj++)
784 dX[unique_local_dofs[jj]] +=
w*subdomain_dX[cnode][jj];