/*
This code is a fusion of the modified version of the standard code from
the Adafruit LSM9DS1 library and the version of Magneto authored by S. James Remington
magneto 1.4 magnetometer/accelerometer calibration code
from http://sailboatinstruments.blogspot.com/2011/08/improved-magnetometer-calibration.html
tested and works with Code::Blocks 10.05 through 20.03

Example I2C hardware setup with the TinyS3:
LSM9DS1 -------- ESP32
3V ------------- 3.3V
G -------------- GND or -
SCL ------------ 9
SDA ------------ 8
*/

#include <Arduino.h>
#include <Wire.h>
#include <SPI.h>
#include <Adafruit_LSM9DS1.h>
#include <Adafruit_Sensor.h>

// C-Libraries needed for Magneto
#include <stdio.h>
#include <math.h>
#include <malloc.h>
#include <string.h>
#include <float.h>

// I2C!
Adafruit_LSM9DS1 lsm = Adafruit_LSM9DS1();

#define LSM9DS1_SCK A5
#define LSM9DS1_MISO 12
#define LSM9DS1_MOSI A4
#define LSM9DS1_XGCS 6
#define LSM9DS1_MCS 5


#define MEASUREMENT_INTERVAL 100 // 100 ms between measurements
#define NUM_MEASUREMENTS 300 //
int numMeasurements = 0; 
double sensorMagData[NUM_MEASUREMENTS][3]; // Static array to store the magnetometer measurements
double sensorAccData[NUM_MEASUREMENTS][3]; // Static array to store the accelerometer measurements

void setupSensor()
{
    // IMPORTANT: Accuracy is inversely proportional to the sensor range!
    // Ringinator works with 
    // 1.) Set the accelerometer range
    lsm.setupAccel(lsm.LSM9DS1_ACCELRANGE_2G, lsm.LSM9DS1_ACCELDATARATE_10HZ);
    //lsm.setupAccel(lsm.LSM9DS1_ACCELRANGE_4G, lsm.LSM9DS1_ACCELDATARATE_119HZ);
    //lsm.setupAccel(lsm.LSM9DS1_ACCELRANGE_8G, lsm.LSM9DS1_ACCELDATARATE_476HZ);
    //lsm.setupAccel(lsm.LSM9DS1_ACCELRANGE_16G, lsm.LSM9DS1_ACCELDATARATE_952HZ);

    // 2.) Set the magnetometer sensitivity
    lsm.setupMag(lsm.LSM9DS1_MAGGAIN_4GAUSS);
    //lsm.setupMag(lsm.LSM9DS1_MAGGAIN_8GAUSS);
    //lsm.setupMag(lsm.LSM9DS1_MAGGAIN_12GAUSS);
    //lsm.setupMag(lsm.LSM9DS1_MAGGAIN_16GAUSS);

    // 3.) Setup the gyroscope
    lsm.setupGyro(lsm.LSM9DS1_GYROSCALE_245DPS);
    //lsm.setupGyro(lsm.LSM9DS1_GYROSCALE_500DPS);
    //lsm.setupGyro(lsm.LSM9DS1_GYROSCALE_2000DPS);
}


// Forward declarations of mymathlib.com routines
void Multiply_Self_Transpose(double*, double*, int, int);
void Get_Submatrix(double*, int, int, double*, int, int, int);
int Choleski_LU_Decomposition(double*, int);
int Choleski_LU_Inverse(double*, int);
void Multiply_Matrices(double*, double*, int, int, double*, int);
void Identity_Matrix(double*, int);

int Hessenberg_Form_Elementary(double*, double*, int);
static void Hessenberg_Elementary_Transform(double*, double*, int[], int);

void Copy_Vector(double*, double*, int);

int QR_Hessenberg_Matrix(double*, double*, double[], double[], int, int);
static void One_Real_Eigenvalue(double[], double[], double[], int, double);
static void Two_Eigenvalues(double*, double*, double[], double[], int, int, double);
static void Update_Row(double*, double, double, int, int);
static void Update_Column(double*, double, double, int, int);
static void Update_Transformation(double*, double, double, int, int);
static void Double_QR_Iteration(double*, double*, int, int, int, double*, int);
static void Product_and_Sum_of_Shifts(double*, int, int, double*, double*, double*, int);
static int Two_Consecutive_Small_Subdiagonal(double*, int, int, int, double, double);
static void Double_QR_Step(double*, int, int, int, double, double, double*, int);
static void BackSubstitution(double*, double[], double[], int);
static void BackSubstitute_Real_Vector(double*, double[], double[], int, double, int);
static void BackSubstitute_Complex_Vector(double*, double[], double[], int, double, int);
static void Calculate_Eigenvectors(double*, double*, double[], double[], int);
static void Complex_Division(double, double, double, double, double*, double*);

void Transpose_Square_Matrix(double*, int);

void setup() 
{
    Serial.begin(115200);

    while (!Serial) {
    delay(1); // will pause Zero, Leonardo, etc until serial console opens
    }

    Serial.println("LSM9DS1 data read demo");

    // Try to initialise and warn if we couldn't detect the chip
    if (!lsm.begin())
    {
    Serial.println("Oops ... unable to initialize the LSM9DS1. Check your wiring!");
    while (1);
    }
    Serial.println("Found LSM9DS1 9DOF");

    // helper to just set the default scaling we want, see above!
    setupSensor();
    
    delay(2000); // 2 Second delay because the TinyS3 has to be reset and the button press shakes the device
    Serial.println("Collecting gyro data, hold still");
    // get gyro offset
    float gxa = 0, gya = 0, gza = 0; //change to double if number is big
    for (int i = 0; i < 300; i++) {
        lsm.read(); /* ask it to read in the data */ 
        sensors_event_t a, m, g, temp;
        lsm.getEvent(&a, &m, &g, &temp); 
        gxa += g.gyro.x;
        gya += g.gyro.y;
        gza += g.gyro.z;
    }
    Serial.println(F("gyro offsets"));
    Serial.print(gxa / 300);
    Serial.print(", ");
    Serial.print(gya / 300);
    Serial.print(", ");
    Serial.println(gza / 300);
    Serial.println();
    
    Serial.println(F("rotate slowly and carefully in 3D"));
    delay(3000);
    Serial.println("starting");


}

void loop() 
{

    if (numMeasurements < NUM_MEASUREMENTS) {
        lsm.read();  /* ask it to read in the data */ 

        /* Get a new sensor event */ 
        sensors_event_t a, m, g, temp;

        lsm.getEvent(&a, &m, &g, &temp);
        sensorAccData[numMeasurements][0] = a.acceleration.x;
        sensorAccData[numMeasurements][1] = a.acceleration.y;
        sensorAccData[numMeasurements][2] = a.acceleration.z;
        sensorMagData[numMeasurements][0] = m.magnetic.x;
        sensorMagData[numMeasurements][1] = m.magnetic.y;
        sensorMagData[numMeasurements][2] = m.magnetic.z;
        numMeasurements++;
        Serial.print("Accel X: "); Serial.print(a.acceleration.x); Serial.print(" m/s^2");
        Serial.print("\tY: "); Serial.print(a.acceleration.y);     Serial.print(" m/s^2 ");
        Serial.print("\tZ: "); Serial.print(a.acceleration.z);     Serial.println(" m/s^2 ");
        Serial.print("Mag X: "); Serial.print(m.magnetic.x);   Serial.print(" uT");
        Serial.print("\tY: "); Serial.print(m.magnetic.y);     Serial.print(" uT");
        Serial.print("\tZ: "); Serial.print(m.magnetic.z);     Serial.println(" uT");
    } else {
        Serial.println("Measurements done. Please wait for the calibrator to setup!");
        // MKWii Online music playing in the background...
        delay(2000);
        for (int sensor=0; sensor<2; sensor++) {
            if (sensor==0) {
                Serial.println("Calibration parameters of the magnetometer: ");
            } else {
                Serial.println("Calibration parameters of the accelerometer: ");
            }
            double *D, *S, *C, *S11, *S12, *S12t, *S22, *S22_1, *S22a, *S22b, *SS, *E, *d, *U, *SSS;
            double *eigen_real, *eigen_imag, *v1, *v2, *v, *Q, *Q_1, *B, *QB, J, hmb, *SSSS;
            int *p;
            int i, j, index;
            int ngood = NUM_MEASUREMENTS;
            int nlines = 0;
            double maxval, norm, btqb, *eigen_real3, *eigen_imag3, *Dz, *vdz, *SQ, *A_1, hm, norm1, norm2, norm3;
            double x, y, z, x2, nxsrej, xs, xave;
            double *raw; //raw obs

            // calculate mean (norm) and standard deviation for possible outlier rejection

            xs=0;

            xave=0;

            for(i = 0; i < NUM_MEASUREMENTS; i++) {
                if (sensor==0) {
                    x = sensorMagData[i][0];
                    y = sensorMagData[i][1];
                    z = sensorMagData[i][2];
                } else {
                    x = sensorAccData[i][0];
                    y = sensorAccData[i][1];
                    z = sensorAccData[i][2];
                }
                x2 = x*x + y*y + z*z;
                xs += x2;
                xave += sqrt(x2);
            }
            xave = xave/NUM_MEASUREMENTS; //mean vector length
            xs = sqrt(xs/NUM_MEASUREMENTS -(xave*xave)); //std. dev.

            // summarize statistics, give user opportunity to reject outlying measurements
            
            Serial.print("\nAverage magnitude (default Hm) and sigma of 300 vectors = ");
            Serial.print(xave, 1);
            Serial.println(xs, 1);

            nxsrej = 0; // You can set the rejection criteria here, but the LSM9DS1 is pretty solid, so 0 is a good choice
            if (nxsrej > 0) {
            Serial.print("Rejecting measurements if abs(vector_length-average)/(std. dev.) > ");
            Serial.println(nxsrej, 1);

            // outlier rejection, count remaining lines

            for(i = 0; i < NUM_MEASUREMENTS; i++) {
                if (sensor==0) {
                    x = sensorMagData[i][0];
                    y = sensorMagData[i][1];
                    z = sensorMagData[i][2];
                } else {
                    x = sensorAccData[i][0];
                    y = sensorAccData[i][1];
                    z = sensorAccData[i][2];
                }
                x2 = sqrt(x*x + y*y + z*z);  //vector length
                x2 = fabs(x2 - xave)/xs; //standard deviations from mean
                if (x2 > nxsrej) {
                    ngood--;
                    Serial.print("We reject: ");
                    Serial.print(x,2);
                    Serial.print(y,2);
                    Serial.println(z,2);
                }
            }
            Serial.print("We reject: ");
            Serial.print(NUM_MEASUREMENTS-ngood);
            Serial.println(" measurements!");
            }

            // allocate array space for accepted measurements
            D = (double*)malloc(10 * ngood * sizeof(double));
            raw = (double*)malloc(3 * ngood * sizeof(double));

            j = 0;  //array index for good measurements

            for(i = 0; i < NUM_MEASUREMENTS; i++) {
                if (sensor==0) {
                    x = sensorMagData[i][0];
                    y = sensorMagData[i][1];
                    z = sensorMagData[i][2];
                } else {
                    x = sensorAccData[i][0];
                    y = sensorAccData[i][1];
                    z = sensorAccData[i][2];
                }
                x2 = sqrt(x*x + y*y + z*z);  //vector length
                x2 = fabs(x2 - xave)/xs; //standard deviation from mean
                if ((nxsrej == 0) || (x2 <= nxsrej)) {

                    raw[3*j]   = x;
                    raw[3*j+1] = y;
                    raw[3*j+2] = z;
                    D[j]         = x * x;
                    D[ngood+j]   = y * y;
                    D[ngood*2+j] = z * z;
                    D[ngood*3+j] = 2.0 * y * z;
                    D[ngood*4+j] = 2.0 * x * z;
                    D[ngood*5+j] = 2.0 * x * y;
                    D[ngood*6+j] = 2.0 * x;
                    D[ngood*7+j] = 2.0 * y;
                    D[ngood*8+j] = 2.0 * z;
                    D[ngood*9+j] = 1.0;
                    j++; //count good measurements
                }
            }

            if (j != ngood) {
                Serial.println("Internal indexing error!");
                while(1); //HALT
            }

            nlines = ngood; //number to process

            hm=xave; // CHANGE IF YOU DISAGREE
            Serial.print("Hm is set to: ");
            Serial.println(hm,2);

            // allocate memory for matrix S
            S = (double*)malloc(10 * 10 * sizeof(double));
            Multiply_Self_Transpose(S, D, 10, nlines);

            // Create pre-inverted constraint matrix C
            C = (double*)malloc(6 * 6 * sizeof(double));
            C[0] = 0.0; C[1] = 0.5; C[2] = 0.5; C[3] = 0.0;  C[4] = 0.0;  C[5] = 0.0;
            C[6] = 0.5;  C[7] = 0.0; C[8] = 0.5; C[9] = 0.0;  C[10] = 0.0;  C[11] = 0.0;
            C[12] = 0.5;  C[13] = 0.5; C[14] = 0.0; C[15] = 0.0;  C[16] = 0.0;  C[17] = 0.0;
            C[18] = 0.0;  C[19] = 0.0;  C[20] = 0.0;  C[21] = -0.25; C[22] = 0.0;  C[23] = 0.0;
            C[24] = 0.0;  C[25] = 0.0; C[26] = 0.0;  C[27] = 0.0;  C[28] = -0.25; C[29] = 0.0;
            C[30] = 0.0;  C[31] = 0.0; C[32] = 0.0;  C[33] = 0.0;  C[34] = 0.0;  C[35] = -0.25;

            S11 = (double*)malloc(6 * 6 * sizeof(double));
            Get_Submatrix(S11, 6, 6, S, 10, 0, 0);
            S12 = (double*)malloc(6 * 4 * sizeof(double));
            Get_Submatrix(S12, 6, 4, S, 10, 0, 6);
            S12t = (double*)malloc(4 * 6 * sizeof(double));
            Get_Submatrix(S12t, 4, 6, S, 10, 6, 0);
            S22 = (double*)malloc(4 * 4 * sizeof(double));
            Get_Submatrix(S22, 4, 4, S, 10, 6, 6);

            S22_1 = (double*)malloc(4 * 4 * sizeof(double));
            for(i = 0; i < 16; i++)
            S22_1[i] = S22[i];
            Choleski_LU_Decomposition(S22_1, 4);
            Choleski_LU_Inverse(S22_1, 4);

            // Calculate S22a = S22_1 * S12t   4*6 = 4x4 * 4x6   C = AB
            S22a = (double*)malloc(4 * 6 * sizeof(double));
            Multiply_Matrices(S22a, S22_1, 4, 4, S12t, 6);

            // Then calculate S22b = S12 * S22a      ( 6x6 = 6x4 * 4x6)
            S22b = (double*)malloc(6 * 6 * sizeof(double));
            Multiply_Matrices(S22b, S12, 6, 4, S22a, 6);

            // Calculate SS = S11 - S22b
            SS = (double*)malloc(6 * 6 * sizeof(double));
            for(i = 0; i < 36; i++)
            SS[i] = S11[i] - S22b[i];
            E = (double*)malloc(6 * 6 * sizeof(double));
            Multiply_Matrices(E, C, 6, 6, SS, 6);

            SSS = (double*)malloc(6 * 6 * sizeof(double));
            Hessenberg_Form_Elementary(E, SSS, 6);

            eigen_real = (double*)malloc(6 * sizeof(double));
            eigen_imag = (double*)malloc(6 * sizeof(double));

            QR_Hessenberg_Matrix(E, SSS, eigen_real, eigen_imag, 6, 100);

            index = 0;
            maxval = eigen_real[0];
            for(i = 1; i < 6; i++)
            {
            if(eigen_real[i] > maxval)
            {
                maxval = eigen_real[i];
                index = i;
            }
            }

            v1 = (double*)malloc(6 * sizeof(double));

            v1[0] = SSS[index];
            v1[1] = SSS[index+6];
            v1[2] = SSS[index+12];
            v1[3] = SSS[index+18];
            v1[4] = SSS[index+24];
            v1[5] = SSS[index+30];

            // normalize v1
            norm = sqrt(v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2] + v1[3] * v1[3] + v1[4] * v1[4] + v1[5] * v1[5]);
            v1[0] /= norm;
            v1[1] /= norm;
            v1[2] /= norm;
            v1[3] /= norm;
            v1[4] /= norm;
            v1[5] /= norm;

            if(v1[0] < 0.0)
            {
            v1[0] = -v1[0];
            v1[1] = -v1[1];
            v1[2] = -v1[2];
            v1[3] = -v1[3];
            v1[4] = -v1[4];
            v1[5] = -v1[5];
            }

            // Calculate v2 = S22a * v1      ( 4x1 = 4x6 * 6x1)
            v2 = (double*)malloc(4 * sizeof(double));
            Multiply_Matrices(v2, S22a, 4, 6, v1, 1);

            v = (double*)malloc(10 * sizeof(double));

            v[0] = v1[0];
            v[1] = v1[1];
            v[2] = v1[2];
            v[3] = v1[3];
            v[4] = v1[4];
            v[5] = v1[5];
            v[6] = -v2[0];
            v[7] = -v2[1];
            v[8] = -v2[2];
            v[9] = -v2[3];

            Q = (double*)malloc(3 * 3 * sizeof(double));

            Q[0] = v[0];
            Q[1] = v[5];
            Q[2] = v[4];
            Q[3] = v[5];
            Q[4] = v[1];
            Q[5] = v[3];
            Q[6] = v[4];
            Q[7] = v[3];
            Q[8] = v[2];

            U = (double*)malloc(3 * sizeof(double));

            U[0] = v[6];
            U[1] = v[7];
            U[2] = v[8];
            Q_1 = (double*)malloc(3 * 3 * sizeof(double));
            for(i = 0; i < 9; i++)
            Q_1[i] = Q[i];
            Choleski_LU_Decomposition(Q_1, 3);
            Choleski_LU_Inverse(Q_1, 3);

            // Calculate B = Q-1 * U   ( 3x1 = 3x3 * 3x1)
            B = (double*)malloc(3 * sizeof(double));
            Multiply_Matrices(B, Q_1, 3, 3, U, 1);
            B[0] = -B[0];     // x-axis combined bias
            B[1] = -B[1];     // y-axis combined bias
            B[2] = -B[2];     // z-axis combined bias

            // for(i = 0; i < 3; i++)
            printf("\r\nCombined bias vector B:\r\n");
            printf("%8.2lf %8.2lf %8.2lf \r\n", B[0],B[1],B[2]);

            // First calculate QB = Q * B   ( 3x1 = 3x3 * 3x1)
            QB = (double*)malloc(3 * sizeof(double));
            Multiply_Matrices(QB, Q, 3, 3, B, 1);

            // Then calculate btqb = BT * QB    ( 1x1 = 1x3 * 3x1)
            Multiply_Matrices(&btqb, B, 1, 3, QB, 1);

            // Calculate hmb = sqrt(btqb - J).
            J = v[9];
            hmb = sqrt(btqb - J);

            // Calculate SQ, the square root of matrix Q
            SSSS = (double*)malloc(3 * 3 * sizeof(double));
            Hessenberg_Form_Elementary(Q, SSSS, 3);

            eigen_real3 = (double*)malloc(3 * sizeof(double));
            eigen_imag3 = (double*)malloc(3 * sizeof(double));
            QR_Hessenberg_Matrix(Q, SSSS, eigen_real3, eigen_imag3, 3, 100);

            // normalize eigenvectors
            norm1 = sqrt(SSSS[0] * SSSS[0] + SSSS[3] * SSSS[3] + SSSS[6] * SSSS[6]);
            SSSS[0] /= norm1;
            SSSS[3] /= norm1;
            SSSS[6] /= norm1;
            norm2 = sqrt(SSSS[1] * SSSS[1] + SSSS[4] * SSSS[4] + SSSS[7] * SSSS[7]);
            SSSS[1] /= norm2;
            SSSS[4] /= norm2;
            SSSS[7] /= norm2;
            norm3 = sqrt(SSSS[2] * SSSS[2] + SSSS[5] * SSSS[5] + SSSS[8] * SSSS[8]);
            SSSS[2] /= norm3;
            SSSS[5] /= norm3;
            SSSS[8] /= norm3;

            Dz = (double*)malloc(3 * 3 * sizeof(double));
            for(i = 0; i < 9; i++)
            Dz[i] = 0.0;
            Dz[0] = sqrt(eigen_real3[0]);
            Dz[4] = sqrt(eigen_real3[1]);
            Dz[8] = sqrt(eigen_real3[2]);

            vdz = (double*)malloc(3 * 3 * sizeof(double));
            Multiply_Matrices(vdz, SSSS, 3, 3, Dz, 3);

            Transpose_Square_Matrix(SSSS, 3);

            SQ = (double*)malloc(3 * 3 * sizeof(double));
            Multiply_Matrices(SQ, vdz, 3, 3, SSSS, 3);

            // hm = 0.569;
            A_1 = (double*)malloc(3 * 3 * sizeof(double));

            for(i = 0; i < 9; i++)
            A_1[i] = SQ[i] * hm / hmb;


            Serial.print("\nCorrection matrix Ainv, using Hm=");
            Serial.print(hm, 1); // Print 'hm' with 1 decimal place
            Serial.println(":");

            for (int i = 0; i < 3; i++) {
                Serial.print(A_1[i * 3], 5); // Print A_1 values with 5 decimal places
                Serial.print(" ");
                Serial.print(A_1[i * 3 + 1], 5);
                Serial.print(" ");
                Serial.println(A_1[i * 3 + 2], 5);
            }

            Serial.println("\nWhere Hcalc = Ainv*(H-B)\n");

            Serial.println("Code initialization statements...");
            if (sensor==0) {
               Serial.print("\nfloat M_B[3] = {");
            } else {
               Serial.print("\nfloat A_B[3] = {");
            }
            
            Serial.print(B[0], 2); // Print B values with 2 decimal places
            Serial.print(",");
            Serial.print(B[1], 2);
            Serial.print(",");
            Serial.print(B[2], 2);
            Serial.println("};");


            if (sensor==0) {
               Serial.println("\nfloat M_Ainv[3][3] = {");
            } else {
               Serial.println("\nfloat A_Ainv[3][3] = {");
            }
            
            Serial.print("  {");
            Serial.print(A_1[0], 5);
            Serial.print(",");
            Serial.print(A_1[1], 5);
            Serial.print(",");
            Serial.print(A_1[2], 5);
            Serial.println("},");
            Serial.print("  {");
            Serial.print(A_1[3], 5);
            Serial.print(",");
            Serial.print(A_1[4], 5);
            Serial.print(",");
            Serial.print(A_1[5], 5);
            Serial.println("},");
            Serial.print("  {");
            Serial.print(A_1[6], 5);
            Serial.print(",");
            Serial.print(A_1[7], 5);
            Serial.print(",");
            Serial.print(A_1[8], 5);
            Serial.println("}");
            Serial.println("};");

            Serial.print("\nRMS corrected vector length ");
            Serial.println(sqrt(xs / nlines), 6); // Print RMS value with 6 decimal places

            Serial.print("\nRMS residual ");
            Serial.println(1.0 - sqrt(xs / nlines) / hm, 6); // Print RMS residual with 6 decimal places


            free(D);
            free(S);
            free(C);
            free(S11);
            free(S12);
            free(S12t);
            free(S22);
            free(S22_1);
            free(S22a);
            free(S22b);
            free(SS);
            free(E);
            free(U);
            free(SSS);
            free(eigen_real);
            free(eigen_imag);
            free(v1);
            free(v2);
            free(v);
            free(Q);
            free(Q_1);
            free(B);
            free(QB);
            free(SSSS);
            free(eigen_real3);
            free(eigen_imag3);
            free(Dz);
            free(vdz);
            free(SQ);
            free(A_1);
        }
        while(1); // HALT
    }
    delay(MEASUREMENT_INTERVAL);
}

void Copy_Vector(double *d, double *s, int n)
{
   memcpy(d, s, sizeof(double) * n);
}

void Multiply_Self_Transpose(double *C, double *A, int nrows, int ncols)
{
   int i,j,k;
   double *pA;
   double *p_A = A;
   double *pB;
   double *pCdiag = C;
   double *pC = C;
   double *pCt;

   for (i = 0; i < nrows; pCdiag += nrows + 1, p_A = pA, i++) {
      pC = pCdiag;
      pCt = pCdiag;
      pB = p_A;
      for (j = i; j < nrows; pC++, pCt += nrows, j++) {
         pA = p_A;
         *pC = 0.0;
         for (k = 0; k < ncols; k++) *pC += *(pA++) * *(pB++);
         *pCt = *pC;
      }
   }
}

#include <string.h>                                    // required for memcpy()

void Get_Submatrix(double *S, int mrows, int mcols,
                                        double *A, int ncols, int row, int col)
{
   int number_of_bytes = sizeof(double) * mcols;

   for (A += row * ncols + col; mrows > 0; A += ncols, S+= mcols, mrows--)
      memcpy(S, A, number_of_bytes);
}

int Lower_Triangular_Solve(double *L, double B[], double x[], int n);
int Lower_Triangular_Inverse(double *L, int n);
int Upper_Triangular_Solve(double *U, double B[], double x[], int n);

int Choleski_LU_Decomposition(double *A, int n)
{
   int i, k, p;
   double *p_Lk0;                   // pointer to L[k][0]
   double *p_Lkp;                   // pointer to L[k][p]
   double *p_Lkk;                   // pointer to diagonal element on row k.
   double *p_Li0;                   // pointer to L[i][0]
   double reciprocal;

   for (k = 0, p_Lk0 = A; k < n; p_Lk0 += n, k++) {

//            Update pointer to row k diagonal element.

      p_Lkk = p_Lk0 + k;

//            Calculate the difference of the diagonal element in row k
//            from the sum of squares of elements row k from column 0 to
//            column k-1.

      for (p = 0, p_Lkp = p_Lk0; p < k; p_Lkp += 1,  p++)
         *p_Lkk -= *p_Lkp * *p_Lkp;

//            If diagonal element is not positive, return the error code,
//            the matrix is not positive definite symmetric.

      if ( *p_Lkk <= 0.0 ) return -1;

//            Otherwise take the square root of the diagonal element.

      *p_Lkk = sqrt( *p_Lkk );
      reciprocal = 1.0 / *p_Lkk;

//            For rows i = k+1 to n-1, column k, calculate the difference
//            between the i,k th element and the inner product of the first
//            k-1 columns of row i and row k, then divide the difference by
//            the diagonal element in row k.
//            Store the transposed element in the upper triangular matrix.

      p_Li0 = p_Lk0 + n;
      for (i = k + 1; i < n; p_Li0 += n, i++) {
         for (p = 0; p < k; p++)
            *(p_Li0 + k) -= *(p_Li0 + p) * *(p_Lk0 + p);
         *(p_Li0 + k) *= reciprocal;
         *(p_Lk0 + i) = *(p_Li0 + k);
      }
   }
   return 0;
}

int Choleski_LU_Solve(double *LU, double B[], double x[], int n)
{

//         Solve the linear equation Ly = B for y, where L is a lower
//         triangular matrix.

   if ( Lower_Triangular_Solve(LU, B, x, n) < 0 ) return -1;

//         Solve the linear equation Ux = y, where y is the solution
//         obtained above of Ly = B and U is an upper triangular matrix.

   return Upper_Triangular_Solve(LU, x, x, n);
}

int Choleski_LU_Inverse(double *LU, int n)
{
   int i, j, k;
   double *p_i, *p_j, *p_k;
   double sum;

   if ( Lower_Triangular_Inverse(LU, n) < 0 ) return -1;

//         Premultiply L inverse by the transpose of L inverse.

   for (i = 0, p_i = LU; i < n; i++, p_i += n) {
      for (j = 0, p_j = LU; j <= i; j++, p_j += n) {
         sum = 0.0;
         for (k = i, p_k = p_i; k < n; k++, p_k += n)
            sum += *(p_k + i) * *(p_k + j);
         *(p_i + j) = sum;
         *(p_j + i) = sum;
      }
   }

   return 0;
}

void Multiply_Matrices(double *C, double *A, int nrows, int ncols,
                                                          double *B, int mcols)
{
   double *pB;
   double *p_B;
   int i,j,k;

   for (i = 0; i < nrows; A += ncols, i++)
      for (p_B = B, j = 0; j < mcols; C++, p_B++, j++) {
         pB = p_B;
         *C = 0.0;
         for (k = 0; k < ncols; pB += mcols, k++)
            *C += *(A+k) * *pB;
      }
}

void Identity_Matrix(double *A, int n)
{
   int i,j;

   for (i = 0; i < n - 1; i++) {
      *A++ = 1.0;
      for (j = 0; j < n; j++) *A++ = 0.0;
   }
   *A = 1.0;
}

void Interchange_Rows(double *A, int row1, int row2, int ncols);
void Interchange_Columns(double *A, int col1, int col2, int nrows, int ncols);
void Identity_Matrix(double *A, int n);
void Copy_Vector(double *d, double *s, int n);

//                        Internally Defined Routines
static void Hessenberg_Elementary_Transform(double* H, double *S, int perm[], int n);

int Hessenberg_Form_Elementary(double *A, double* S, int n)
{
   int i, j, k, col, row;
   int* perm;
   double *p_row, *pS_row;
   double max;
   double s;
   double *pA, *pB, *pC, *pS;

         // n x n matrices for which n <= 2 are already in Hessenberg form

   if (n <= 1) { *S = 1.0; return 0; }
   if (n == 2) { *S++ = 1.0; *S++ = 0.0; *S++ = 1.0; *S = 0.0; return 0; }

                       // Allocate working memory

   perm = (int*) malloc(n * sizeof(int));
   if (perm == NULL) return -1;             // not enough memory

           // For each column use Elementary transformations
           //   to zero the entries below the subdiagonal.

   p_row = A + n;
   pS_row = S + n;
   for (col = 0; col < (n - 2); p_row += n, pS_row += n, col++) {

          // Find the row in column "col" with maximum magnitude where
          // row >= col + 1.

      row = col + 1;
      perm[row] = row;
      for (pA = p_row + col, max = 0.0, i = row; i < n; pA += n, i++)
         if (fabs(*pA) > max) { perm[row] = i; max = fabs(*pA); }

          // If perm[row] != row, then interchange row "row" and row
          // perm[row] and interchange column "row" and column perm[row].

      if ( perm[row] != row ) {
         Interchange_Rows(A, row, perm[row], n);
         Interchange_Columns(A, row, perm[row], n, n);
      }

          // Zero out the components lying below the subdiagonal.

      pA = p_row + n;
      pS = pS_row + n;
      for (i = col + 2; i < n; pA += n, pS += n, i++) {
         s = *(pA + col) / *(p_row + col);
         for (j = 0; j < n; j++)
            *(pA + j) -= *(p_row + j) * s;
         *(pS + col) = s;
         for (j = 0, pB = A + col + 1, pC = A + i; j < n; pB +=n, pC += n, j++)
            *pB += s * *pC;
      }
   }
   pA = A + n + n;
   pS = S + n + n;
   for (i = 2; i < n; pA += n, pS += n, i++) Copy_Vector(pA, pS, i - 1);

   Hessenberg_Elementary_Transform(A, S, perm, n);

   free(perm);
   return 0;
}


static void Hessenberg_Elementary_Transform(double *H, double* S, int perm[],
                                                                         int n)
{
   int i, j;
   double *pS, *pH;
   double x;

   Identity_Matrix(S, n);
   for (i = n - 2; i >= 1; i--) {
      pH = H + n * (i + 1);
      pS = S + n * (i + 1);
      for (j = i + 1; j < n; pH += n, pS += n, j++) {
         *(pS + i) = *(pH + i - 1);
         *(pH + i - 1) = 0.0;
      }
      if (perm[i] != i) {
         pS = S + n * i;
         pH = S + n * perm[i];
         for (j = i; j < n; j++) {
            *(pS + j) = *(pH + j);
            *(pH + j) = 0.0;
         }
         *(pH + i) = 1.0;
      }
   }
}
////////////////////////////////////////////////////////////////////////////////
// File: qr_hessenberg_matrix.c                                               //
// Routine(s):                                                                //
//    QR_Hessenberg_Matrix                                                    //
////////////////////////////////////////////////////////////////////////////////

#include <math.h>                 // required for fabs() and sqrt()
#include <float.h>                // required for DBL_EPSILON


//                        Internally Defined Routines
static void   One_Real_Eigenvalue(double Hrow[], double eigen_real[],
                                  double eigen_imag[], int row, double shift);
static void   Two_Eigenvalues(double *H, double *S, double eigen_real[],
                                 double eigen_imag[], int n, int k, double t);
static void   Update_Row(double *Hrow, double cos, double sin, int n, int k);
static void   Update_Column(double* H, double cos, double sin, int n, int k);
static void   Update_Transformation(double *S, double cos, double sin,
                                                               int n, int k);
static void   Double_QR_Iteration(double *H, double *S, int row, int min_row,
                                         int n, double* shift, int iteration);
static void   Product_and_Sum_of_Shifts(double *H, int n, int max_row,
                    double* shift, double *trace, double *det, int iteration);
static int    Two_Consecutive_Small_Subdiagonal(double* H, int min_row,
                                int max_row, int n, double trace, double det);
static void   Double_QR_Step(double *H, int min_row, int max_row, int min_col,
                                  double trace, double det, double *S, int n);
static void   Complex_Division(double x, double y, double u, double v,
                                                       double* a, double* b);
static void   BackSubstitution(double *H, double eigen_real[],
                                                  double eigen_imag[], int n);
static void   BackSubstitute_Real_Vector(double *H, double eigen_real[],
                 double eigen_imag[], int row,  double zero_tolerance, int n);
static void   BackSubstitute_Complex_Vector(double *H, double eigen_real[],
                 double eigen_imag[], int row,  double zero_tolerance, int n);
static void   Calculate_Eigenvectors(double *H, double *S, double eigen_real[],
                                                  double eigen_imag[], int n);

int QR_Hessenberg_Matrix( double *H, double *S, double eigen_real[],
                          double eigen_imag[], int n, int max_iteration_count)
{
   int i;
   int row;
   int iteration;
   int found_eigenvalue;
   double shift = 0.0;
   double* pH;

   for ( row = n - 1; row >= 0; row--) {
      found_eigenvalue = 0;
      for (iteration = 1; iteration <= max_iteration_count; iteration++) {

                      // Search for small subdiagonal element

         for (i = row, pH = H + row * n; i > 0; i--, pH -= n)
            if (fabs(*(pH + i - 1 )) <= DBL_EPSILON *
                       ( fabs(*(pH - n + i - 1)) + fabs(*(pH + i)) ) ) break;

                // If the subdiagonal element on row "row" is small, then
                // that row element is an eigenvalue.  If the subdiagonal
                // element on row "row-1" is small, then the eigenvalues
                // of the 2x2 diagonal block consisting rows "row-1" and
                // "row" are eigenvalues.  Otherwise perform a double QR
                // iteration.

         switch(row - i) {
            case 0: // One real eigenvalue
               One_Real_Eigenvalue(pH, eigen_real, eigen_imag, i, shift);
               found_eigenvalue = 1;
               break;
            case 1: // Either two real eigenvalues or a complex pair
               row--;
               Two_Eigenvalues(H, S, eigen_real, eigen_imag, n, row, shift);
               found_eigenvalue = 1;
               break;
            default:
               Double_QR_Iteration(H, S, i, row, n, &shift, iteration);
         }
         if (found_eigenvalue) break;
      }
      if (iteration > max_iteration_count) return -1;
   }

   BackSubstitution(H, eigen_real, eigen_imag, n);
   Calculate_Eigenvectors(H, S, eigen_real, eigen_imag, n);

   return 0;
}
static void One_Real_Eigenvalue(double Hrow[], double eigen_real[],
                                   double eigen_imag[], int row, double shift)
{
   Hrow[row] += shift;
   eigen_real[row] = Hrow[row];
   eigen_imag[row] = 0.0;
}
static void Two_Eigenvalues(double *H, double* S, double eigen_real[],
                            double eigen_imag[], int n, int row, double shift)
{
   double p, q, x, discriminant, r;
   double cos, sin;
   double *Hrow = H + n * row;
   double *Hnextrow = Hrow + n;
   int nextrow = row + 1;

   p = 0.5 * (Hrow[row] - Hnextrow[nextrow]);
   x = Hrow[nextrow] * Hnextrow[row];
   discriminant = p * p + x;
   Hrow[row] += shift;
   Hnextrow[nextrow] += shift;
   if (discriminant > 0.0) {                 // pair of real roots
      q = sqrt(discriminant);
      if (p < 0.0) q = p - q; else q += p;
      eigen_real[row] = Hnextrow[nextrow] + q;
      eigen_real[nextrow] = Hnextrow[nextrow] - x / q;
      eigen_imag[row] = 0.0;
      eigen_imag[nextrow] = 0.0;
      r = sqrt(Hnextrow[row]*Hnextrow[row] + q * q);
      sin = Hnextrow[row] / r;
      cos = q / r;
      Update_Row(Hrow, cos, sin, n, row);
      Update_Column(H, cos, sin, n, row);
      Update_Transformation(S, cos, sin, n, row);
   }
   else {                             // pair of complex roots
      eigen_real[nextrow] = eigen_real[row] = Hnextrow[nextrow] + p;
      eigen_imag[row] = sqrt(fabs(discriminant));
      eigen_imag[nextrow] = -eigen_imag[row];
   }
}
static void Update_Row(double *Hrow, double cos, double sin, int n, int row)
{
   double x;
   double *Hnextrow = Hrow + n;
   int i;

   for (i = row; i < n; i++) {
      x = Hrow[i];
      Hrow[i] = cos * x + sin * Hnextrow[i];
      Hnextrow[i] = cos * Hnextrow[i] - sin * x;
   }
}
static void Update_Column(double* H, double cos, double sin, int n, int col)
{
   double x;
   int i;
   int next_col = col + 1;

   for (i = 0; i <= next_col; i++, H += n) {
      x = H[col];
      H[col] = cos * x + sin * H[next_col];
      H[next_col] = cos * H[next_col] - sin * x;
   }
}
static void Update_Transformation(double *S, double cos, double sin,
                                                                 int n, int k)
{
   double x;
   int i;
   int k1 = k + 1;

   for (i = 0; i < n; i++, S += n) {
      x = S[k];
      S[k] = cos * x + sin * S[k1];
      S[k1] = cos * S[k1] - sin * x;
   }
}
static void Double_QR_Iteration(double *H, double *S, int min_row, int max_row,
                                          int n, double* shift, int iteration)
{
   int k;
   double trace, det;

   Product_and_Sum_of_Shifts(H, n, max_row, shift, &trace, &det, iteration);
   k = Two_Consecutive_Small_Subdiagonal(H, min_row, max_row, n, trace, det);
   Double_QR_Step(H, min_row, max_row, k, trace, det, S, n);
}
static void Product_and_Sum_of_Shifts(double *H, int n, int max_row,
                     double* shift, double *trace, double *det, int iteration)
{
   double *pH = H + max_row * n;
   double *p_aux;
   int i;
   int min_col = max_row - 1;

   if ( ( iteration % 10 ) == 0 ) {
      *shift += pH[max_row];
      for (i = 0, p_aux = H; i <= max_row; p_aux += n, i++)
         p_aux[i] -= pH[max_row];
      p_aux = pH - n;
      *trace = fabs(pH[min_col]) + fabs(p_aux[min_col - 1]);
      *det = *trace * *trace;
      *trace *= 1.5;
   }
   else {
      p_aux = pH - n;
      *trace = p_aux[min_col] + pH[max_row];
      *det = p_aux[min_col] * pH[max_row] - p_aux[max_row] * pH[min_col];
   }
};


////////////////////////////////////////////////////////////////////////////////
//  static int Two_Consecutive_Small_Subdiagonal(double* H, int min_row,      //
//                              int max_row, int n, double trace, double det) //
//                                                                            //
//  Description:                                                              //
//     To reduce the amount of computation in Francis' double QR step search  //
//     for two consecutive small subdiagonal elements from row nn to row m,   //
//     where m < nn.                                                          //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     int    min_row                                                         //
//            The row in which to end the search (search is from upwards).    //
//     int    max_row                                                         //
//            The row in which to begin the search.                           //
//     int    n                                                               //
//            The dimension of H.                                             //
//     double trace                                                           //
//            The trace of the lower 2 x 2 block diagonal matrix.             //
//     double det                                                             //
//            The determinant of the lower 2 x 2 block diagonal matrix.       //
//                                                                            //
//  Return Value:                                                             //
//     Row with negligible subdiagonal element or min_row if none found.      //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static int Two_Consecutive_Small_Subdiagonal(double* H, int min_row,
                                 int max_row, int n, double trace, double det)
{
   double x, y ,z, s;
   double* pH;
   int i, k;

   for (k = max_row - 2, pH = H + k * n; k >= min_row; pH -= n, k--) {
      x = (pH[k] * ( pH[k] - trace ) + det) / pH[n+k] + pH[k+1];
      y = pH[k] + pH[n+k+1] - trace;
      z = pH[n + n + k + 1];
      s = fabs(x) + fabs(y) + fabs(z);
      x /= s;
      y /= s;
      z /= s;
      if (k == min_row) break;
      if ( (fabs(pH[k-1]) * (fabs(y) + fabs(z)) ) <=
          DBL_EPSILON * fabs(x) *
             (fabs(pH[k-1-n]) + fabs(pH[k]) + fabs(pH[n + k + 1])) ) break;
   }
   for (i = k+2, pH = H + i * n; i <= max_row; pH += n, i++) pH[i-2] = 0.0;
   for (i = k+3, pH = H + i * n; i <= max_row; pH += n, i++) pH[i-3] = 0.0;
   return k;
};


////////////////////////////////////////////////////////////////////////////////
//  static void Double_QR_Step(double *H, int min_row, int max_row,           //
//                                            int min_col, double *S, int n)  //
//                                                                            //
//  Description:                                                              //
//     Perform Francis' double QR step from rows 'min_row' to 'max_row'       //
//     and columns 'min_col' to 'max_row'.                                    //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     int    min_row                                                         //
//            The row in which to begin.                                      //
//     int    max_row                                                         //
//            The row in which to end.                                        //
//     int    min_col                                                         //
//            The column in which to begin.                                   //
//     double trace                                                           //
//            The trace of the lower 2 x 2 block diagonal matrix.             //
//     double det                                                             //
//            The determinant of the lower 2 x 2 block diagonal matrix.       //
//     double *S                                                              //
//            Pointer to the first element of the transformation matrix.      //
//     int    n                                                               //
//            The dimensions of H and S.                                      //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void Double_QR_Step(double *H, int min_row, int max_row, int min_col,
                                   double trace, double det, double *S, int n)
{
   double s, x, y, z;
   double a, b, c;
   double *pH;
   double *tH;
   double *pS;
   int i,j,k;
   int last_test_row_col = max_row - 1;

   k = min_col;
   pH = H + min_col * n;
   a = (pH[k] * ( pH[k] - trace ) + det) / pH[n+k] + pH[k+1];
   b = pH[k] + pH[n+k+1] - trace;
   c = pH[n + n + k + 1];
   s = fabs(a) + fabs(b) + fabs(c);
   a /= s;
   b /= s;
   c /= s;

   for (; k <= last_test_row_col; k++, pH += n) {
      if ( k > min_col ) {
         c = (k == last_test_row_col) ? 0.0 : pH[n + n + k - 1];
         x = fabs(pH[k-1]) + fabs(pH[n + k - 1]) + fabs(c);
         if ( x == 0.0 ) continue;
         a = pH[k - 1] / x;
         b = pH[n + k - 1] / x;
         c /= x;
      }
      s = sqrt( a * a + b * b + c * c );
      if (a < 0.0) s = -s;
      if ( k > min_col ) pH[k-1] = -s * x;
      else if (min_row != min_col) pH[k-1] = -pH[k-1];
      a += s;
      x = a / s;
      y = b / s;
      z = c / s;
      b /= a;
      c /= a;

                          // Update rows k, k+1, k+2
      for (j = k; j < n; j++) {
         a = pH[j] + b * pH[n+j];
         if ( k != last_test_row_col ) {
            a += c * pH[n + n + j];
            pH[n + n + j] -= a * z;
         }
         pH[n + j] -= a * y;
         pH[j] -= a * x;
      }

                             // Update column k+1

      j = k + 3;
      if (j > max_row) j = max_row;
      for (i = 0, tH = H; i <= j; i++, tH += n) {
         a = x * tH[k] + y * tH[k+1];
         if ( k != last_test_row_col ) {
            a += z * tH[k+2];
            tH[k+2] -= a * c;
         }
         tH[k+1] -= a * b;
         tH[k] -= a;
      }

                        // Update transformation matrix

      for (i = 0, pS = S; i < n; pS += n, i++) {
         a = x * pS[k] + y * pS[k+1];
         if ( k != last_test_row_col ) {
            a += z * pS[k+2];
            pS[k+2] -= a * c;
         }
         pS[k+1] -= a * b;
         pS[k] -= a;
      }
   };
}


////////////////////////////////////////////////////////////////////////////////
//  static void BackSubstitution(double *H, double eigen_real[],              //
//                                               double eigen_imag[], int n)  //
//                                                                            //
//  Description:                                                              //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     double eigen_real[]                                                    //
//            The real part of an eigenvalue.                                 //
//     double eigen_imag[]                                                    //
//            The imaginary part of an eigenvalue.                            //
//     int    n                                                               //
//            The dimension of H, eigen_real, and eigen_imag.                 //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void BackSubstitution(double *H, double eigen_real[],
                                                   double eigen_imag[], int n)
{
   double zero_tolerance;
   double *pH;
   int i, j, row;

                        // Calculate the zero tolerance

   pH = H;
   zero_tolerance = fabs(pH[0]);
   for (pH += n, i = 1; i < n; pH += n, i++)
      for (j = i-1; j < n; j++) zero_tolerance += fabs(pH[j]);
   zero_tolerance *= DBL_EPSILON;

                           // Start Backsubstitution

   for (row = n-1; row >= 0; row--) {
      if (eigen_imag[row] == 0.0)
         BackSubstitute_Real_Vector(H, eigen_real, eigen_imag, row,
                                                           zero_tolerance, n);
      else if ( eigen_imag[row] < 0.0 )
         BackSubstitute_Complex_Vector(H, eigen_real, eigen_imag, row,
                                                           zero_tolerance, n);
   }
}


////////////////////////////////////////////////////////////////////////////////
//  static void BackSubstitute_Real_Vector(double *H, double eigen_real[],    //
//             double eigen_imag[], int row,  double zero_tolerance, int n)   //
//                                                                            //
//  Description:                                                              //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     double eigen_real[]                                                    //
//            The real part of an eigenvalue.                                 //
//     double eigen_imag[]                                                    //
//            The imaginary part of an eigenvalue.                            //
//     int    row                                                             //
//     double zero_tolerance                                                  //
//            Zero substitute. To avoid dividing by zero.                     //
//     int    n                                                               //
//            The dimension of H, eigen_real, and eigen_imag.                 //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void BackSubstitute_Real_Vector(double *H, double eigen_real[],
                   double eigen_imag[], int row,  double zero_tolerance, int n)
{
   double *pH;
   double *pV;
   double x,y;
   double u[4];
   double v[2];
   int i,j,k;

   k = row;
   pH = H + row * n;
   pH[row] = 1.0;
   for (i = row - 1, pH -= n; i >= 0; i--, pH -= n) {
      u[0] = pH[i] - eigen_real[row];
      v[0] = pH[row];
      pV = H + n * k;
      for (j = k; j < row; j++, pV += n) v[0] += pH[j] * pV[row];
      if ( eigen_imag[i] < 0.0 ) {
         u[3] = u[0];
         v[1] = v[0];
      } else {
         k = i;
         if (eigen_imag[i] == 0.0) {
            if (u[0] != 0.0) pH[row] = - v[0] / u[0];
            else pH[row] = - v[0] / zero_tolerance;
         } else {
            u[1] = pH[i+1];
            u[2] = pH[n+i];
            x = (eigen_real[i] - eigen_real[row]);
            x *= x;
            x += eigen_imag[i] * eigen_imag[i];
            pH[row] = (u[1] * v[1] - u[3] * v[0]) / x;
            if ( fabs(u[1]) > fabs(u[3]) )
               pH[n+row] = -(v[0] + u[0] * pH[row]) / u[1];
            else
               pH[n+row] = -(v[1] + u[2] * pH[row]) / u[3];
         }
      }
   }
}


////////////////////////////////////////////////////////////////////////////////
//  static void BackSubstitute_Complex_Vector(double *H, double eigen_real[], //
//              double eigen_imag[], int row,  double zero_tolerance, int n)  //
//                                                                            //
//  Description:                                                              //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     double eigen_real[]                                                    //
//            The real part of an eigenvalue.                                 //
//     double eigen_imag[]                                                    //
//            The imaginary part of an eigenvalue.                            //
//     int    row                                                             //
//     double zero_tolerance                                                  //
//            Zero substitute. To avoid dividing by zero.                     //
//     int    n                                                               //
//            The dimension of H, eigen_real, and eigen_imag.                 //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void BackSubstitute_Complex_Vector(double *H, double eigen_real[],
                   double eigen_imag[], int row,  double zero_tolerance, int n)
{
   double *pH;
   double *pV;
   double x,y;
   double u[4];
   double v[2];
   double w[2];
   int i,j,k;

   k = row - 1;
   pH = H + n * row;
   if ( fabs(pH[k]) > fabs(pH[row-n]) ) {
      pH[k-n] = - (pH[row] - eigen_real[row]) / pH[k];
      pH[row-n] = -eigen_imag[row] / pH[k];
   }
   else
      Complex_Division(-pH[row-n], 0.0,
           pH[k-n]-eigen_real[row], eigen_imag[row], &pH[k-n], &pH[row-n]);
   pH[k] = 1.0;
   pH[row] = 0.0;
   for (i = row - 2, pH = H + n * i; i >= 0; pH -= n, i--) {
      u[0] = pH[i] - eigen_real[row];
      w[0] = pH[row];
      w[1] = 0.0;
      pV = H + k * n;
      for (j = k; j < row; j++, pV+=n) {
         w[0] += pH[j] * pV[row - 1];
         w[1] += pH[j] * pV[row];
      }
      if (eigen_imag[i] < 0.0) {
         u[3] = u[0];
         v[0] = w[0];
         v[1] = w[1];
      } else {
         k = i;
         if (eigen_imag[i] == 0.0) {
            Complex_Division(-w[0], -w[1], u[0], eigen_imag[row], &pH[row-1],
                                                                    &pH[row]);
         }
         else {
            u[1] = pH[i+1];
            u[2] = pH[n + i];
            x = eigen_real[i] - eigen_real[row];
            y = 2.0 * x * eigen_imag[row];
            x = x * x + eigen_imag[i] * eigen_imag[i]
                                          - eigen_imag[row] * eigen_imag[row];
            if ( x == 0.0 && y == 0.0 )
              x = zero_tolerance * ( fabs(u[0]) + fabs(u[1]) + fabs(u[2])
                                       + fabs(u[3]) + fabs(eigen_imag[row]) );
            Complex_Division(u[1]*v[0] - u[3] * w[0] + w[1] * eigen_imag[row],
                           u[1] * v[1] - u[3] * w[1] - w[0] * eigen_imag[row],
                                                   x, y, &pH[row-1], &pH[row]);
            if ( fabs(u[1]) > (fabs(u[3]) + fabs(eigen_imag[row])) ) {
               pH[n+row-1] = -w[0] - u[0] * pH[row-1]
                                           + eigen_imag[row] * pH[row] / u[1];
               pH[n+row] = -w[1] - u[0] * pH[row]
                                          - eigen_imag[row] * pH[row-1] / u[1];
            }
            else {
               Complex_Division(-v[0] - u[2] * pH[row-1], -v[1] - u[2]*pH[row],
                              u[3], eigen_imag[row], &pH[n+row-1], &pH[n+row]);
            }
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
//  static void Calculate_Eigenvectors(double *H, double *S,                  //
//                          double eigen_real[], double eigen_imag[], int n)  //
//                                                                            //
//  Description:                                                              //
//     Multiply by transformation matrix.                                     //
//                                                                            //
//  Arguments:                                                                //
//     double *H                                                              //
//            Pointer to the first element of the matrix in Hessenberg form.  //
//     double *S                                                              //
//            Pointer to the first element of the transformation matrix.      //
//     double eigen_real[]                                                    //
//            The real part of an eigenvalue.                                 //
//     double eigen_imag[]                                                    //
//            The imaginary part of an eigenvalue.                            //
//     int    n                                                               //
//            The dimension of H, S, eigen_real, and eigen_imag.              //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void Calculate_Eigenvectors(double *H, double *S, double eigen_real[],
                                                   double eigen_imag[], int n)
{
   double* pH;
   double* pS;
   double x,y;
   int i,j,k;

   for (k = n-1; k >= 0; k--) {
      if (eigen_imag[k] < 0.0) {
         for (i = 0, pS = S; i < n; pS += n, i++) {
            x = 0.0;
            y = 0.0;
            for (j = 0, pH = H; j <= k; pH += n, j++) {
               x += pS[j] * pH[k-1];
               y += pS[j] * pH[k];
            }
            pS[k-1] = x;
            pS[k] = y;
         }
      } else if (eigen_imag[k] == 0.0) {
         for (i = 0, pS = S; i < n; i++, pS += n) {
            x = 0.0;
            for (j = 0, pH = H; j <= k; j++, pH += n)
               x += pS[j] * pH[k];
            pS[k] = x;
        }
      }
   }
}


////////////////////////////////////////////////////////////////////////////////
//  static void Complex_Division(double x, double y, double u, double v,      //
//                                                    double* a, double* b)   //
//                                                                            //
//  Description:                                                              //
//    a + i b = (x + iy) / (u + iv)                                           //
//            = (x * u + y * v) / r^2 + i (y * u - x * v) / r^2,              //
//    where r^2 = u^2 + v^2.                                                  //
//                                                                            //
//  Arguments:                                                                //
//     double x                                                               //
//            Real part of the numerator.                                     //
//     double y                                                               //
//            Imaginary part of the numerator.                                //
//     double u                                                               //
//            Real part of the denominator.                                   //
//     double v                                                               //
//            Imaginary part of the denominator.                              //
//     double *a                                                              //
//            Real part of the quotient.                                      //
//     double *b                                                              //
//            Imaginary part of the quotient.                                 //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
static void Complex_Division(double x, double y, double u, double v,
                                                         double* a, double* b)
{
   double q = u*u + v*v;

   *a = (x * u + y * v) / q;
   *b = (y * u - x * v) / q;
}
////////////////////////////////////////////////////////////////////////////////
// File: transpose_square_matrix.c                                            //
// Routine(s):                                                                //
//    Transpose_Square_Matrix                                                 //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  void Transpose_Square_Matrix( double *A, int n )                          //
//                                                                            //
//  Description:                                                              //
//     Take the transpose of A and store in place.                            //
//                                                                            //
//  Arguments:                                                                //
//     double *A    Pointer to the first element of the matrix A.             //
//     int    n     The number of rows and columns of the matrix A.           //
//                                                                            //
//  Return Values:                                                            //
//     void                                                                   //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     double A[N][N];                                                        //
//                                                                            //
//     (your code to initialize the matrix A)                                 //
//                                                                            //
//     Transpose_Square_Matrix( &A[0][0], N);                                 //
//     printf("The transpose of A is \n"); ...                                //
////////////////////////////////////////////////////////////////////////////////
void Transpose_Square_Matrix( double *A, int n )
{
   double *pA, *pAt;
   double temp;
   int i,j;

   for (i = 0; i < n; A += n + 1, i++) {
      pA = A + 1;
      pAt = A + n;
      for (j = i+1 ; j < n; pA++, pAt += n, j++) {
         temp = *pAt;
         *pAt = *pA;
         *pA = temp;
      }
   }
}
///////////////////////////////////////////////////////////////////////////////
// File: lower_triangular.c                                                   //
// Routines:                                                                  //
//    Lower_Triangular_Solve                                                  //
//    Lower_Triangular_Inverse                                                //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  int Lower_Triangular_Solve(double *L, double *B, double x[], int n)       //
//                                                                            //
//  Description:                                                              //
//     This routine solves the linear equation Lx = B, where L is an n x n    //
//     lower triangular matrix.  (The superdiagonal part of the matrix is     //
//     not addressed.)                                                        //
//     The algorithm follows:                                                 //
//                      x[0] = B[0]/L[0][0], and                              //
//     x[i] = [B[i] - (L[i][0] * x[0]  + ... + L[i][i-1] * x[i-1])] / L[i][i],//
//     for i = 1, ..., n-1.                                                   //
//                                                                            //
//  Arguments:                                                                //
//     double *L   Pointer to the first element of the lower triangular       //
//                 matrix.                                                    //
//     double *B   Pointer to the column vector, (n x 1) matrix, B.           //
//     double *x   Pointer to the column vector, (n x 1) matrix, x.           //
//     int     n   The number of rows or columns of the matrix L.             //
//                                                                            //
//  Return Values:                                                            //
//     0  Success                                                             //
//    -1  Failure - The matrix L is singular.                                 //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     double A[N][N], B[N], x[N];                                            //
//                                                                            //
//     (your code to create matrix A and column vector B)                     //
//     err = Lower_Triangular_Solve(&A[0][0], B, x, n);                       //
//     if (err < 0) printf(" Matrix A is singular\n");                        //
//     else printf(" The solution is \n");                                    //
//           ...                                                              //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
int Lower_Triangular_Solve(double *L, double B[], double x[], int n)
{
   int i, k;

//         Solve the linear equation Lx = B for x, where L is a lower
//         triangular matrix.

   for (k = 0; k < n; L += n, k++) {
      if (*(L + k) == 0.0) return -1;           // The matrix L is singular
      x[k] = B[k];
      for (i = 0; i < k; i++) x[k] -= x[i] * *(L + i);
      x[k] /= *(L + k);
   }

   return 0;
}


////////////////////////////////////////////////////////////////////////////////
//  int Lower_Triangular_Inverse(double *L,  int n)                           //
//                                                                            //
//  Description:                                                              //
//     This routine calculates the inverse of the lower triangular matrix L.  //
//     The superdiagonal part of the matrix is not addressed.                 //
//     The algorithm follows:                                                 //
//        Let M be the inverse of L, then L M = I,                            //
//     M[i][i] = 1.0 / L[i][i] for i = 0, ..., n-1, and                       //
//     M[i][j] = -[(L[i][j] M[j][j] + ... + L[i][i-1] M[i-1][j])] / L[i][i],  //
//     for i = 1, ..., n-1, j = 0, ..., i - 1.                                //
//                                                                            //
//                                                                            //
//  Arguments:                                                                //
//     double *L   On input, the pointer to the first element of the matrix   //
//                 whose lower triangular elements form the matrix which is   //
//                 to be inverted. On output, the lower triangular part is    //
//                 replaced by the inverse.  The superdiagonal elements are   //
//                 not modified.                                              //
//                 its inverse.                                               //
//     int     n   The number of rows and/or columns of the matrix L.         //
//                                                                            //
//  Return Values:                                                            //
//     0  Success                                                             //
//    -1  Failure - The matrix L is singular.                                 //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     double L[N][N];                                                        //
//                                                                            //
//     (your code to create the matrix L)                                     //
//     err = Lower_Triangular_Inverse(&L[0][0], N);                           //
//     if (err < 0) printf(" Matrix L is singular\n");                        //
//     else {                                                                 //
//        printf(" The inverse is \n");                                       //
//           ...                                                              //
//     }                                                                      //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
int Lower_Triangular_Inverse(double *L, int n)
{
   int i, j, k;
   double *p_i, *p_j, *p_k;
   double sum;

//         Invert the diagonal elements of the lower triangular matrix L.

   for (k = 0, p_k = L; k < n; p_k += (n + 1), k++) {
      if (*p_k == 0.0) return -1;
      else *p_k = 1.0 / *p_k;
   }

//         Invert the remaining lower triangular matrix L row by row.

   for (i = 1, p_i = L + n; i < n; i++, p_i += n) {
      for (j = 0, p_j = L; j < i; p_j += n, j++) {
         sum = 0.0;
         for (k = j, p_k = p_j; k < i; k++, p_k += n)
            sum += *(p_i + k) * *(p_k + j);
         *(p_i + j) = - *(p_i + i) * sum;
      }
   }

   return 0;
}
////////////////////////////////////////////////////////////////////////////////
// File: upper_triangular.c                                                   //
// Routines:                                                                  //
//    Upper_Triangular_Solve                                                  //
//    Upper_Triangular_Inverse                                                //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  int Upper_Triangular_Solve(double *U, double *B, double x[], int n)       //
//                                                                            //
//  Description:                                                              //
//     This routine solves the linear equation Ux = B, where U is an n x n    //
//     upper triangular matrix.  (The subdiagonal part of the matrix is       //
//     not addressed.)                                                        //
//     The algorithm follows:                                                 //
//                  x[n-1] = B[n-1]/U[n-1][n-1], and                          //
//     x[i] = [B[i] - (U[i][i+1] * x[i+1]  + ... + U[i][n-1] * x[n-1])]       //
//                                                                 / U[i][i], //
//     for i = n-2, ..., 0.                                                   //
//                                                                            //
//  Arguments:                                                                //
//     double *U   Pointer to the first element of the upper triangular       //
//                 matrix.                                                    //
//     double *B   Pointer to the column vector, (n x 1) matrix, B.           //
//     double *x   Pointer to the column vector, (n x 1) matrix, x.           //
//     int     n   The number of rows or columns of the matrix U.             //
//                                                                            //
//  Return Values:                                                            //
//     0  Success                                                             //
//    -1  Failure - The matrix U is singular.                                 //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     double A[N][N], B[N], x[N];                                            //
//                                                                            //
//     (your code to create matrix A and column vector B)                     //
//     err = Upper_Triangular_Solve(&A[0][0], B, x, n);                       //
//     if (err < 0) printf(" Matrix A is singular\n");                        //
//     else printf(" The solution is \n");                                    //
//           ...                                                              //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
int Upper_Triangular_Solve(double *U, double B[], double x[], int n)
{
   int i, k;

//         Solve the linear equation Ux = B for x, where U is an upper
//         triangular matrix.

   for (k = n-1, U += n * (n - 1); k >= 0; U -= n, k--) {
      if (*(U + k) == 0.0) return -1;           // The matrix U is singular
      x[k] = B[k];
      for (i = k + 1; i < n; i++) x[k] -= x[i] * *(U + i);
      x[k] /= *(U + k);
   }

   return 0;
}


////////////////////////////////////////////////////////////////////////////////
//  int Upper_Triangular_Inverse(double *U,  int n)                           //
//                                                                            //
//  Description:                                                              //
//     This routine calculates the inverse of the upper triangular matrix U.  //
//     The subdiagonal part of the matrix is not addressed.                   //
//     The algorithm follows:                                                 //
//        Let M be the inverse of U, then U M = I,                            //
//     M[n-1][n-1] = 1.0 / U[n-1][n-1] and                                    //
//     M[i][j] = -( U[i][i+1] M[i+1][j] + ... + U[i][j] M[j][j] ) / U[i][i],  //
//     for i = n-2, ... , 0,  j = n-1, ..., i+1.                              //
//                                                                            //
//                                                                            //
//  Arguments:                                                                //
//     double *U   On input, the pointer to the first element of the matrix   //
//                 whose upper triangular elements form the matrix which is   //
//                 to be inverted. On output, the upper triangular part is    //
//                 replaced by the inverse.  The subdiagonal elements are     //
//                 not modified.                                              //
//     int     n   The number of rows and/or columns of the matrix U.         //
//                                                                            //
//  Return Values:                                                            //
//     0  Success                                                             //
//    -1  Failure - The matrix U is singular.                                 //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     double U[N][N];                                                        //
//                                                                            //
//     (your code to create the matrix U)                                     //
//     err = Upper_Triangular_Inverse(&U[0][0], N);                           //
//     if (err < 0) printf(" Matrix U is singular\n");                        //
//     else {                                                                 //
//        printf(" The inverse is \n");                                       //
//           ...                                                              //
//     }                                                                      //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
int Upper_Triangular_Inverse(double *U, int n)
{
   int i, j, k;
   double *p_i, *p_j, *p_k;
   double sum;

//         Invert the diagonal elements of the upper triangular matrix U.

   for (k = 0, p_k = U; k < n; p_k += (n + 1), k++) {
      if (*p_k == 0.0) return -1;
      else *p_k = 1.0 / *p_k;
   }

//         Invert the remaining upper triangular matrix U.

   for (i = n - 2, p_i = U + n * (n - 2); i >=0; p_i -= n, i-- ) {
      for (j = n - 1; j > i; j--) {
         sum = 0.0;
         for (k = i + 1, p_k = p_i + n; k <= j; p_k += n, k++ ) {
            sum += *(p_i + k) * *(p_k + j);
         }
         *(p_i + j) = - *(p_i + i) * sum;
      }
   }

   return 0;
}
////////////////////////////////////////////////////////////////////////////////
// File: interchange_cols.c                                                   //
// Routine(s):                                                                //
//    Interchange_Columns                                                     //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  void Interchange_Columns(double *A, int col1, int col2, int nrows,        //
//                                                                 int ncols) //
//                                                                            //
//  Description:                                                              //
//     Interchange the columns 'col1' and 'col2' of the  nrows x ncols        //
//     matrix A.                                                              //
//                                                                            //
//  Arguments:                                                                //
//     double *A    Pointer to the first element of the matrix A.             //
//     int    col1  The column of A which is to be interchanged with col2.    //
//     int    col2  The column of A which is to be interchanged with col1.    //
//     int    nrows The number of rows matrix A.                              //
//     int    ncols The number of columns of the matrix A.                    //
//                                                                            //
//  Return Values:                                                            //
//     void                                                                   //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     #define M                                                              //
//     double A[M][N];                                                        //
//     int i,j;                                                               //
//                                                                            //
//     (your code to initialize the matrix A, the column number i and column  //
//       number j)                                                            //
//                                                                            //
//     if ( (i >= 0) && ( i < N ) && ( j >= 0 ) && (j < N) )                  //
//        Interchange_Columns(&A[0][0], i, j, M, N);                          //
//     printf("The matrix A is \n"); ...                                      //
////////////////////////////////////////////////////////////////////////////////
void Interchange_Columns(double *A, int col1, int col2, int nrows, int ncols)
{
   int i;
   double *pA1, *pA2;
   double temp;

   pA1 = A + col1;
   pA2 = A + col2;
   for (i = 0; i < nrows; pA1 += ncols, pA2 += ncols, i++) {
      temp = *pA1;
      *pA1 = *pA2;
      *pA2 = temp;
   }
}
////////////////////////////////////////////////////////////////////////////////
// File: interchange_rows.c                                                   //
// Routine(s):                                                                //
//    Interchange_Rows                                                        //
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  void Interchange_Rows(double *A, int row1, int row2, int ncols)           //
//                                                                            //
//  Description:                                                              //
//     Interchange the rows 'row1' and 'row2' of the  nrows x ncols matrix A. //
//                                                                            //
//  Arguments:                                                                //
//     double *A    Pointer to the first element of the matrix A.             //
//     int    row1  The row of A which is to be interchanged with row row2.   //
//     int    row2  The row of A which is to be interchanged with row row1.   //
//     int    ncols The number of columns of the matrix A.                    //
//                                                                            //
//  Return Values:                                                            //
//     void                                                                   //
//                                                                            //
//  Example:                                                                  //
//     #define N                                                              //
//     #define M                                                              //
//     double A[M][N];                                                        //
//     int i, j;                                                              //
//                                                                            //
//  (your code to initialize the matrix A, the row number i and row number j) //
//                                                                            //
//     if ( (i >= 0) && ( i < M ) && (j > 0) && ( j < M ) )                   //
//        Interchange_Rows(&A[0][0], i, j, N);                                //
//     printf("The matrix A is \n"); ...                                      //
////////////////////////////////////////////////////////////////////////////////
void Interchange_Rows(double *A, int row1, int row2, int ncols)
{
   int i;
   double *pA1, *pA2;
   double temp;

   pA1 = A + row1 * ncols;
   pA2 = A + row2 * ncols;
   for (i = 0; i < ncols; i++) {
      temp = *pA1;
      *pA1++ = *pA2;
      *pA2++ = temp;
   }
}