numactl --interleave=all ./testing_zgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_zgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |R - Q^H*A|   |I - Q^H*Q|
===============================================================================
  100   100     ---   (  ---  )      2.24 (   0.00)       ---
 1000  1000     ---   (  ---  )    154.67 (   0.03)       ---
   10    10     ---   (  ---  )      0.12 (   0.00)       ---
   20    20     ---   (  ---  )      0.76 (   0.00)       ---
   30    30     ---   (  ---  )      1.83 (   0.00)       ---
   40    40     ---   (  ---  )      0.92 (   0.00)       ---
   50    50     ---   (  ---  )      1.59 (   0.00)       ---
   60    60     ---   (  ---  )      2.22 (   0.00)       ---
   70    70     ---   (  ---  )      1.67 (   0.00)       ---
   80    80     ---   (  ---  )      2.62 (   0.00)       ---
   90    90     ---   (  ---  )      3.44 (   0.00)       ---
  100   100     ---   (  ---  )      4.35 (   0.00)       ---
  200   200     ---   (  ---  )     13.96 (   0.00)       ---
  300   300     ---   (  ---  )     29.51 (   0.00)       ---
  400   400     ---   (  ---  )     46.09 (   0.01)       ---
  500   500     ---   (  ---  )     64.51 (   0.01)       ---
  600   600     ---   (  ---  )     83.67 (   0.01)       ---
  700   700     ---   (  ---  )    104.68 (   0.02)       ---
  800   800     ---   (  ---  )    123.91 (   0.02)       ---
  900   900     ---   (  ---  )    141.81 (   0.03)       ---
 1000  1000     ---   (  ---  )    164.11 (   0.03)       ---
 2000  2000     ---   (  ---  )    382.23 (   0.11)       ---
 3000  3000     ---   (  ---  )    545.03 (   0.26)       ---
 4000  4000     ---   (  ---  )    746.75 (   0.46)       ---
 5000  5000     ---   (  ---  )    827.59 (   0.81)       ---
 6000  6000     ---   (  ---  )    908.51 (   1.27)       ---
 7000  7000     ---   (  ---  )    956.59 (   1.91)       ---
 8000  8000     ---   (  ---  )    997.48 (   2.74)       ---
 9000  9000     ---   (  ---  )   1009.93 (   3.85)       ---
10000 10000     ---   (  ---  )   1021.70 (   5.22)       ---
12000 12000     ---   (  ---  )   1057.49 (   8.72)       ---
14000 14000     ---   (  ---  )   1063.87 (  13.76)       ---
16000 16000     ---   (  ---  )   1073.69 (  20.35)       ---
18000 18000     ---   (  ---  )   1050.56 (  29.61)       ---
20000 20000     ---   (  ---  )   1072.71 (  39.78)       ---

numactl --interleave=all ./testing_zgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_zgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)    |b - A*x|
================================================================
  100   100     ---   (  ---  )      1.68 (   0.00)       ---
 1000  1000     ---   (  ---  )    143.18 (   0.04)       ---
   10    10     ---   (  ---  )      0.01 (   0.00)       ---
   20    20     ---   (  ---  )      0.05 (   0.00)       ---
   30    30     ---   (  ---  )      0.15 (   0.00)       ---
   40    40     ---   (  ---  )      0.33 (   0.00)       ---
   50    50     ---   (  ---  )      0.60 (   0.00)       ---
   60    60     ---   (  ---  )      0.95 (   0.00)       ---
   70    70     ---   (  ---  )      2.09 (   0.00)       ---
   80    80     ---   (  ---  )      3.12 (   0.00)       ---
   90    90     ---   (  ---  )      3.54 (   0.00)       ---
  100   100     ---   (  ---  )      2.70 (   0.00)       ---
  200   200     ---   (  ---  )     11.33 (   0.00)       ---
  300   300     ---   (  ---  )     25.13 (   0.01)       ---
  400   400     ---   (  ---  )     37.00 (   0.01)       ---
  500   500     ---   (  ---  )     57.51 (   0.01)       ---
  600   600     ---   (  ---  )     70.94 (   0.02)       ---
  700   700     ---   (  ---  )     92.71 (   0.02)       ---
  800   800     ---   (  ---  )    106.65 (   0.03)       ---
  900   900     ---   (  ---  )    128.17 (   0.03)       ---
 1000  1000     ---   (  ---  )    150.61 (   0.04)       ---
 2000  2000     ---   (  ---  )    347.84 (   0.12)       ---
 3000  3000     ---   (  ---  )    605.55 (   0.24)       ---
 4000  4000     ---   (  ---  )    741.14 (   0.46)       ---
 5000  5000     ---   (  ---  )    799.79 (   0.83)       ---
 6000  6000     ---   (  ---  )    877.47 (   1.31)       ---
 7000  7000     ---   (  ---  )    937.07 (   1.95)       ---
 8000  8000     ---   (  ---  )    981.20 (   2.78)       ---
 9000  9000     ---   (  ---  )    996.99 (   3.90)       ---
10000 10000     ---   (  ---  )   1013.21 (   5.26)       ---
12000 12000     ---   (  ---  )   1052.16 (   8.76)       ---
14000 14000     ---   (  ---  )   1063.70 (  13.76)       ---
16000 16000     ---   (  ---  )   1085.06 (  20.14)       ---
18000 18000     ---   (  ---  )   1049.97 (  29.63)       ---
20000 20000     ---   (  ---  )   1070.26 (  39.87)       ---
