| 1 | /* ---------------------------------------------------------------------------- |
| 2 | |
| 3 | * GTSAM Copyright 2010, Georgia Tech Research Corporation, |
| 4 | * Atlanta, Georgia 30332-0415 |
| 5 | * All Rights Reserved |
| 6 | * Authors: Frank Dellaert, et al. (see THANKS for the full author list) |
| 7 | |
| 8 | * See LICENSE for the license information |
| 9 | |
| 10 | * -------------------------------------------------------------------------- */ |
| 11 | |
| 12 | /* |
| 13 | * @file timeMatrix.cpp |
| 14 | * @brief Performs timing and profiling for Matrix operations |
| 15 | * @author Alex Cunningham |
| 16 | */ |
| 17 | |
| 18 | #include <iostream> |
| 19 | #include <gtsam/base/timing.h> |
| 20 | #include <gtsam/base/Matrix.h> |
| 21 | |
| 22 | using namespace std; |
| 23 | using namespace gtsam; |
| 24 | |
| 25 | /* |
| 26 | * Results: |
| 27 | * Alex's Machine: |
| 28 | * (using p = 100000 m = 10 n = 12 reps = 50) - Average times |
| 29 | * - (1st pass of simple changes) no pass: 0.184 sec , pass: 0.181 sec |
| 30 | * - (1st rev memcpy) no pass: 0.181 sec , pass: 0.180 sec |
| 31 | * - (1st rev matrix_range) no pass: 0.186 sec , pass: 0.184 sec |
| 32 | * (using p = 10 m = 10 n = 12 reps = 10000000) |
| 33 | * - (matrix_range version) no pass: 24.21 sec , pass: 23.97 sec |
| 34 | * - (memcpy version) no pass: 18.96 sec , pass: 18.39 sec |
| 35 | * - (original version) no pass: 23.45 sec , pass: 22.80 sec |
| 36 | * - rev 2100 no pass: 18.45 sec , pass: 18.35 sec |
| 37 | */ |
| 38 | double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) { |
| 39 | // create a large number of matrices |
| 40 | // p = number of matrices |
| 41 | // m = rows per matrix |
| 42 | // n = columns per matrix |
| 43 | // reps = number of repetitions |
| 44 | |
| 45 | // fill the matrices with identities |
| 46 | vector<const Matrix *> matrices; |
| 47 | for (size_t i=0; i<p;++i) { |
| 48 | Matrix * M = new Matrix; |
| 49 | (*M) = Matrix::Identity(rows: m,cols: n); |
| 50 | matrices.push_back(x: M); |
| 51 | } |
| 52 | |
| 53 | // start timing |
| 54 | Matrix result; |
| 55 | double elapsed; |
| 56 | { |
| 57 | gttic_(elapsed); |
| 58 | |
| 59 | if (passDims) |
| 60 | for (size_t i=0; i<reps; ++i) |
| 61 | result = collect(matrices, m, n); |
| 62 | else |
| 63 | for (size_t i=0; i<reps; ++i) |
| 64 | result = collect(matrices); |
| 65 | |
| 66 | gttoc_(elapsed); |
| 67 | tictoc_getNode(elapsedNode, elapsed); |
| 68 | elapsed = elapsedNode->secs(); |
| 69 | tictoc_reset_(); |
| 70 | } |
| 71 | // delete the matrices |
| 72 | for (size_t i=0; i<p;++i) { |
| 73 | delete matrices[i]; |
| 74 | } |
| 75 | |
| 76 | return elapsed; |
| 77 | //return elapsed/reps; |
| 78 | } |
| 79 | |
| 80 | /* |
| 81 | * Results: |
| 82 | * Alex's Machine: |
| 83 | * - Original : 0.60 sec (x1000) |
| 84 | * - 1st Rev : 0.49 sec (x1000) |
| 85 | * - rev 2100 : 0.52 sec (x1000) |
| 86 | */ |
| 87 | double timeVScaleColumn(size_t m, size_t n, size_t reps) { |
| 88 | // make a matrix to scale |
| 89 | Matrix M(m, n); |
| 90 | for (size_t i=0; i<m; ++i) |
| 91 | for (size_t j=0; j<n; ++j) |
| 92 | M(i,j) = 2*i+j; |
| 93 | |
| 94 | // make a vector to use for scaling |
| 95 | Vector V(m); |
| 96 | for (size_t i=0; i<m; ++i) |
| 97 | V(i) = i*2; |
| 98 | |
| 99 | double elapsed; |
| 100 | Matrix result; |
| 101 | { |
| 102 | gttic_(elapsed); |
| 103 | |
| 104 | for (size_t i=0; i<reps; ++i) |
| 105 | Matrix result = vector_scale(A: M,v: V); |
| 106 | |
| 107 | gttoc_(elapsed); |
| 108 | tictoc_getNode(elapsedNode, elapsed); |
| 109 | elapsed = elapsedNode->secs(); |
| 110 | tictoc_reset_(); |
| 111 | } |
| 112 | |
| 113 | return elapsed; |
| 114 | } |
| 115 | |
| 116 | /* |
| 117 | * Results: |
| 118 | * Alex's Machine: |
| 119 | * - Original : 0.54 sec (x1000) |
| 120 | * - 1st rev : 0.44 sec (x1000) |
| 121 | * - rev 2100 : 1.69 sec (x1000) |
| 122 | */ |
| 123 | double timeVScaleRow(size_t m, size_t n, size_t reps) { |
| 124 | // make a matrix to scale |
| 125 | Matrix M(m, n); |
| 126 | for (size_t i=0; i<m; ++i) |
| 127 | for (size_t j=0; j<n; ++j) |
| 128 | M(i,j) = 2*i+j; |
| 129 | |
| 130 | // make a vector to use for scaling |
| 131 | Vector V(n); |
| 132 | for (size_t i=0; i<n; ++i) |
| 133 | V(i) = i*2; |
| 134 | |
| 135 | double elapsed; |
| 136 | Matrix result; |
| 137 | { |
| 138 | gttic_(elapsed); |
| 139 | |
| 140 | for (size_t i=0; i<reps; ++i) |
| 141 | result = vector_scale(v: V,A: M); |
| 142 | |
| 143 | gttoc_(elapsed); |
| 144 | tictoc_getNode(elapsedNode, elapsed); |
| 145 | elapsed = elapsedNode->secs(); |
| 146 | tictoc_reset_(); |
| 147 | } |
| 148 | |
| 149 | return elapsed; |
| 150 | } |
| 151 | |
| 152 | /** |
| 153 | * Results: |
| 154 | * Alex's Machine (reps = 200000) |
| 155 | * - ublas matrix_column : 4.63 sec |
| 156 | * - naive implementation : 4.70 sec |
| 157 | * |
| 158 | * reps = 2000000 |
| 159 | * - rev 2100 : 45.11 sec |
| 160 | */ |
| 161 | double timeColumn(size_t reps) { |
| 162 | // create a matrix |
| 163 | size_t m = 100; size_t n = 100; |
| 164 | Matrix M(m, n); |
| 165 | for (size_t i=0; i<m; ++i) |
| 166 | for (size_t j=0; j<n; ++j) |
| 167 | M(i,j) = 2*i+j; |
| 168 | |
| 169 | // extract a column |
| 170 | double elapsed; |
| 171 | Vector result; |
| 172 | { |
| 173 | gttic_(elapsed); |
| 174 | |
| 175 | for (size_t i=0; i<reps; ++i) |
| 176 | for (size_t j = 0; j<n; ++j) |
| 177 | //result = ublas::matrix_column<Matrix>(M, j); |
| 178 | result = column(A: M, j); |
| 179 | |
| 180 | gttoc_(elapsed); |
| 181 | tictoc_getNode(elapsedNode, elapsed); |
| 182 | elapsed = elapsedNode->secs(); |
| 183 | tictoc_reset_(); |
| 184 | } |
| 185 | return elapsed; |
| 186 | } |
| 187 | |
| 188 | /* |
| 189 | * Results |
| 190 | * Alex's machine |
| 191 | * |
| 192 | * Runs at reps = 500000 |
| 193 | * Baseline (no householder, just matrix copy) : 0.05 sec |
| 194 | * Initial : 8.20 sec |
| 195 | * All in one function : 7.89 sec |
| 196 | * Replace householder update with GSL, ATLAS : 0.92 sec |
| 197 | * |
| 198 | * Runs at reps = 2000000 |
| 199 | * Baseline (GSL/ATLAS householder update) : 3.61 sec |
| 200 | * |
| 201 | * Runs at reps = 5000000 |
| 202 | * Baseline : 8.76 sec |
| 203 | * GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect? |
| 204 | * Inlining house() : 6.33 sec |
| 205 | * Inlining householder_update [GSL] : 6.15 sec |
| 206 | * Rev 2100 : 5.75 sec |
| 207 | * |
| 208 | */ |
| 209 | double timeHouseholder(size_t reps) { |
| 210 | // create a matrix |
| 211 | Matrix Abase = (Matrix(4, 7) << |
| 212 | -5, 0, 5, 0, 0, 0, -1, |
| 213 | 00, -5, 0, 5, 0, 0, 1.5, |
| 214 | 10, 0, 0, 0,-10, 0, 2, |
| 215 | 00, 10, 0, 0, 0,-10, -1).finished(); |
| 216 | |
| 217 | // perform timing |
| 218 | double elapsed; |
| 219 | { |
| 220 | gttic_(elapsed); |
| 221 | |
| 222 | for (size_t i=0; i<reps; ++i) { |
| 223 | Matrix A = Abase; |
| 224 | householder_(A,k: 3); |
| 225 | } |
| 226 | |
| 227 | gttoc_(elapsed); |
| 228 | tictoc_getNode(elapsedNode, elapsed); |
| 229 | elapsed = elapsedNode->secs(); |
| 230 | tictoc_reset_(); |
| 231 | } |
| 232 | return elapsed; |
| 233 | } |
| 234 | /** |
| 235 | * Results: (Alex's machine) |
| 236 | * reps: 200000 |
| 237 | * |
| 238 | * Initial (boost matrix proxies) - 12.08 |
| 239 | * Direct pointer method - 4.62 |
| 240 | */ |
| 241 | double timeMatrixInsert(size_t reps) { |
| 242 | // create a matrix |
| 243 | Matrix bigBase = Matrix::Zero(rows: 100, cols: 100); |
| 244 | Matrix small = Matrix::Identity(rows: 5,cols: 5); |
| 245 | |
| 246 | // perform timing |
| 247 | double elapsed; |
| 248 | { |
| 249 | gttic_(elapsed); |
| 250 | |
| 251 | Matrix big = bigBase; |
| 252 | for (size_t rep=0; rep<reps; ++rep) |
| 253 | for (size_t i=0; i<100; i += 5) |
| 254 | for (size_t j=0; j<100; j += 5) |
| 255 | insertSub(fullMatrix&: big, subMatrix: small, i,j); |
| 256 | |
| 257 | gttoc_(elapsed); |
| 258 | tictoc_getNode(elapsedNode, elapsed); |
| 259 | elapsed = elapsedNode->secs(); |
| 260 | tictoc_reset_(); |
| 261 | } |
| 262 | return elapsed; |
| 263 | } |
| 264 | |
| 265 | int main(int argc, char ** argv) { |
| 266 | |
| 267 | // Time collect() |
| 268 | cout << "Starting Matrix::collect() Timing" << endl; |
| 269 | //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; |
| 270 | size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; |
| 271 | double collect_time1 = timeCollect(p, m, n, passDims: false, reps); |
| 272 | double collect_time2 = timeCollect(p, m, n, passDims: true, reps); |
| 273 | cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; |
| 274 | cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; |
| 275 | |
| 276 | // Time vector_scale_column |
| 277 | cout << "Starting Matrix::vector_scale(column) Timing" << endl; |
| 278 | size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; |
| 279 | double vsColumn_time = timeVScaleColumn(m: m1, n: n1, reps: reps1); |
| 280 | cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; |
| 281 | |
| 282 | // Time vector_scale_row |
| 283 | cout << "Starting Matrix::vector_scale(row) Timing" << endl; |
| 284 | double vsRow_time = timeVScaleRow(m: m1, n: n1, reps: reps1); |
| 285 | cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; |
| 286 | |
| 287 | // Time column() NOTE: using the ublas version |
| 288 | cout << "Starting column() Timing" << endl; |
| 289 | size_t reps2 = 2000000; |
| 290 | double column_time = timeColumn(reps: reps2); |
| 291 | cout << "Time: " << column_time << " sec" << endl; |
| 292 | |
| 293 | // Time householder_ function |
| 294 | cout << "Starting householder_() Timing" << endl; |
| 295 | size_t reps_house = 5000000; |
| 296 | double house_time = timeHouseholder(reps: reps_house); |
| 297 | cout << "Elapsed time for householder_() : " << house_time << " sec" << endl; |
| 298 | |
| 299 | // Time matrix insertion |
| 300 | cout << "Starting insertSub() Timing" << endl; |
| 301 | size_t reps_insert = 200000; |
| 302 | double insert_time = timeMatrixInsert(reps: reps_insert); |
| 303 | cout << "Elapsed time for insertSub() : " << insert_time << " sec" << endl; |
| 304 | |
| 305 | return 0; |
| 306 | } |
| 307 | |