TimeTBB.cpp source code [codebrowser/examples/TimeTBB.cpp]

1	/ ----------------------------------------------------------------------------*
2
3	* GTSAM Copyright 2010, Georgia Tech Research Corporation,
4	* Atlanta, Georgia 30332-0415
5	* All Rights Reserved
6	* Authors: Frank Dellaert, et al. (see THANKS for the full author list)
7
8	* See LICENSE for the license information
9	* -------------------------------------------------------------------------- */
10
11	/**
12	* @file TimeTBB.cpp
13	* @brief Measure task scheduling overhead in TBB
14	* @author Richard Roberts
15	* @date November 6, 2013
16	*/
17
18	#include <gtsam/base/Matrix.h>
19	#include <gtsam/global_includes.h>
20
21	#include <algorithm>
22	#include <iostream>
23	#include <map>
24	#include <random>
25
26	using namespace std;
27	using namespace gtsam;
28
29	#ifdef GTSAM_USE_TBB
30
31	#include <tbb/blocked_range.h> // tbb::blocked_range
32	#include <tbb/tick_count.h> // tbb::tick_count
33	#include <tbb/parallel_for.h> // tbb::parallel_for
34	#include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
35	#include <tbb/task_arena.h> // tbb::task_arena
36	#include <tbb/task_group.h> // tbb::task_group
37
38	static const DenseIndex numberOfProblems = `1000000`;
39	static const DenseIndex problemSize = `4`;
40
41	typedef Eigen::Matrix<double, problemSize, problemSize> FixedMatrix;
42
43	/ ************************************************************************* /
44	struct ResultWithThreads
45	{
46	typedef map<int, double>::value_type value_type;
47	map<int, double> grainSizesWithoutAllocation;
48	map<int, double> grainSizesWithAllocation;
49	};
50
51	typedef map<int, ResultWithThreads> Results;
52
53	/ ************************************************************************* /
54	struct WorkerWithoutAllocation
55	{
56	vector<double>& results;
57
58	WorkerWithoutAllocation(vector<double>& results)
59	: results(results), gen(rd()), dis(-`1.0`, `1.0`) {}
60
61	// Copy constructor for TBB
62	WorkerWithoutAllocation(const WorkerWithoutAllocation& other)
63	: results(other.results), gen(rd()), dis(-`1.0`, `1.0`) {}
64
65	void operator()(const tbb::blocked_range<size_t>& r) const
66	{
67	for(size_t i = r.begin(); i != r.end(); ++i)
68	{
69	FixedMatrix m1;
70	FixedMatrix m2;
71	std::generate(m1.data(), m1.data() + m1.size(),
72	[&]() { return dis(gen); });
73	std::generate(m2.data(), m2.data() + m2.size(),
74	[&]() { return dis(gen); });
75	FixedMatrix prod = m1 * m2;
76	results[i] = prod.norm();
77	}
78	}
79	std::random_device rd;
80	mutable std::minstd_rand gen;
81	mutable std::uniform_real_distribution<double> dis;
82	};
83
84	/ ************************************************************************* /
85	map<int, double> testWithoutMemoryAllocation(int num_threads)
86	{
87	// A function to do some matrix operations without allocating any memory
88
89	// Create task_arena and task_group
90	tbb::task_arena arena(num_threads);
91	tbb::task_group tg;
92
93	// Now call it
94	vector<double> results(numberOfProblems);
95
96	const vector<size_t> grainSizes = {`1`, `10`, `100`, `1000`};
97	map<int, double> timingResults;
98	for(size_t grainSize: grainSizes)
99	{
100	tbb::tick_count t0 = tbb::tick_count::now();
101
102	// Run parallel code (as a task group) inside of task arena
103	arena.execute([&] {
104	tg.run_and_wait([&] {
105	tbb::parallel_for(
106	tbb::blocked_range<size_t>(`0`, numberOfProblems, grainSize),
107	WorkerWithoutAllocation(results));
108	});
109	});
110
111	tbb::tick_count t1 = tbb::tick_count::now();
112	cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
113	timingResults[(int)grainSize] = (t1 - t0).seconds();
114	}
115
116	return timingResults;
117	}
118
119	/ ************************************************************************* /
120	struct WorkerWithAllocation
121	{
122	vector<double>& results;
123
124	WorkerWithAllocation(vector<double>& results)
125	: results(results), gen(rd()), dis(-`1.0`, `1.0`) {}
126
127	// Copy constructor for TBB
128	WorkerWithAllocation(const WorkerWithAllocation& other)
129	: results(other.results), gen(rd()), dis(-`1.0`, `1.0`) {}
130
131	void operator()(const tbb::blocked_range<size_t>& r) const
132	{
133	tbb::cache_aligned_allocator<double> allocator;
134	for(size_t i = r.begin(); i != r.end(); ++i)
135	{
136	double m1data = allocator.allocate(problemSize problemSize);
137	Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
138	double m2data = allocator.allocate(problemSize problemSize);
139	Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
140	double proddata = allocator.allocate(problemSize problemSize);
141	Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
142
143	std::generate(m1.data(), m1.data() + m1.size(),
144	[&]() { return dis(gen); });
145	std::generate(m2.data(), m2.data() + m2.size(),
146	[&]() { return dis(gen); });
147	prod = m1 * m2;
148	results[i] = prod.norm();
149
150	allocator.deallocate(m1data, problemSize * problemSize);
151	allocator.deallocate(m2data, problemSize * problemSize);
152	allocator.deallocate(proddata, problemSize * problemSize);
153	}
154	}
155
156	std::random_device rd;
157	mutable std::minstd_rand gen;
158	mutable std::uniform_real_distribution<double> dis;
159	};
160
161	/ ************************************************************************* /
162	map<int, double> testWithMemoryAllocation(int num_threads)
163	{
164	// A function to do some matrix operations with allocating memory
165
166	// Create task_arena and task_group
167	tbb::task_arena arena(num_threads);
168	tbb::task_group tg;
169
170	// Now call it
171	vector<double> results(numberOfProblems);
172
173	const vector<size_t> grainSizes = {`1`, `10`, `100`, `1000`};
174	map<int, double> timingResults;
175	for(size_t grainSize: grainSizes)
176	{
177	tbb::tick_count t0 = tbb::tick_count::now();
178
179	// Run parallel code (as a task group) inside of task arena
180	arena.execute([&] {
181	tg.run_and_wait([&] {
182	tbb::parallel_for(
183	tbb::blocked_range<size_t>(`0`, numberOfProblems, grainSize),
184	WorkerWithAllocation(results));
185	});
186	});
187
188	tbb::tick_count t1 = tbb::tick_count::now();
189	cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
190	timingResults[(int)grainSize] = (t1 - t0).seconds();
191	}
192
193	return timingResults;
194	}
195
196	/ ************************************************************************* /
197	int main(int argc, char* argv[])
198	{
199	cout << "numberOfProblems = " << numberOfProblems << endl;
200	cout << "problemSize = " << problemSize << endl;
201
202	const vector<int> numThreads = {`1`, `4`, `8`};
203	Results results;
204
205	for(size_t n: numThreads)
206	{
207	cout << "With " << n << " threads:" << endl;
208	results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
209	results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
210	cout << endl;
211	}
212
213	cout << "Summary of results:" << endl;
214	for(const Results::value_type& threads_result: results)
215	{
216	const int threads = threads_result.first;
217	const ResultWithThreads& result = threads_result.second;
218	if(threads != `1`)
219	{
220	for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
221	{
222	const int grainsize = grainsize_time.first;
223	const double speedup = results[`1`].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
224	cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
225	}
226	for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
227	{
228	const int grainsize = grainsize_time.first;
229	const double speedup = results[`1`].grainSizesWithAllocation[grainsize] / grainsize_time.second;
230	cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
231	}
232	}
233	}
234
235	return `0`;
236	}
237
238	#else
239
240	/ ************************************************************************* /
241	int main(int argc, char* argv [])
242	{
243	cout << "GTSAM is compiled without TBB, please compile with TBB to use this program." << endl;
244	return `0`;
245	}
246
247	#endif
248

Browse the source code of codebrowser/examples/TimeTBB.cpp