
Commit f3eefb8

Updated master project.
1 parent: 7627c05

13 files changed: 2404 additions, 0 deletions

Makefile (+16 lines)

@@ -33,6 +33,7 @@ MDRIVER_OBJS:= \
     libc_allocator.o \
     mdriver.o
 
+BENCHMARKS:= cache-scratch.cpp cache-thrash.cpp larson.cpp linux-scalability.c
 
 # Blank line ends list.
 
@@ -61,6 +62,17 @@ pintool:
 mdriver: $(OBJS) $(MDRIVER_OBJS)
     $(CXX) $(LDFLAGS) $(OBJS) $(MDRIVER_OBJS) -o $@
 
+benchmark: $(OBJS) wrapper.cpp
+    for benchmark in $(BENCHMARKS); do \
+      name=$${benchmark%.*}; \
+      echo $(CXX) $(CFLAGS) -DMYMALLOC $(LDFLAGS) $(OBJS) benchmarks/$$benchmark -o $$name; \
+      $(CXX) $(CFLAGS) -DMYMALLOC $(LDFLAGS) $(OBJS) benchmarks/$$benchmark -o $$name; \
+      echo $(CXX) $(CFLAGS) -DMYMALLOC -DVALIDATE $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-validate; \
+      $(CXX) $(CFLAGS) -DMYMALLOC -DVALIDATE $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-validate; \
+      echo $(CXX) $(CFLAGS) $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-libc; \
+      $(CXX) $(CFLAGS) $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-libc; \
+    done
+
 # compile objects
 
 # pattern rule for building objects
@@ -81,4 +93,8 @@ run: $(TARGETS)
 # remove targets and .o files as well as output generated by CQ
 clean:
     $(RM) $(TARGETS) $(OBJS) $(MDRIVER_OBJS) *.std* .buildmode
+    for benchmark in $(BENCHMARKS); do \
+      name=$${benchmark%.*}; \
+      $(RM) $$name $$name-libc $$name-validate; \
+    done
     $(RM) tmp/*.out
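
For reference, each pass of the loop in the new benchmark target builds three binaries from one benchmark source: $name (compiled with -DMYMALLOC against the project's allocator objects), $name-validate (additionally compiled with -DVALIDATE), and $name-libc (compiled without -DMYMALLOC, i.e. the system-allocator baseline). A typical session might look like the following; the thread count of 8 and the other arguments are only examples, chosen to match the usage strings in the benchmark sources:

    % make benchmark
    % ./cache-scratch 8 1000 8 1000000        # project allocator build
    % ./cache-scratch-libc 8 1000 8 1000000   # libc baseline build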

benchmarks/README (new file, +74 lines)

@@ -0,0 +1,74 @@
// Modified for Fall 2011 by 6.172 Staff

Concurrent Memory Allocator Benchmarks

Emery Berger <[email protected]>
http://www.cs.umass.edu/~emery


This is the suite of concurrent benchmarks used in the paper "Hoard: A
Scalable Memory Allocator for Multithreaded Applications". If you use
these benchmarks, please cite that paper as follows:

@inproceedings{Berger:2000:HSM:378993.379232,
  author = {Berger, Emery D. and McKinley, Kathryn S. and Blumofe, Robert D. and Wilson, Paul R.},
  title = {Hoard: a scalable memory allocator for multithreaded applications},
  booktitle = {Proceedings of the ninth international conference on Architectural support for programming languages and operating systems},
  series = {ASPLOS-IX},
  year = {2000},
  isbn = {1-58113-317-0},
  location = {Cambridge, Massachusetts, United States},
  pages = {117--128},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/378993.379232},
  doi = {http://doi.acm.org/10.1145/378993.379232},
  acmid = {379232},
  publisher = {ACM},
  address = {New York, NY, USA},
}

Here is a brief description of the benchmarks, including usage and
sample parameters. P denotes the number of processors (cores) in your
system.

* cache-scratch:

  This benchmark is referred to in the paper as "passive-false", and
  tests resilience against passive false sharing (see the paper for
  details).

  Parameters: <threads> <inner-loop> <object-size> <iterations>

  % cache-scratch 1 100 8 1000000
  % cache-scratch P 100 8 1000000

* cache-thrash:

  This benchmark is referred to in the paper as "active-false", and
  also tests resilience against active false sharing (see the paper
  for details).

  It uses the same parameters as cache-scratch.

* larson:

  This benchmark is courtesy of Paul Larson at Microsoft Research. It
  simulates a server: each thread allocates and deallocates objects,
  and then transfers some objects (randomly selected) to other threads
  to be freed.

  Parameters: <seconds> <min-obj-size> <max-obj-size> <objects> <iterations> <rng seed> <num-threads>

  % larson 10 7 8 1000 10000 RAND P


Additional benchmarks not in the original Hoard paper:

* linux-scalability:

  This benchmark from the University of Michigan also tests allocator
  throughput.

  Parameters: <object-size> <iterations> <number-of-threads>

  % linux-scalability 8 10000000 P
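
Both C++ benchmarks below pull in ../wrapper.cpp and allocate only through the CUSTOM_MALLOC and CUSTOM_FREE macros, while the Makefile toggles -DMYMALLOC to switch between the project allocator and the libc baseline. wrapper.cpp itself is not shown in this excerpt, so the following is only a rough sketch of the dispatch pattern those pieces imply; my_malloc and my_free are placeholder names rather than the project's actual entry points, and end_thread/end_program are reduced to no-ops here:

    // Hypothetical sketch; the real wrapper.cpp is not part of this excerpt.
    #include <stdlib.h>

    #ifdef MYMALLOC
    extern "C" void * my_malloc (size_t size);   // placeholder for the project allocator
    extern "C" void   my_free (void * ptr);      // placeholder for the project allocator
    #define CUSTOM_MALLOC(size) my_malloc(size)
    #define CUSTOM_FREE(ptr)    my_free(ptr)
    #else
    #define CUSTOM_MALLOC(size) malloc(size)
    #define CUSTOM_FREE(ptr)    free(ptr)
    #endif

    // Hooks the benchmarks call at thread exit and program exit; a
    // -DVALIDATE build would presumably do its heap checking in these.
    static void end_thread () {}
    static void end_program () {}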

benchmarks/cache-scratch.cpp (new file, +149 lines)

@@ -0,0 +1,149 @@
///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file cache-scratch.cpp
 *
 * cache-scratch is a benchmark that exercises a heap's cache-locality.
 * An allocator that allows multiple threads to re-use the same small
 * object (possibly all in one cache-line) will scale poorly, while
 * an allocator like Hoard will exhibit near-linear scaling.
 *
 * Try the following (on a P-processor machine):
 *
 *   cache-scratch 1 1000 1 1000000
 *   cache-scratch P 1000 1 1000000
 *
 *   cache-scratch-hoard 1 1000 1 1000000
 *   cache-scratch-hoard P 1000 1 1000000
 *
 * The ideal is a P-fold speedup.
 *
 * Modified for Fall 2011 by 6.172 Staff
 */


#include <stdio.h>
#include <stdlib.h>

#include "fred.h"
#include "cpuinfo.h"
#include "timer.h"

#include "../wrapper.cpp"

// This class just holds arguments to each thread.
class workerArg {
public:
  workerArg (char * obj, int objSize, int repetitions, int iterations)
    : _object (obj),
      _objSize (objSize),
      _iterations (iterations),
      _repetitions (repetitions)
  {}

  char * _object;
  int _objSize;
  int _iterations;
  int _repetitions;
};


#if defined(_WIN32)
extern "C" void worker (void * arg)
#else
extern "C" void * worker (void * arg)
#endif
{
  // free the object we were given.
  // Then, repeatedly do the following:
  //   malloc a given-sized object,
  //   repeatedly write on it,
  //   then free it.
  workerArg * w = (workerArg *) arg;
  CUSTOM_FREE(w->_object);
  for (int i = 0; i < w->_iterations; i++) {
    // Allocate the object.
    char * obj = (char *) CUSTOM_MALLOC(w->_objSize);
    // Write into it a bunch of times.
    for (int j = 0; j < w->_repetitions; j++) {
      for (int k = 0; k < w->_objSize; k++) {
        obj[k] = (char) k;
        volatile char ch = obj[k];
        ch++;
      }
    }
    // Free the object.
    CUSTOM_FREE(obj);
  }
  delete w;

  end_thread();

#if !defined(_WIN32)
  return NULL;
#endif
}


int main (int argc, char * argv[])
{
  int nthreads;
  int iterations;
  int objSize;
  int repetitions;

  if (argc > 4) {
    nthreads = atoi(argv[1]);
    iterations = atoi(argv[2]);
    objSize = atoi(argv[3]);
    repetitions = atoi(argv[4]);
  } else {
    fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n", argv[0]);
    return 1;
  }

  HL::Fred * threads = new HL::Fred[nthreads];
  HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors());

  int i;

  // Allocate nthreads objects and distribute them among the threads.
  char ** objs = (char **) CUSTOM_MALLOC(sizeof(char *) * nthreads);
  for (i = 0; i < nthreads; i++) {
    objs[i] = (char *) CUSTOM_MALLOC(objSize);
  }

  HL::Timer t;
  t.start();

  for (i = 0; i < nthreads; i++) {
    workerArg * w = new workerArg (objs[i], objSize, repetitions / nthreads, iterations);
    threads[i].create (&worker, (void *) w);
  }
  for (i = 0; i < nthreads; i++) {
    threads[i].join();
  }
  t.stop();

  delete [] threads;
  CUSTOM_FREE(objs);

  printf ("Time elapsed = %f seconds.\n", (double) t);
  end_program();
  return 0;
}
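
As a side note on what cache-scratch measures: the docblock above says that an allocator which lets different threads re-use the same small object (and hence the same cache line) scales poorly. The standalone sketch below is not part of this commit; it shows the underlying false-sharing effect directly by timing two threads that write to bytes on one shared cache line and then to bytes on separate lines. The 64-byte line size, the std::thread use, and the -pthread build flag are assumptions, not taken from the project.

    // false-sharing-demo.cpp (illustration only, not part of this commit)
    // Build: g++ -O2 -pthread false-sharing-demo.cpp
    #include <chrono>
    #include <cstdio>
    #include <thread>

    static void hammer (volatile char * p, long iters) {
      for (long i = 0; i < iters; i++) {
        *p = (char) (*p + 1);   // repeated writes to a single byte
      }
    }

    static double run_pair (volatile char * a, volatile char * b, long iters) {
      auto start = std::chrono::steady_clock::now();
      std::thread t1 (hammer, a, iters);
      std::thread t2 (hammer, b, iters);
      t1.join();
      t2.join();
      auto stop = std::chrono::steady_clock::now();
      return std::chrono::duration<double>(stop - start).count();
    }

    int main () {
      const long iters = 100000000L;   // 100M writes per thread

      // Case 1: two counters packed into the same 64-byte cache line.
      alignas(64) static volatile char shared_line[2] = {0, 0};

      // Case 2: each counter on its own 64-byte-aligned line.
      struct alignas(64) Padded { volatile char byte; };
      static Padded separate[2];

      double t_shared   = run_pair(&shared_line[0], &shared_line[1], iters);
      double t_separate = run_pair(&separate[0].byte, &separate[1].byte, iters);

      // When the two counters share a cache line the first case is typically
      // several times slower; cache-scratch exercises the same effect through
      // the allocator's placement decisions instead of explicit placement.
      printf("same line: %.2f s, separate lines: %.2f s\n", t_shared, t_separate);
      return 0;
    }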

benchmarks/cache-thrash.cpp (new file, +134 lines)

@@ -0,0 +1,134 @@
///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// Copyright (c) 1998-2003, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file cache-thrash.cpp
 * @brief cache-thrash is a benchmark that exercises a heap's cache-locality.
 *
 * Try the following (on a P-processor machine):
 *
 *   cache-thrash 1 1000 1 1000000
 *   cache-thrash P 1000 1 1000000
 *
 *   cache-thrash-hoard 1 1000 1 1000000
 *   cache-thrash-hoard P 1000 1 1000000
 *
 * The ideal is a P-fold speedup.
 *
 * Modified for Fall 2011 by 6.172 Staff
 */


#include <iostream>
#include <stdlib.h>
#include <stdio.h>
using namespace std;

#include "cpuinfo.h"
#include "fred.h"
#include "timer.h"

#include "../wrapper.cpp"

// This class just holds arguments to each thread.
class workerArg {
public:
  workerArg (size_t objSize, int repetitions, int iterations)
    : _objSize (objSize),
      _iterations (iterations),
      _repetitions (repetitions)
  {}

  size_t _objSize;
  int _iterations;
  int _repetitions;
};


#if defined(_WIN32)
extern "C" void worker (void * arg)
#else
extern "C" void * worker (void * arg)
#endif
{
  // Repeatedly do the following:
  //   malloc a given-sized object,
  //   repeatedly write on it,
  //   then free it.
  workerArg * w = (workerArg *) arg;
  for (int i = 0; i < w->_iterations; i++) {
    // Allocate the object.
    char * obj = (char *) CUSTOM_MALLOC(w->_objSize);
    // Write into it a bunch of times.
    for (int j = 0; j < w->_repetitions; j++) {
      for (int k = 0; k < w->_objSize; k++) {
        obj[k] = (char) k;
        volatile char ch = obj[k];
        ch++;
      }
    }
    // Free the object.
    CUSTOM_FREE(obj);
  }
  delete w;
  end_thread();
#if !defined(_WIN32)
  return NULL;
#endif
}

int main (int argc, char * argv[])
{
  int nthreads;
  int iterations;
  int objSize;
  int repetitions;

  if (argc > 4) {
    nthreads = atoi(argv[1]);
    iterations = atoi(argv[2]);
    objSize = atoi(argv[3]);
    repetitions = atoi(argv[4]);
  } else {
    cerr << "Usage: " << argv[0] << " nthreads iterations objSize repetitions" << endl;
    exit(1);
  }

  HL::Fred * threads = new HL::Fred[nthreads];
  HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors());

  int i;

  HL::Timer t;
  t.start();

  for (i = 0; i < nthreads; i++) {
    workerArg * w = new workerArg (objSize, repetitions / nthreads, iterations);
    threads[i].create (&worker, (void *) w);
  }
  for (i = 0; i < nthreads; i++) {
    threads[i].join();
  }
  t.stop();

  delete [] threads;

  cout << "Time elapsed = " << (double) t << " seconds." << endl;
  end_program();
}
