
Commit f3eefb8

Updated master project.
1 parent: 7627c05

13 files changed: 2404 additions, 0 deletions

Makefile (+16 lines)

@@ -33,6 +33,7 @@ MDRIVER_OBJS:= \
     libc_allocator.o \
     mdriver.o
 
+BENCHMARKS:= cache-scratch.cpp cache-thrash.cpp larson.cpp linux-scalability.c
 
 # Blank line ends list.
 
@@ -61,6 +62,17 @@ pintool:
 mdriver: $(OBJS) $(MDRIVER_OBJS)
     $(CXX) $(LDFLAGS) $(OBJS) $(MDRIVER_OBJS) -o $@
 
+benchmark: $(OBJS) wrapper.cpp
+    for benchmark in $(BENCHMARKS); do \
+      name=$${benchmark%.*}; \
+      echo $(CXX) $(CFLAGS) -DMYMALLOC $(LDFLAGS) $(OBJS) benchmarks/$$benchmark -o $$name; \
+      $(CXX) $(CFLAGS) -DMYMALLOC $(LDFLAGS) $(OBJS) benchmarks/$$benchmark -o $$name; \
+      echo $(CXX) $(CFLAGS) -DMYMALLOC -DVALIDATE $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-validate; \
+      $(CXX) $(CFLAGS) -DMYMALLOC -DVALIDATE $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-validate; \
+      echo $(CXX) $(CFLAGS) $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-libc; \
+      $(CXX) $(CFLAGS) $(LDFLAGS) benchmarks/$$benchmark $(OBJS) -o $$name-libc; \
+    done
+
 # compile objects
 
 # pattern rule for building objects
@@ -81,4 +93,8 @@ run: $(TARGETS)
 # remove targets and .o files as well as output generated by CQ
 clean:
     $(RM) $(TARGETS) $(OBJS) $(MDRIVER_OBJS) *.std* .buildmode
+    for benchmark in $(BENCHMARKS); do \
+      name=$${benchmark%.*}; \
+      $(RM) $$name $$name-libc $$name-validate; \
+    done
     $(RM) tmp/*.out
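
For reference, each pass of the loop in the new benchmark target builds three binaries from one benchmark source: $name (compiled with -DMYMALLOC against the project's allocator objects), $name-validate (additionally compiled with -DVALIDATE), and $name-libc (compiled without -DMYMALLOC, i.e. the system-allocator baseline). A typical session might look like the following; the thread count of 8 and the other arguments are only examples, chosen to match the usage strings in the benchmark sources:

    % make benchmark
    % ./cache-scratch 8 1000 8 1000000        # project allocator build
    % ./cache-scratch-libc 8 1000 8 1000000   # libc baseline build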

benchmarks/README (new file, +74 lines)

@@ -0,0 +1,74 @@
// Modified for Fall 2011 by 6.172 Staff

Concurrent Memory Allocator Benchmarks

Emery Berger <[email protected]>
http://www.cs.umass.edu/~emery


This is the suite of concurrent benchmarks used in the paper "Hoard: A
Scalable Memory Allocator for Multithreaded Applications". If you use
these benchmarks, please cite that paper as follows:

@inproceedings{Berger:2000:HSM:378993.379232,
  author = {Berger, Emery D. and McKinley, Kathryn S. and Blumofe, Robert D. and Wilson, Paul R.},
  title = {Hoard: a scalable memory allocator for multithreaded applications},
  booktitle = {Proceedings of the ninth international conference on Architectural support for programming languages and operating systems},
  series = {ASPLOS-IX},
  year = {2000},
  isbn = {1-58113-317-0},
  location = {Cambridge, Massachusetts, United States},
  pages = {117--128},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/378993.379232},
  doi = {http://doi.acm.org/10.1145/378993.379232},
  acmid = {379232},
  publisher = {ACM},
  address = {New York, NY, USA},
}

Here is a brief description of the benchmarks, including usage and
sample parameters. P denotes the number of processors (cores) in your
system.

* cache-scratch:

  This benchmark is referred to in the paper as "passive-false", and
  tests resilience against passive false sharing (see the paper for
  details).

  Parameters: <threads> <inner-loop> <object-size> <iterations>

  % cache-scratch 1 100 8 1000000
  % cache-scratch P 100 8 1000000

* cache-thrash:

  This benchmark is referred to in the paper as "active-false", and
  also tests resilience against active false sharing (see the paper
  for details).

  It uses the same parameters as cache-scratch.

* larson:

  This benchmark is courtesy of Paul Larson at Microsoft Research. It
  simulates a server: each thread allocates and deallocates objects,
  and then transfers some objects (randomly selected) to other threads
  to be freed.

  Parameters: <seconds> <min-obj-size> <max-obj-size> <objects> <iterations> <rng seed> <num-threads>

  % larson 10 7 8 1000 10000 RAND P


Additional benchmarks not in the original Hoard paper:

* linux-scalability:

  This benchmark from the University of Michigan also tests allocator
  throughput.

  Parameters: <object-size> <iterations> <number-of-threads>

  % linux-scalability 8 10000000 P
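
Both C++ benchmarks below pull in ../wrapper.cpp and allocate only through the CUSTOM_MALLOC and CUSTOM_FREE macros, while the Makefile toggles -DMYMALLOC to switch between the project allocator and the libc baseline. wrapper.cpp itself is not shown in this excerpt, so the following is only a rough sketch of the dispatch pattern those pieces imply; my_malloc and my_free are placeholder names rather than the project's actual entry points, and end_thread/end_program are reduced to no-ops here:

    // Hypothetical sketch; the real wrapper.cpp is not part of this excerpt.
    #include <stdlib.h>

    #ifdef MYMALLOC
    extern "C" void * my_malloc (size_t size);   // placeholder for the project allocator
    extern "C" void   my_free (void * ptr);      // placeholder for the project allocator
    #define CUSTOM_MALLOC(size) my_malloc(size)
    #define CUSTOM_FREE(ptr)    my_free(ptr)
    #else
    #define CUSTOM_MALLOC(size) malloc(size)
    #define CUSTOM_FREE(ptr)    free(ptr)
    #endif

    // Hooks the benchmarks call at thread exit and program exit; a
    // -DVALIDATE build would presumably do its heap checking in these.
    static void end_thread () {}
    static void end_program () {}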

benchmarks/cache-scratch.cpp (new file, +149 lines)

@@ -0,0 +1,149 @@
///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file cache-scratch.cpp
 *
 * cache-scratch is a benchmark that exercises a heap's cache-locality.
 * An allocator that allows multiple threads to re-use the same small
 * object (possibly all in one cache-line) will scale poorly, while
 * an allocator like Hoard will exhibit near-linear scaling.
 *
 * Try the following (on a P-processor machine):
 *
 *   cache-scratch 1 1000 1 1000000
 *   cache-scratch P 1000 1 1000000
 *
 *   cache-scratch-hoard 1 1000 1 1000000
 *   cache-scratch-hoard P 1000 1 1000000
 *
 * The ideal is a P-fold speedup.
 *
 * Modified for Fall 2011 by 6.172 Staff
 */


#include <stdio.h>
#include <stdlib.h>

#include "fred.h"
#include "cpuinfo.h"
#include "timer.h"

#include "../wrapper.cpp"

// This class just holds arguments to each thread.
class workerArg {
public:
  workerArg (char * obj, int objSize, int repetitions, int iterations)
    : _object (obj),
      _objSize (objSize),
      _iterations (iterations),
      _repetitions (repetitions)
  {}

  char * _object;
  int _objSize;
  int _iterations;
  int _repetitions;
};


#if defined(_WIN32)
extern "C" void worker (void * arg)
#else
extern "C" void * worker (void * arg)
#endif
{
  // free the object we were given.
  // Then, repeatedly do the following:
  //   malloc a given-sized object,
  //   repeatedly write on it,
  //   then free it.
  workerArg * w = (workerArg *) arg;
  CUSTOM_FREE(w->_object);
  for (int i = 0; i < w->_iterations; i++) {
    // Allocate the object.
    char * obj = (char *) CUSTOM_MALLOC(w->_objSize);
    // Write into it a bunch of times.
    for (int j = 0; j < w->_repetitions; j++) {
      for (int k = 0; k < w->_objSize; k++) {
        obj[k] = (char) k;
        volatile char ch = obj[k];
        ch++;
      }
    }
    // Free the object.
    CUSTOM_FREE(obj);
  }
  delete w;

  end_thread();

#if !defined(_WIN32)
  return NULL;
#endif
}


int main (int argc, char * argv[])
{
  int nthreads;
  int iterations;
  int objSize;
  int repetitions;

  if (argc > 4) {
    nthreads = atoi(argv[1]);
    iterations = atoi(argv[2]);
    objSize = atoi(argv[3]);
    repetitions = atoi(argv[4]);
  } else {
    fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n", argv[0]);
    return 1;
  }

  HL::Fred * threads = new HL::Fred[nthreads];
  HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors());

  int i;

  // Allocate nthreads objects and distribute them among the threads.
  char ** objs = (char **) CUSTOM_MALLOC(sizeof(char *) * nthreads);
  for (i = 0; i < nthreads; i++) {
    objs[i] = (char *) CUSTOM_MALLOC(objSize);
  }

  HL::Timer t;
  t.start();

  for (i = 0; i < nthreads; i++) {
    workerArg * w = new workerArg (objs[i], objSize, repetitions / nthreads, iterations);
    threads[i].create (&worker, (void *) w);
  }
  for (i = 0; i < nthreads; i++) {
    threads[i].join();
  }
  t.stop();

  delete [] threads;
  CUSTOM_FREE(objs);

  printf ("Time elapsed = %f seconds.\n", (double) t);
  end_program();
  return 0;
}
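
As a side note on what cache-scratch measures: the docblock above says that an allocator which lets different threads re-use the same small object (and hence the same cache line) scales poorly. The standalone sketch below is not part of this commit; it shows the underlying false-sharing effect directly by timing two threads that write to bytes on one shared cache line and then to bytes on separate lines. The 64-byte line size, the std::thread use, and the -pthread build flag are assumptions, not taken from the project.

    // false-sharing-demo.cpp (illustration only, not part of this commit)
    // Build: g++ -O2 -pthread false-sharing-demo.cpp
    #include <chrono>
    #include <cstdio>
    #include <thread>

    static void hammer (volatile char * p, long iters) {
      for (long i = 0; i < iters; i++) {
        *p = (char) (*p + 1);   // repeated writes to a single byte
      }
    }

    static double run_pair (volatile char * a, volatile char * b, long iters) {
      auto start = std::chrono::steady_clock::now();
      std::thread t1 (hammer, a, iters);
      std::thread t2 (hammer, b, iters);
      t1.join();
      t2.join();
      auto stop = std::chrono::steady_clock::now();
      return std::chrono::duration<double>(stop - start).count();
    }

    int main () {
      const long iters = 100000000L;   // 100M writes per thread

      // Case 1: two counters packed into the same 64-byte cache line.
      alignas(64) static volatile char shared_line[2] = {0, 0};

      // Case 2: each counter on its own 64-byte-aligned line.
      struct alignas(64) Padded { volatile char byte; };
      static Padded separate[2];

      double t_shared   = run_pair(&shared_line[0], &shared_line[1], iters);
      double t_separate = run_pair(&separate[0].byte, &separate[1].byte, iters);

      // When the two counters share a cache line the first case is typically
      // several times slower; cache-scratch exercises the same effect through
      // the allocator's placement decisions instead of explicit placement.
      printf("same line: %.2f s, separate lines: %.2f s\n", t_shared, t_separate);
      return 0;
    }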

benchmarks/cache-thrash.cpp (new file, +134 lines)

@@ -0,0 +1,134 @@
///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// Copyright (c) 1998-2003, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file cache-thrash.cpp
 * @brief cache-thrash is a benchmark that exercises a heap's cache-locality.
 *
 * Try the following (on a P-processor machine):
 *
 *   cache-thrash 1 1000 1 1000000
 *   cache-thrash P 1000 1 1000000
 *
 *   cache-thrash-hoard 1 1000 1 1000000
 *   cache-thrash-hoard P 1000 1 1000000
 *
 * The ideal is a P-fold speedup.
 *
 * Modified for Fall 2011 by 6.172 Staff
 */


#include <iostream>
#include <stdlib.h>
#include <stdio.h>
using namespace std;

#include "cpuinfo.h"
#include "fred.h"
#include "timer.h"

#include "../wrapper.cpp"

// This class just holds arguments to each thread.
class workerArg {
public:
  workerArg (size_t objSize, int repetitions, int iterations)
    : _objSize (objSize),
      _iterations (iterations),
      _repetitions (repetitions)
  {}

  size_t _objSize;
  int _iterations;
  int _repetitions;
};


#if defined(_WIN32)
extern "C" void worker (void * arg)
#else
extern "C" void * worker (void * arg)
#endif
{
  // Repeatedly do the following:
  //   malloc a given-sized object,
  //   repeatedly write on it,
  //   then free it.
  workerArg * w = (workerArg *) arg;
  for (int i = 0; i < w->_iterations; i++) {
    // Allocate the object.
    char * obj = (char *) CUSTOM_MALLOC(w->_objSize);
    // Write into it a bunch of times.
    for (int j = 0; j < w->_repetitions; j++) {
      for (int k = 0; k < w->_objSize; k++) {
        obj[k] = (char) k;
        volatile char ch = obj[k];
        ch++;
      }
    }
    // Free the object.
    CUSTOM_FREE(obj);
  }
  delete w;
  end_thread();
#if !defined(_WIN32)
  return NULL;
#endif
}

int main (int argc, char * argv[])
{
  int nthreads;
  int iterations;
  int objSize;
  int repetitions;

  if (argc > 4) {
    nthreads = atoi(argv[1]);
    iterations = atoi(argv[2]);
    objSize = atoi(argv[3]);
    repetitions = atoi(argv[4]);
  } else {
    cerr << "Usage: " << argv[0] << " nthreads iterations objSize repetitions" << endl;
    exit(1);
  }

  HL::Fred * threads = new HL::Fred[nthreads];
  HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors());

  int i;

  HL::Timer t;
  t.start();

  for (i = 0; i < nthreads; i++) {
    workerArg * w = new workerArg (objSize, repetitions / nthreads, iterations);
    threads[i].create (&worker, (void *) w);
  }
  for (i = 0; i < nthreads; i++) {
    threads[i].join();
  }
  t.stop();

  delete [] threads;

  cout << "Time elapsed = " << (double) t << " seconds." << endl;
  end_program();
}
