Performance comparison - Thinking in C++

To get a better feel for the differences between the sequence containers, it’s illuminating to race them against each other while performing various operations.

//: C04:SequencePerformance.cpp

// Comparing the performance of the basic
// sequence containers for various operations

#include <vector>

#include <queue>

#include <list>

#include <iostream>

#include <string>

#include <typeinfo>

#include <ctime>

#include <cstdlib>

using namespace std;

// Element type stored in every container under test: a block of
// 20 ints. The default constructor, copy-constructor and operator=
// are all the compiler-generated ones.
class FixedSize {
  int x[20];
};

// Single global instance; the insertion tests copy it into the
// containers.
FixedSize fs;

// Test functor: appends copies of the global FixedSize object `fs`
// to the back of a container.
template<class Cont>
struct InsertBack {
  // Calls c.push_back(fs) exactly `count` times.
  void operator()(Cont& c, long count) {
    for(long i = 0; i < count; ++i)
      c.push_back(fs);
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "InsertBack"; }
};

// Test functor: prepends copies of the global FixedSize object `fs`
// to a container. Requires Cont::push_front().
template<class Cont>
struct InsertFront {
  // Calls c.push_front(fs) count * 10 times (the repeat count is
  // scaled up for this test).
  void operator()(Cont& c, long count) {
    const long cnt = count * 10;
    for(long i = 0; i < cnt; ++i)
      c.push_front(fs);
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "InsertFront"; }
};

// Test functor: inserts copies of the global FixedSize object `fs`
// at an interior position (just after the first element).
template<class Cont>
struct InsertMiddle {
  // Performs count / 10 insertions (the repeat count is scaled down
  // for this test).
  void operator()(Cont& c, long count) {
    const long cnt = count / 10;
    for(long i = 0; i < cnt; ++i) {
      // Must get the iterator every time: an insert can invalidate
      // iterators (e.g. when a vector reallocates), and reusing a
      // stale one would cause an access violation.
      typename Cont::iterator it = c.begin();
      // Step past the first element so the insert is not at the
      // front. An empty container has no element to step past, so
      // the first insert then goes at begin().
      if(it != c.end())
        ++it;
      c.insert(it, fs);
    }
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "InsertMiddle"; }
};

// Test functor: indexes random positions of a container through
// operator[]. Not for list, which has no operator[].
template<class Cont>
struct RandomAccess {
  // Performs count * 100 indexed accesses at positions drawn from
  // rand(). The accessed element is not used; only the access itself
  // is being exercised.
  void operator()(Cont& c, long count) {
    const typename Cont::size_type sz = c.size();
    // rand() % 0 is undefined, and there is nothing to index anyway.
    if(sz == 0) {
      std::cout << "RandomAccess: not enough elements"
                << std::endl;
      return;
    }
    const long cnt = count * 100;
    for(long i = 0; i < cnt; ++i)
      static_cast<void>(c[std::rand() % sz]);
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "RandomAccess"; }
};

// Test functor: walks a container from begin() to end() with its
// iterator, without touching the elements.
template<class Cont>
struct Traversal {
  // Performs count / 100 complete passes over `c` (the repeat count
  // is scaled down for this test).
  void operator()(Cont& c, long count) {
    const long cnt = count / 100;
    for(long i = 0; i < cnt; ++i) {
      const typename Cont::iterator end = c.end();
      for(typename Cont::iterator it = c.begin(); it != end; ++it) {
        // Intentionally empty: only the iteration is being timed.
      }
    }
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "Traversal"; }
};

// Test functor: repeatedly swaps two fixed elements of a container.
template<class Cont>
struct Swap {
  // Swaps the element at index 1 with the element at index
  // size / 2 + 1, count * 10 times. Both positions must exist, which
  // requires at least three elements; with fewer the test is skipped
  // with a message instead of dereferencing end().
  void operator()(Cont& c, long count) {
    if(c.size() < 3) {
      std::cout << "Swap: not enough elements"
                << std::endl;
      return;
    }
    const typename Cont::size_type middle = c.size() / 2;
    typename Cont::iterator it = c.begin(), mid = c.begin();
    ++it; // Second element
    // Step one-by-one so this also works for list iterators:
    for(typename Cont::size_type x = 0; x < middle + 1; ++x)
      ++mid;
    const long cnt = count * 10;
    using std::swap;
    for(long i = 0; i < cnt; ++i)
      swap(*it, *mid);
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "Swap"; }
};

// Test functor: erases elements from an interior position (the
// second element) of a container.
template<class Cont>
struct RemoveMiddle {
  // Performs count / 10 erasures, each removing the element at
  // index 1. If the container is too small the test is skipped with
  // a message and `c` is left untouched.
  void operator()(Cont& c, long count) {
    const long cnt = count / 10;
    // The front element is never erased, so cnt removals need
    // cnt + 1 elements; with exactly cnt the last pass would erase
    // end(). A negative count is reported the same way.
    if(cnt < 0 ||
       (cnt > 0 &&
        c.size() <= static_cast<typename Cont::size_type>(cnt))) {
      std::cout << "RemoveMiddle: not enough elements"
                << std::endl;
      return;
    }
    for(long i = 0; i < cnt; ++i) {
      // Re-fetch each time: erase invalidates the old iterator.
      typename Cont::iterator it = c.begin();
      ++it;
      c.erase(it);
    }
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "RemoveMiddle"; }
};

// Test functor: removes elements from the back of a container.
template<class Cont>
struct RemoveBack {
  // Performs count * 10 pop_back() calls. If the container holds
  // fewer elements than that (or the count is negative) the test is
  // skipped with a message and `c` is left untouched.
  void operator()(Cont& c, long count) {
    const long cnt = count * 10;
    if(cnt < 0 ||
       c.size() < static_cast<typename Cont::size_type>(cnt)) {
      std::cout << "RemoveBack: not enough elements"
                << std::endl;
      return;
    }
    for(long i = 0; i < cnt; ++i)
      c.pop_back();
  }

  // Name of this test, for reporting. Returned as const char*:
  // converting a string literal to plain char* is ill-formed
  // since C++11.
  const char* testName() const { return "RemoveBack"; }
};

// Runs one test functor against one container and prints a line of
// the form "<test name> for <container>: <elapsed clock ticks>".
//
//   f     - test object; must provide testName() and
//           operator()(Container&, long)
//   c     - container the test operates on (modified by the test)
//   count - repeat factor handed to the test unchanged
template<class Op, class Container>
void measureTime(Op f, Container& c, long count) {
  // Op is a template instantiated on the container type, so its
  // typeid name is expected to have the container's name decorated
  // into it. The exact string is implementation-defined; if none of
  // the known names is found the label is "unknown".
  const std::string id(typeid(f).name());
  const char* cont = "unknown";
  if(id.find("deque") != std::string::npos)
    cont = "deque";
  else if(id.find("list") != std::string::npos)
    cont = "list";
  else if(id.find("vector") != std::string::npos)
    cont = "vector";
  std::cout << f.testName() << " for " << cont << ": ";
  // Standard C library CPU ticks:
  std::clock_t ticks = std::clock();
  f(c, count); // Run the test
  // Elapsed ticks = end - start:
  ticks = std::clock() - ticks;
  std::cout << ticks << std::endl;
}

typedef deque<FixedSize> DF;

typedef list<FixedSize> LF;

typedef vector<FixedSize> VF;

int main(int argc, char* argv[]) { srand(time(0));

long count = 1000;

if(argc >= 2) count = atoi(argv[1]);

DF deq;

LF lst;

VF vec, vecres;

vecres.reserve(count); // Preallocate storage measureTime(InsertBack<VF>(), vec, count);

measureTime(InsertBack<VF>(), vecres, count);

measureTime(InsertBack<DF>(), deq, count);

measureTime(InsertBack<LF>(), lst, count);

// Can't push_front() with a vector:

//! measureTime(InsertFront<VF>(), vec, count);

measureTime(InsertFront<DF>(), deq, count);

measureTime(InsertFront<LF>(), lst, count);

measureTime(InsertMiddle<VF>(), vec, count);

measureTime(InsertMiddle<DF>(), deq, count);

measureTime(InsertMiddle<LF>(), lst, count);

measureTime(RandomAccess<VF>(), vec, count);

measureTime(RandomAccess<DF>(), deq, count);

// Can't operator[] with a list:

//! measureTime(RandomAccess<LF>(), lst, count);

measureTime(Traversal<VF>(), vec, count);

measureTime(Traversal<DF>(), deq, count);

measureTime(Traversal<LF>(), lst, count);

measureTime(Swap<VF>(), vec, count);

measureTime(Swap<DF>(), deq, count);

measureTime(Swap<LF>(), lst, count);

measureTime(RemoveMiddle<VF>(), vec, count);

measureTime(RemoveMiddle<DF>(), deq, count);

measureTime(RemoveMiddle<LF>(), lst, count);

vec.resize(vec.size() * 10); // Make it bigger measureTime(RemoveBack<VF>(), vec, count);

measureTime(RemoveBack<DF>(), deq, count);

measureTime(RemoveBack<LF>(), lst, count);

} ///:~

This example makes heavy use of templates to eliminate redundancy, save space, guarantee identical code and improve clarity. Each test is represented by a class that is templatized on the container it will operate on. The test itself is inside the operator( ) which, in each case, takes a reference to the container and a repeat count – this count is not always used exactly as it is, but sometimes increased or decreased to prevent the test from being too short or too long.

The repeat count is just a factor, and all tests are compared using the same value.

Each test class also has a member function that returns its name, so that it can easily be printed. You might think that this should be accomplished using run-time type identification, but since the actual name of the class involves a template expansion, this turns out to be the more direct approach.

The measureTime( ) function template takes as its first template argument the operation that it’s going to test – which is itself a class template selected from the group defined previously in the listing. The template argument Op will not only contain the name of the class, but also (decorated into it) the type of the container it’s working with. The RTTI typeid( ) operation allows the name of the class to be extracted as a char*, which can then be used to create a string called id. This string can be searched using string::find( ) to look for deque, list or vector. The bool variable that corresponds to the matching string becomes true, and this is used to properly initialize the string cont so the container name can be accurately printed, along with the test name.

Once the type of test and the container being tested has been printed out, the actual test is quite simple. The Standard C library function clock( ) is used to capture the starting and ending CPU ticks (this is typically more fine-grained than trying to measure seconds). Since f is an object of type Op, which is a class that has an operator( ), the line:

f(c, count);

is actually calling the operator( ) for the object f.

In main( ), you can see that each different type of test is run on each type of container, except for the containers that don’t support the particular operation being tested (these are commented out).

When you run the program, you’ll get comparative performance numbers for your particular compiler and your particular operating system and platform. Although this is only intended to give you a feel for the various performance features relative to the other sequences, it is not a bad way to get a quick-and-dirty idea of the behavior of your library, and also to compare one library with another.

set

The set produces a container that will accept only one of each thing you place in it; it also sorts the elements (sorting isn’t intrinsic to the conceptual definition of a set, but the STL set stores its elements in a balanced binary tree to provide rapid lookups, thus producing sorted results when you traverse it). The first two examples in this chapter used sets.

Consider the problem of creating an index for a book. You might like to start with all the words in the book, but you only want one instance of each word and you want them sorted. Of course, a set is perfect for this, and solves the problem effortlessly. However, there’s also the problem of punctuation and any other non-alpha characters, which must be stripped off to generate proper words. One solution to this problem is to use the Standard C library function strtok( ), which produces tokens (in our case, words) given a set of delimiters to strip out:

//: C04:WordList.cpp

// Display a list of words used in a document

#include "../require.h"
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>

using namespace std;

// Characters that separate words; strtok() treats each of them as
// a delimiter and strips it out.
const char* delimiters =
  " \t;()\"<>:{}[]+-=&*#.,/\\~";

// Reads the file named by argv[1] line by line, splits each line
// into words at `delimiters`, and prints every distinct word once,
// in sorted order, one per line.
// requireArgs() and assure() come from ../require.h; presumably they
// report a wrong argument count / a file that failed to open --
// confirm against that header.
int main(int argc, char* argv[]) {
  requireArgs(argc, 1);
  std::ifstream in(argv[1]);
  assure(in, argv[1]);
  std::set<std::string> wordlist;
  std::string line;
  // strtok() overwrites delimiters with zeros, so it must be given
  // writable storage. Writing through the pointer returned by
  // string::c_str() is undefined, hence this scratch copy (reused
  // across lines to avoid reallocating).
  std::vector<char> buffer;
  while(std::getline(in, line)) {
    buffer.assign(line.begin(), line.end());
    buffer.push_back('\0'); // strtok() needs a terminated C string
    // Capture individual words:
    char* s = std::strtok(&buffer[0], delimiters);
    while(s) {
      // Automatic type conversion from char* to string:
      wordlist.insert(s);
      s = std::strtok(0, delimiters);
    }
  }
  // Output results:
  std::copy(wordlist.begin(), wordlist.end(),
            std::ostream_iterator<std::string>(std::cout, "\n"));
} ///:~

strtok( ) takes the starting address of a character buffer (the first argument) and looks for delimiters (the second argument). It replaces the delimiter with a zero, and returns the address of the beginning of the token. If you call it subsequent times with a first argument of zero it will continue extracting tokens from the rest of the string until it finds the end. In this case, the delimiters are those that delimit the keywords and identifiers of C++, so it extracts these keywords and identifiers. Each word is turned into a string and placed into the wordlist set, which eventually contains every distinct word in the file, in sorted order.

You don’t have to use a set just to get a sorted sequence. You can use the sort( ) function (along with a multitude of other functions in the STL) on different STL containers. However, it’s likely that set will be faster.

In document Thinking in C++ (Page 194-200)