By Joel Yliluoma, March 2010
Phase 0 : C code example
Hello everyone, I am Bisqwit.
The topic of today's video is C++.
This video assumes that you already have some background in C programming.
#include <stdio.h>
#include <stdlib.h>
struct list_node
{
struct list_node* prev, *next;
int value;
};
struct list
{
struct list_node* first, *last;
};
void list_insert(l, where, v)
struct list *l;
struct list_node *where;
int v;
{
}
For convenience's sake, I go by ANSI C.
Nobody, who really still uses K&R, can seriously
claim that they can't read ANSI C.
ANSI C has been around since 1989 anyway.
void list_insert(struct list *l, struct list_node *where, int v)
{
}
This is an example C program that implements
a bidirectional linked list and demonstrates some basic operations.
#include <stdio.h>
#include <stdlib.h>
struct list_node
{
struct list_node* prev, *next;
int value;
};
struct list
{
struct list_node* first, *last;
};
void list_node_init(struct list_node *n, int v) { n->value = v; }
struct list_node *list_begin(struct list *l) { return l->first; }
struct list_node *list_end(struct list *l) { return NULL; }
struct list_node *list_advance(struct list_node *i) { return i->next; }
void list_init(struct list *l) { l->first = NULL; l->last = NULL; }
void list_insert(struct list *l, struct list_node *where, int v)
{
struct list_node *newnode = (struct list_node*)malloc(sizeof(*newnode));
list_node_init(newnode, v);
newnode->prev = where ? where->prev : l->last;
newnode->next = where;
if(newnode->prev) newnode->prev->next = newnode;
if(where) where->prev = newnode;
if(where == l->first) l->first = newnode;
if(!where) l->last = newnode;
}
void list_push_front(struct list *l, int v) { list_insert(l,list_begin(l), v); }
void list_push_back(struct list *l, int v) { list_insert(l,list_end(l), v); }
void list_erase(struct list *l, struct list_node *where)
{
struct list_node *prev = where->prev;
struct list_node *next = where->next;
if(prev) prev->next = next;
if(next) next->prev = prev;
if(where == l->first) l->first = next;
if(where == l->last) l->last = prev;
free(where);
}
void list_clear(struct list *l) { while(l->first) list_erase(l, l->first); }
void list_done(struct list *l) { list_clear(l); }
int main(void)
{
struct list mylist;
struct list_node *i;
list_init(&mylist);
list_push_back(&mylist, 1);
list_push_back(&mylist, 5);
list_push_front(&mylist, 8);
list_push_back(&mylist, 3);
for(i = list_begin(&mylist); i != list_end(&mylist); i = list_advance(i))
printf("%d ", i->value);
printf("\n");
list_done(&mylist);
return 0;
}
A perfectly rationable implementation.
And it compiles and runs cleanly.
Phase 1 : Transform into C++ code
C++ was originally called "C with classes", that's how it began.
Now a class in C++ is just the same thing as the struct.
class is struct.
The big deal with classes is that they may contain "methods".
A method is a function, declared within the struct,
for dealing with the struct's data.
struct list_node
{
struct list_node* prev, *next;
int value;
void init(int v) { value = v; }
struct list_node* advance() { return next; }
};
These functions automatically get an invisible parameter,
a pointer to the struct instance itself, called "
this".
This is why they can use the struct members without a pointer.
"
this" can be written explicitly in the code,
if one chooses to, but for now, let's do without.
struct list
{
struct list_node* first, *last;
struct list_node* begin() { return first; }
struct list_node* end() { return NULL; }
void init() { first = NULL; last = NULL; }
void insert(struct list_node* where, int v)
{
struct list_node* newnode = (struct list_node*)malloc(sizeof(*newnode));
newnode->init(v);
newnode->prev = where ? where->prev : last;
newnode->next = where;
if(newnode->prev) newnode->prev->next = newnode;
if(where) where->prev = newnode;
if(where == first) first = newnode;
if(!where) last = newnode;
}
void push_front(int v) { insert(begin(), v); }
void push_back(int v) { insert(end(), v); }
void erase(struct list_node* where)
{
struct list_node* prev = where->prev;
struct list_node* next = where->next;
if(prev) prev->next = next;
if(next) next->prev = prev;
if(where == first) first = next;
if(where == last) last = prev;
free(where);
}
void clear() { while(first) erase(first); }
void done() { clear(); }
};
Compared to the equivalent C code, this results in significant
simplifications that make the code both easier to read and write.
int main(void)
{
struct list mylist;
struct list_node* i;
mylist.init();
mylist.push_back(1);
mylist.push_back(5);
mylist.push_front(8);
mylist.push_back(3);
for(i = mylist.begin(); i != mylist.end(); i = i->advance())
printf("%d ", i->value);
printf("\n");
mylist.done();
return 0;
}
Phase 2 : Scoping
Structs can also be nested. This affects scoping & namespaces;
where the name can be used and where it is visible.
Such scoping is an effective way to reduce namespace pollution.
struct list
{
struct node
{
struct node* prev, *next;
int value;
void init(int v) { value = v; }
struct node* advance() { return next; }
};
struct node* first, *last;
struct node* begin() { return first; }
struct node* end() { return NULL; }
void init() { first = NULL; last = NULL; }
void insert(struct node* where, int v)
{
struct node* newnode = (struct node*)malloc(sizeof(*newnode));
newnode->init(v);
newnode->prev = where ? where->prev : last;
newnode->next = where;
if(newnode->prev) newnode->prev->next = newnode;
if(where) where->prev = newnode;
if(where == first) first = newnode;
if(!where) last = newnode;
}
void push_front(int v) { insert(begin(), v); }
void push_back(int v) { insert(end(), v); }
void erase(struct node* where)
{
struct node* prev = where->prev;
struct node* next = where->next;
if(prev) prev->next = next;
if(next) next->prev = prev;
if(where == first) first = next;
if(where == last) last = prev;
free(where);
}
void clear() { while(first) erase(first); }
void done() { clear(); }
};
Also, because scoped names are more relevant to the context
they are visible in, they can be made shorter.
(Observe how "list_node" was renamed just "node" here.)
int main(void)
{
struct list mylist;
mylist.init();
mylist.push_back(1);
mylist.push_back(5);
mylist.push_front(8);
mylist.push_back(3);
for(list::node* i = mylist.begin(); i != mylist.end(); i = i->advance())
printf("%d ", i->value);
printf("\n");
mylist.done();
return 0;
}
"
node" is no longer a global name, so outside of the
list class
we need to tell the compiler which namespace it is found in.
It is
list's node, i.e. "
list::node".
There could be types called "node" elsewhere,
that could be used independently in the same program.
Phase 3: Constructors and destructors
In C++, structs are now proper types. The struct keyword
can be dropped without a need for a typedef statement.
node* first, *last;
node* begin() { return first; }
node* end() { return NULL; }
One of the major points of attention in C++ is initialization
and deinitialization. C++ calls them constructing and destructing.
There is special syntax allocated for just that purpose.
Our init() method becomes a constructor, and our done() method
becomes a destructor.
Constructors and destructors are always named after the class
that they construct and destruct.
list() { first = NULL; last = NULL; }
~list() { clear(); }
The "new" keyword, new in C++, does both memory allocation
and constructor calling. The "delete" keyword, conversely,
does both destructing and freeing at once.
struct list
{
struct node
{
node* prev, *next;
int value;
node(int v) : value(v) {}
node* advance() { return next; }
};
node* first, *last;
node* begin() { return first; }
node* end() { return NULL; }
list() : first(NULL), last(NULL) {}
~list() { clear(); }
void insert(node* where, int v)
{
node* newnode = new node(v);
newnode->prev = where ? where->prev : last;
newnode->next = where;
if(newnode->prev) newnode->prev->next = newnode;
if(where) where->prev = newnode;
if(where == first) first = newnode;
if(!where) last = newnode;
}
void push_front(int v) { insert(begin(), v); }
void push_back(int v) { insert(end(), v); }
void erase(node* where)
{
node* prev = where->prev;
node* next = where->next;
if(prev) prev->next = next;
if(next) next->prev = prev;
if(where == first) first = next;
if(where == last) last = prev;
delete where;
}
void clear() { while(first) erase(first); }
};
However, unless you are implementing data structures like this program
here, it is very rare that you actually use the "new" or "delete" keywords
yourself. A much simpler way to create objects is simply to declare them.
Our main() here does just that.
int main(void)
{
list mylist;
// mylist.init() -- no longer necessary. mylist::mylist() is automatically called.
mylist.push_back(1);
mylist.push_back(5);
mylist.push_front(8);
mylist.push_back(3);
for(list::node* i = mylist.begin(); i != mylist.end(); i = i->advance())
printf("%d ", i->value);
printf("\n");
// mylist.done() -- no longer necessary. mylist::~mylist() is automatically called.
return 0;
}
When the object is constructed, the constructor method is automatically
called. We no longer need an explicit call to "init()".
Similarly, when the object goes out of scope, here denoted by the
closing curly brace, the destructor is automatically called.
We no longer need an explicit call to "done()".
There is no memory leak in this program.
Phase 4 : Templates
Now, what if we need a linked list for some other data type than integers?
That's where templates come in.
In C, this would be difficult. In C++, just a couple of changes is enough.
template<typename T>
struct list
{
struct node
{
node* prev, *next;
T value;
node(T v) : value(v) {}
node* advance() { return next; }
};
node* first, *last;
node* begin() { return first; }
node* end() { return NULL; }
list() : first(NULL), last(NULL) {}
void insert(node* where, T v)
{
node* newnode = new node(v);
newnode->prev = where ? where->prev : last;
newnode->next = where;
if(newnode->prev) newnode->prev->next = newnode;
if(where) where->prev = newnode;
if(where == first) first = newnode;
if(!where) last = newnode;
}
void push_front(T v) { insert(begin(), v); }
void push_back(T v) { insert(end(), v); }
void erase(node* where)
{
node* prev = where->prev;
node* next = where->next;
if(prev) prev->next = next;
if(next) next->prev = prev;
if(where == first) first = next;
if(where == last) last = prev;
delete where;
}
void clear() { while(first) erase(first); }
void done() { clear(); }
};
int main(void)
{
list<int> mylist;
mylist.push_back(1);
mylist.push_back(5);
mylist.push_front(8);
mylist.push_back(3);
for(auto i = mylist.begin(); i != mylist.end(); i = i->advance())
printf("%d ", i->value);
printf("\n");
list<const char*> mylist2;
mylist2.push_back("Europe");
mylist2.push_back("Asia");
mylist2.push_front("Africa");
mylist2.push_back("Antarctica");
for(auto i = mylist2.begin(); i != mylist2.end(); i = i->advance())
printf("%s ", i->value);
printf("\n");
return 0;
}
Phase 5 : Standard C++ library
Now of course, a linked list is already found in the C++ standard library.
There is no need to reinvent the wheel.
#include <stdio.h>
#include <list>
int main(void)
{
std::list<int> mylist;
mylist.push_back(1);
mylist.push_back(5);
mylist.push_front(8);
mylist.push_back(3);
for(auto i = mylist.begin(); i != mylist.end(); ++i)
printf("%d ", *i);
printf("\n");
std::list<const char*> mylist2;
mylist2.push_back("Europe");
mylist2.push_back("Asia");
mylist2.push_front("Africa");
mylist2.push_back("Antarctica");
for(auto i = mylist2.begin(); i != mylist2.end(); ++i)
printf("%s ", *i);
printf("\n");
return 0;
}
How optimal is it?
Here is the assembler code of the printing loop of the C program.
Illustrated here which parts of the loop correspond to which parts of the assembler code.
Now here it is after the first change into C++ code.
This is the assembler listing when the code has been changed to use templates.
Finally, here is the version with standard library (STL) class instead of our homebrew list:
Again, illustrating which parts of the assembler code correspond to each part of the code when using
std::list:
So you see, what is the cost and the supposed overhead from all this abstraction?
Absolutely nothing.
If there is a reason to avoid C++ in favor of C, performance is not one of them.
How about the program size?
Here is the list of sizes of each of these program versions when compiled:
| Program | Size (g++ -m32) | Size (clang++ -m32) | Size (g++ -m64) | Size (clang++ -m64) | Size (ICC 11.1, 64-bit) | Size (MSVC) | Size (DJGPP, g++ 4.7.2) |
| list (C) | 5012 | 4524 | 6792 | 6240 | 20488 | 45056 | 93696 |
| list1 (C++) | 4116 | 4164 | 5376 | 5528 | 20104 | 45056 | 92672 |
| list2 (C++) | 4116 | 4164 | 5376 | 5304 | 20104 | 45056 | 92672 |
| list3 (C++) | 4048 | 4068 | 5272 | 5304 | 20320 | 49664 | 155648 |
| list4 (C++) | 4408 | 4264 | 5680 | 5496 | 20768 | 49664 | 155648 |
| list5 (C++) | 4932 | 5132 | 6944 | 7104 | dnc | 56832 | 157184 |
Using G++ version 4.9.1 on Linux with -Ofast -s, Clang++ 3.5.0 on Linux with -Ofast -s, and Visual Studio Express 10.0 (cl 16.00.30319.01) on Windows XP with /O2.
The results are rather mixed.
There is an increase in the library function size when new/delete are brought aboard, and when std::list is used,
but other than that, most differences are within the bounds that come from alignment differences.