A guide into C++ for C programmers

By Joel Yliluoma, March 2010

Phase 0 : C code example

Hello everyone, I am Bisqwit. The topic of today's video is C++. This video assumes that you already have some background in C programming.

#include <stdio.h>
#include <stdlib.h>

struct list_node
{
    struct list_node* prev, *next;
    int value;
};
struct list
{
    struct list_node* first, *last;
};

void list_insert(l, where, v)
struct list *l;
struct list_node *where;
int v;
{
}
For convenience's sake, I go by ANSI C. Nobody, who really still uses K&R, can seriously claim that they can't read ANSI C. ANSI C has been around since 1989 anyway.

void list_insert(struct list *l, struct list_node *where, int v)
{
}
This is an example C program that implements a bidirectional linked list and demonstrates some basic operations.

#include <stdio.h>
#include <stdlib.h>

struct list_node
{
    struct list_node* prev, *next;
    int value;
};
struct list
{
    struct list_node* first, *last;
};

void list_node_init(struct list_node *n, int v) { n->value = v; }
struct list_node *list_begin(struct list *l)     { return l->first; }
struct list_node *list_end(struct list *l)       { return NULL; }
struct list_node *list_advance(struct list_node *i) { return i->next; }

void list_init(struct list *l) { l->first = NULL; l->last = NULL; }

void list_insert(struct list *l, struct list_node *where, int v)
{
    struct list_node *newnode = (struct list_node*)malloc(sizeof(*newnode));
    list_node_init(newnode, v);
    newnode->prev = where ? where->prev : l->last;
    newnode->next = where;
    if(newnode->prev)  newnode->prev->next = newnode;
    if(where) where->prev = newnode;
    if(where == l->first) l->first = newnode;
    if(!where) l->last = newnode;
}
void list_push_front(struct list *l, int v) { list_insert(l,list_begin(l), v); }
void list_push_back(struct list *l, int v)  { list_insert(l,list_end(l),   v); }
void list_erase(struct list *l, struct list_node *where)
{
    struct list_node *prev = where->prev;
    struct list_node *next = where->next;
    if(prev) prev->next = next;
    if(next) next->prev = prev;
    if(where == l->first) l->first = next;
    if(where == l->last) l->last = prev;
    free(where);
}
void list_clear(struct list *l) { while(l->first) list_erase(l, l->first); }
void list_done(struct list *l) { list_clear(l); }

int main(void)
{
    struct list mylist;
    struct list_node *i;

    list_init(&mylist);
    list_push_back(&mylist, 1);
    list_push_back(&mylist, 5);
    list_push_front(&mylist, 8);
    list_push_back(&mylist, 3);

    for(i = list_begin(&mylist); i != list_end(&mylist); i = list_advance(i))
        printf("%d ", i->value);
    printf("\n");

    list_done(&mylist);

    return 0;
}
A perfectly rationable implementation. And it compiles and runs cleanly.

Phase 1 : Transform into C++ code

C++ was originally called "C with classes", that's how it began. Now a class in C++ is just the same thing as the struct.

class is struct.

The big deal with classes is that they may contain "methods". A method is a function, declared within the struct, for dealing with the struct's data.

struct list_node
{
    struct list_node* prev, *next;
    int value;

    void init(int v) { value = v; }
    struct list_node* advance() { return next; }
};
These functions automatically get an invisible parameter, a pointer to the struct instance itself, called "this". This is why they can use the struct members without a pointer.

"this" can be written explicitly in the code, if one chooses to, but for now, let's do without.

struct list
{
    struct list_node* first, *last;

    struct list_node* begin()     { return first; }
    struct list_node* end()       { return NULL; }

    void init() { first = NULL; last = NULL; }

    void insert(struct list_node* where, int v)
    {
        struct list_node* newnode = (struct list_node*)malloc(sizeof(*newnode));
        newnode->init(v);
        newnode->prev = where ? where->prev : last;
        newnode->next = where;
        if(newnode->prev)  newnode->prev->next = newnode;
        if(where) where->prev = newnode;
        if(where == first) first = newnode;
        if(!where) last = newnode;
    }
    void push_front(int v) { insert(begin(), v); }
    void push_back(int v)  { insert(end(),   v); }
    void erase(struct list_node* where)
    {
        struct list_node* prev = where->prev;
        struct list_node* next = where->next;
        if(prev) prev->next = next;
        if(next) next->prev = prev;
        if(where == first) first = next;
        if(where == last) last = prev;
        free(where);
    }
    void clear() { while(first) erase(first); }
    void done() { clear(); }
};
Compared to the equivalent C code, this results in significant simplifications that make the code both easier to read and write.

int main(void)
{
    struct list mylist;
    struct list_node* i;
 
    mylist.init();
    mylist.push_back(1);
    mylist.push_back(5);
    mylist.push_front(8);
    mylist.push_back(3);

    for(i = mylist.begin(); i != mylist.end(); i = i->advance())
        printf("%d ", i->value);
    printf("\n");

    mylist.done();

    return 0;
}

Phase 2 : Scoping

Structs can also be nested. This affects scoping & namespaces; where the name can be used and where it is visible. Such scoping is an effective way to reduce namespace pollution.

struct list
{
    struct node
    {
        struct node* prev, *next;
        int value;

        void init(int v) { value = v; }
        struct node* advance() { return next; }
    };

    struct node* first, *last;

    struct node* begin()     { return first; }
    struct node* end()       { return NULL; }

    void init() { first = NULL; last = NULL; }

    void insert(struct node* where, int v)
    {
        struct node* newnode = (struct node*)malloc(sizeof(*newnode));
        newnode->init(v);
        newnode->prev = where ? where->prev : last;
        newnode->next = where;
        if(newnode->prev)  newnode->prev->next = newnode;
        if(where) where->prev = newnode;
        if(where == first) first = newnode;
        if(!where) last = newnode;
    }
    void push_front(int v) { insert(begin(), v); }
    void push_back(int v)  { insert(end(),   v); }
    void erase(struct node* where)
    {
        struct node* prev = where->prev;
        struct node* next = where->next;
        if(prev) prev->next = next;
        if(next) next->prev = prev;
        if(where == first) first = next;
        if(where == last) last = prev;
        free(where);
    }
    void clear() { while(first) erase(first); }
    void done() { clear(); }
};

Also, because scoped names are more relevant to the context they are visible in, they can be made shorter. (Observe how "list_node" was renamed just "node" here.)

int main(void)
{
    struct list mylist;

    mylist.init();
    mylist.push_back(1);
    mylist.push_back(5);
    mylist.push_front(8);
    mylist.push_back(3);

    for(list::node* i = mylist.begin(); i != mylist.end(); i = i->advance())
        printf("%d ", i->value);
    printf("\n");

    mylist.done();

    return 0;
}
"node" is no longer a global name, so outside of the list class we need to tell the compiler which namespace it is found in. It is list's node, i.e. "list::node".

There could be types called "node" elsewhere, that could be used independently in the same program.

Phase 3: Constructors and destructors

In C++, structs are now proper types. The struct keyword can be dropped without a need for a typedef statement.

    node* first, *last;

    node* begin()     { return first; }
    node* end()       { return NULL; }
One of the major points of attention in C++ is initialization and deinitialization. C++ calls them constructing and destructing. There is special syntax allocated for just that purpose. Our init() method becomes a constructor, and our done() method becomes a destructor.

Constructors and destructors are always named after the class that they construct and destruct.

    list()  { first = NULL; last = NULL; }
    ~list() { clear(); }
The "new" keyword, new in C++, does both memory allocation and constructor calling. The "delete" keyword, conversely, does both destructing and freeing at once.

struct list
{
    struct node
    {
        node* prev, *next;
        int value;

        node(int v) : value(v) {}
        node* advance() { return next; }
    };

    node* first, *last;

    node* begin()     { return first; }
    node* end()       { return NULL; }

    list() : first(NULL), last(NULL) {}
    ~list() { clear(); }

    void insert(node* where, int v)
    {
        node* newnode = new node(v);
        newnode->prev = where ? where->prev : last;
        newnode->next = where;
        if(newnode->prev)  newnode->prev->next = newnode;
        if(where) where->prev = newnode;
        if(where == first) first = newnode;
        if(!where) last = newnode;
    }
    void push_front(int v) { insert(begin(), v); }
    void push_back(int v)  { insert(end(),   v); }
    void erase(node* where)
    {
        node* prev = where->prev;
        node* next = where->next;
        if(prev) prev->next = next;
        if(next) next->prev = prev;
        if(where == first) first = next;
        if(where == last) last = prev;
        delete where;
    }
    void clear() { while(first) erase(first); }
};
However, unless you are implementing data structures like this program here, it is very rare that you actually use the "new" or "delete" keywords yourself. A much simpler way to create objects is simply to declare them. Our main() here does just that.

int main(void)
{
    list mylist;
    // mylist.init() -- no longer necessary. mylist::mylist() is automatically called.

    mylist.push_back(1);
    mylist.push_back(5);
    mylist.push_front(8);
    mylist.push_back(3);

    for(list::node* i = mylist.begin(); i != mylist.end(); i = i->advance())
        printf("%d ", i->value);
    printf("\n");

    // mylist.done() -- no longer necessary. mylist::~mylist() is automatically called.
    return 0;
}
When the object is constructed, the constructor method is automatically called. We no longer need an explicit call to "init()". Similarly, when the object goes out of scope, here denoted by the closing curly brace, the destructor is automatically called. We no longer need an explicit call to "done()".

There is no memory leak in this program.

Phase 4 : Templates

Now, what if we need a linked list for some other data type than integers? That's where templates come in. In C, this would be difficult. In C++, just a couple of changes is enough.

template<typename T>
struct list
{
    struct node
    {
        node* prev, *next;
        T value;

        node(T v) : value(v) {}
        node* advance() { return next; }
    };

    node* first, *last;

    node* begin()     { return first; }
    node* end()       { return NULL; }  

    list() : first(NULL), last(NULL) {}

    void insert(node* where, T v)
    {
        node* newnode = new node(v);
        newnode->prev = where ? where->prev : last;
        newnode->next = where;   
        if(newnode->prev)  newnode->prev->next = newnode;
        if(where) where->prev = newnode;
        if(where == first) first = newnode;
        if(!where) last = newnode;
    }
    void push_front(T v) { insert(begin(), v); }
    void push_back(T v)  { insert(end(),   v); }
    void erase(node* where)
    {
        node* prev = where->prev;
        node* next = where->next;
        if(prev) prev->next = next;
        if(next) next->prev = prev;
        if(where == first) first = next;
        if(where == last) last = prev;
        delete where;   
    }
    void clear() { while(first) erase(first); }
    void done() { clear(); }
};

int main(void)
{
    list<int> mylist;

    mylist.push_back(1);
    mylist.push_back(5);
    mylist.push_front(8);
    mylist.push_back(3);
  
    for(auto i = mylist.begin(); i != mylist.end(); i = i->advance())
        printf("%d ", i->value);
    printf("\n");

    list<const char*> mylist2;

    mylist2.push_back("Europe");
    mylist2.push_back("Asia");
    mylist2.push_front("Africa");
    mylist2.push_back("Antarctica");

    for(auto i = mylist2.begin(); i != mylist2.end(); i = i->advance())
        printf("%s ", i->value);
    printf("\n");

    return 0;
}

Phase 5 : Standard C++ library

Now of course, a linked list is already found in the C++ standard library. There is no need to reinvent the wheel.

#include <stdio.h>
#include <list>

int main(void)
{
    std::list<int> mylist;

    mylist.push_back(1);
    mylist.push_back(5);
    mylist.push_front(8);
    mylist.push_back(3);

    for(auto i = mylist.begin(); i != mylist.end(); ++i)
        printf("%d ", *i);
    printf("\n");

    std::list<const char*> mylist2;

    mylist2.push_back("Europe");
    mylist2.push_back("Asia");
    mylist2.push_front("Africa");
    mylist2.push_back("Antarctica");

    for(auto i = mylist2.begin(); i != mylist2.end(); ++i)
        printf("%s ", *i);
    printf("\n");

    return 0;
}

How optimal is it?

Here is the assembler code of the printing loop of the C program.

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/1-thu.png

Illustrated here which parts of the loop correspond to which parts of the assembler code.

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/1b-thu.png

Now here it is after the first change into C++ code.

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/2-thu.png

This is the assembler listing when the code has been changed to use templates.

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/3-thu.png

Finally, here is the version with standard library (STL) class instead of our homebrew list:

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/4-thu.png

Again, illustrating which parts of the assembler code correspond to each part of the code when using std::list:

http://bisqwit.iki.fi/jutut/kuvat/programming_examples/cppguide/lesson1/4b-thu.png

So you see, what is the cost and the supposed overhead from all this abstraction? Absolutely nothing.

If there is a reason to avoid C++ in favor of C, performance is not one of them.

How about the program size?

Here is the list of sizes of each of these program versions when compiled:

Program Size (g++ -m32) Size (clang++ -m32) Size (g++ -m64) Size (clang++ -m64) Size (ICC 11.1, 64-bit) Size (MSVC) Size (DJGPP, g++ 4.7.2)
list (C) 5012 4524 6792 6240 20488 45056 93696
list1 (C++) 4116 4164 5376 5528 20104 45056 92672
list2 (C++) 4116 4164 5376 5304 20104 45056 92672
list3 (C++) 4048 4068 5272 5304 20320 49664 155648
list4 (C++) 4408 4264 5680 5496 20768 49664 155648
list5 (C++) 4932 5132 6944 7104 dnc 56832 157184

Using G++ version 4.9.1 on Linux with -Ofast -s, Clang++ 3.5.0 on Linux with -Ofast -s, and Visual Studio Express 10.0 (cl 16.00.30319.01) on Windows XP with /O2. The results are rather mixed. There is an increase in the library function size when new/delete are brought aboard, and when std::list is used, but other than that, most differences are within the bounds that come from alignment differences.


Last edited at: 2014-09-24T06:11:10+00:00