Bisqwit's filebacked Deque C++ template
#ifndef bqtDequeHH
#define bqtDequeHH
/*
*
* Deque<typename DataType,
* size_t NodeSize=1048576*16,
* size_t WindowSize=..., size_t NumWindows=2>
*
* Deque is a double-ended queue. It's a data container which
* provides the following storage and inquiry methods:
*
* DataType& back();
* DataType& front();
* DataType& operator[] (index); (and their const versions)
* void push_back(elem);
* void push_front(elem);
* void pop_front(number_of_elements_to_pop);
* void pop_back(number_of_elements_to_pop);
* DataType& pop_front();
* DataType& pop_back();
* size_t size();
* void clear();
* bool empty();
*
* For stacks and queues.
*
* It is designed to allow huge amounts of data to be stored without
* significantly consuming the address space of the running program.
* The maximum amount of data must be specified at compile-time,
* but the space will only be reserved as needed. Regardless of the
* space used, only a few megabytes at most will be used of the
* program's address space, depending on the template parameters.
*
* The complexity of each of those operations is O(1).
*
* Only one reference is guaranteed to be valid at a time. Accessing any
* other element, or pushing/popping, may invalidate previous references
* to elements. size() and empty() will not invalidate references.
*
* Iterators are currently not supported, but implementing them
* should not prove too much a difficulty.
*
* The container stores its data in temporary files in /tmp, and
* the files will be automatically deleted when the program exits.
*
* The template parameters are:
* DataType is the type of data stored in the container.
* NodeSize defines the unit of allocation.
* The bigger the NodeSize, the less RAM will be used.
* The smaller the NodeSize, the more often will unused
* space be freed, but storage size is still the same.
* WindowSize is the size of an individual mmap window into a temporary file.
* Typically the number of windows in simultaneously existance
* is around NumWindows*2. It should be a multiple of getpagesize().
* In Linux, the value of getpagesize() is 0x1000.
* The bigger the window, the more address space is used,
* but the less frequent the mmap() calls are.
* The WindowSize value should be chosen
* to satisfy the formulae
* WindowSize >= 2*sizeof(DataType)
* and WindowSize >= 2*getpagesize()
* and WindowSize % getpagesize() == 0.
* NumWindows is the number of windows to use per node.
* It should be correspond to the approximate amount of distinct
* positions of the container you are accessing simultaneously.
* That is, if you write to the end and read from the beginning,
* the value should be 1 or 2.
*
* Error handling:
* If attempting to push when the storage is full, or a mmap() call
* fails, the storage will call the abort() function.
*
* Standards:
* It requires that the mmap64(), munmap64(), size_t, ftruncate64(),
* unlink(), open(), dup(), close() features are defined and that
* O_LARGEFILE is defined and works.
* The file system is assumed to be capable of handling sparse files,
* and mmapping the holes and writing to them is assumed to work.
*
* Written by Joel Yliluoma
* Copyright (C) 1992,2008 Bisqwit (http://iki.fi/bisqwit/)
*/
#include <deque>
#include <boost/shared_ptr.hpp>
#ifdef __x86_64q
/* On x86_64, we have no fear of running out of address space,
* so we can use RAM as storage.
*/
#include "simplevec.hh"
template<size_t WindowSize,
size_t NumWindows,
typename DataType=char,
size_t MaxSize=0>
class DequeBuffer
{
public:
static const size_t ElemCount = MaxSize / sizeof(DataType);
static const size_t NBytes = ElemCount * (size_t)sizeof(DataType);
DequeBuffer() { }
~DequeBuffer() { }
void Detach() { }
DataType* GetElemWindow(size_t elemno)
{
return (DataType*) &data[elemno * sizeof(DataType)];
}
private:
SimpleVec<char, NBytes> data;
};
#else
/* Otherwise, we must use file-backed buffers */
# include "filewindow.hh"
#endif
template<typename DataType,
size_t NodeSize = 1048576*64,
size_t WindowSize = (sizeof(DataType)*2+0x1FFFFF)&~0x1FFFFF,
size_t NumWindows = 2
>
class Deque
{
private:
#ifdef __x86_64q
typedef DequeBuffer<WindowSize, NumWindows, DataType, NodeSize> NodeType;
#else
typedef TempFileBuffer<WindowSize, NumWindows, DataType, NodeSize> NodeType;
#endif
static const size_t ElemPerNode = NodeType::ElemCount;
/* boost::shared_ptr is used for the nodes
* because the nodes do not allow copying.
*/
mutable std::deque<boost::shared_ptr<NodeType> > Nodes;
private:
void InsertBeginning()
{
Nodes.push_front(boost::shared_ptr<NodeType> (new NodeType));
BeginPtr += ElemPerNode;
EndPtr += ElemPerNode;
}
void EatBeginning(size_t NumNodesEat)
{
if(NumNodesEat <= 1) return;
Nodes.erase(Nodes.begin(), Nodes.begin()+NumNodesEat);
BeginPtr -= ElemPerNode*NumNodesEat;
EndPtr -= ElemPerNode*NumNodesEat;
}
void EatEnd(size_t NumNodesKeep)
{
size_t NumNodesEat = Nodes.size() - NumNodesKeep;
if(NumNodesEat <= 1) return;
Nodes.erase(Nodes.begin()+NumNodesKeep, Nodes.end());
BeginPtr -= ElemPerNode*NumNodesEat;
EndPtr -= ElemPerNode*NumNodesEat;
}
void DoDetachTest()
{
if(EndPtr > BeginPtr)
{
/* If the previously pushed item is in a different
* node than this current node, and BeginPtr points
* to yet another node, there's possibility that
* the previously pushed item doesn't need to read
* for some time. Detach it if possible.
*/
const size_t LastPtr = (EndPtr-1);
size_t OldNode = (LastPtr)/ElemPerNode;
size_t NewNode = ( EndPtr)/ElemPerNode;
if(OldNode != NewNode)
{
size_t BeginNode = (BeginPtr)/ElemPerNode;
if(BeginNode != OldNode)
{
Nodes[OldNode]->Detach();
}
}
}
}
private:
DataType* GetBuffer(size_t ElemNo)
{
size_t NodeNo = ElemNo / ElemPerNode;
while(Nodes.size() <= NodeNo)
{
Nodes.push_back(boost::shared_ptr<NodeType> (new NodeType));
}
size_t NodeElem = ElemNo % ElemPerNode;
return Nodes[NodeNo]->GetElemWindow(NodeElem);
}
size_t BeginPtr, EndPtr;
size_t Size;
public:
Deque()
{
clear();
}
~Deque()
{
}
void clear()
{
Nodes.clear();
BeginPtr=EndPtr=0;
Size=0;
}
void push_back(const DataType& t)
{
DoDetachTest();
*GetBuffer(EndPtr) = t;
++Size;
++EndPtr;
}
void push_front(const DataType& t)
{
while(BeginPtr <= 0)
{
InsertBeginning();
}
--BeginPtr;
*GetBuffer(BeginPtr) = t;
++Size;
// DoDetachTest(); - not tuned for push_front() yet.
}
void insert(size_t pos, const DataType& t)
{
if(pos <= 0)
{
push_front(t);
return;
}
if(pos >= size())
{
push_back(t);
return;
}
size_t middle = size() / 2;
if(pos < middle)
{
// grow the beginning
push_front( (*this)[0]);
for(size_t n=1; n<pos; ++n)
{
(*this)[n] = (*this)[n+1];
}
}
else
{
// grow the later part
push_back( (*this)[size()-1]);
for(size_t n=size()-2; n>pos; --n)
{
(*this)[n] = (*this)[n-1];
}
}
(*this)[pos] = t;
}
void push_somewhere(const DataType& t)
{
static unsigned char flag=0;
if(++flag || BeginPtr==0) push_back(t); else push_front(t);
}
const DataType& operator[] (size_t index) const
{
return *GetBuffer(BeginPtr+index);
}
const DataType& back() const { return operator[] ( size()-1 ); }
const DataType& front() const { return operator[] ( 0 ); }
DataType& operator[] (size_t index)
{
return *GetBuffer(BeginPtr+index);
}
DataType& back() { return operator[] ( size()-1 ); }
DataType& front() { return operator[] ( 0 ); }
void pop_front(size_t amount)
{
BeginPtr += amount;
EatBeginning(BeginPtr / ElemPerNode);
Size -= amount;
}
void pop_back(size_t amount)
{
EndPtr -= amount;
EatEnd((EndPtr+ElemPerNode-1) / ElemPerNode);
Size -= amount;
}
DataType pop_front()
{
DataType result = front();
++BeginPtr;
EatBeginning(BeginPtr / ElemPerNode);
--Size;
return result;
}
DataType pop_back()
{
DataType result = back();
--EndPtr;
EatEnd((EndPtr+ElemPerNode-1) / ElemPerNode);
--Size;
return result;
}
size_t size() const { return Size; }
bool empty() const { return !Size; }
void Report(std::ostream& out) const
{
out << "// Deque has " << size()
<< " (" << (size()*sizeof(DataType)) << " bytes"
", Unit=" << sizeof(DataType)
<< ", Begin=" << BeginPtr
<< ", End=" << EndPtr
<< ", Files=" << Nodes.size()
<< ")"
"\n";
}
};
#endif