Bisqwit's filebacked Deque C++ template

Download: deque.hh, filewindow.hh

#ifndef bqtDequeHH #define bqtDequeHH /* * * Deque<typename DataType, * size_t NodeSize=1048576*16, * size_t WindowSize=..., size_t NumWindows=2> * * Deque is a double-ended queue. It's a data container which * provides the following storage and inquiry methods: * * DataType& back(); * DataType& front(); * DataType& operator[] (index); (and their const versions) * void push_back(elem); * void push_front(elem); * void pop_front(number_of_elements_to_pop); * void pop_back(number_of_elements_to_pop); * DataType& pop_front(); * DataType& pop_back(); * size_t size(); * void clear(); * bool empty(); * * For stacks and queues. * * It is designed to allow huge amounts of data to be stored without * significantly consuming the address space of the running program. * The maximum amount of data must be specified at compile-time, * but the space will only be reserved as needed. Regardless of the * space used, only a few megabytes at most will be used of the * program's address space, depending on the template parameters. * * The complexity of each of those operations is O(1). * * Only one reference is guaranteed to be valid at a time. Accessing any * other element, or pushing/popping, may invalidate previous references * to elements. size() and empty() will not invalidate references. * * Iterators are currently not supported, but implementing them * should not prove too much a difficulty. * * The container stores its data in temporary files in /tmp, and * the files will be automatically deleted when the program exits. * * The template parameters are: * DataType is the type of data stored in the container. * NodeSize defines the unit of allocation. * The bigger the NodeSize, the less RAM will be used. * The smaller the NodeSize, the more often will unused * space be freed, but storage size is still the same. * WindowSize is the size of an individual mmap window into a temporary file. * Typically the number of windows in simultaneously existance * is around NumWindows*2. It should be a multiple of getpagesize(). * In Linux, the value of getpagesize() is 0x1000. * The bigger the window, the more address space is used, * but the less frequent the mmap() calls are. * The WindowSize value should be chosen * to satisfy the formulae * WindowSize >= 2*sizeof(DataType) * and WindowSize >= 2*getpagesize() * and WindowSize % getpagesize() == 0. * NumWindows is the number of windows to use per node. * It should be correspond to the approximate amount of distinct * positions of the container you are accessing simultaneously. * That is, if you write to the end and read from the beginning, * the value should be 1 or 2. * * Error handling: * If attempting to push when the storage is full, or a mmap() call * fails, the storage will call the abort() function. * * Standards: * It requires that the mmap64(), munmap64(), size_t, ftruncate64(), * unlink(), open(), dup(), close() features are defined and that * O_LARGEFILE is defined and works. * The file system is assumed to be capable of handling sparse files, * and mmapping the holes and writing to them is assumed to work. * * Written by Joel Yliluoma * Copyright (C) 1992,2008 Bisqwit (http://iki.fi/bisqwit/) */ #include <deque> #include <boost/shared_ptr.hpp> #ifdef __x86_64q /* On x86_64, we have no fear of running out of address space, * so we can use RAM as storage. */ #include "simplevec.hh" template<size_t WindowSize, size_t NumWindows, typename DataType=char, size_t MaxSize=0> class DequeBuffer { public: static const size_t ElemCount = MaxSize / sizeof(DataType); static const size_t NBytes = ElemCount * (size_t)sizeof(DataType); DequeBuffer() { } ~DequeBuffer() { } void Detach() { } DataType* GetElemWindow(size_t elemno) { return (DataType*) &data[elemno * sizeof(DataType)]; } private: SimpleVec<char, NBytes> data; }; #else /* Otherwise, we must use file-backed buffers */ # include "filewindow.hh" #endif template<typename DataType, size_t NodeSize = 1048576*64, size_t WindowSize = (sizeof(DataType)*2+0x1FFFFF)&~0x1FFFFF, size_t NumWindows = 2 > class Deque { private: #ifdef __x86_64q typedef DequeBuffer<WindowSize, NumWindows, DataType, NodeSize> NodeType; #else typedef TempFileBuffer<WindowSize, NumWindows, DataType, NodeSize> NodeType; #endif static const size_t ElemPerNode = NodeType::ElemCount; /* boost::shared_ptr is used for the nodes * because the nodes do not allow copying. */ mutable std::deque<boost::shared_ptr<NodeType> > Nodes; private: void InsertBeginning() { Nodes.push_front(boost::shared_ptr<NodeType> (new NodeType)); BeginPtr += ElemPerNode; EndPtr += ElemPerNode; } void EatBeginning(size_t NumNodesEat) { if(NumNodesEat <= 1) return; Nodes.erase(Nodes.begin(), Nodes.begin()+NumNodesEat); BeginPtr -= ElemPerNode*NumNodesEat; EndPtr -= ElemPerNode*NumNodesEat; } void EatEnd(size_t NumNodesKeep) { size_t NumNodesEat = Nodes.size() - NumNodesKeep; if(NumNodesEat <= 1) return; Nodes.erase(Nodes.begin()+NumNodesKeep, Nodes.end()); BeginPtr -= ElemPerNode*NumNodesEat; EndPtr -= ElemPerNode*NumNodesEat; } void DoDetachTest() { if(EndPtr > BeginPtr) { /* If the previously pushed item is in a different * node than this current node, and BeginPtr points * to yet another node, there's possibility that * the previously pushed item doesn't need to read * for some time. Detach it if possible. */ const size_t LastPtr = (EndPtr-1); size_t OldNode = (LastPtr)/ElemPerNode; size_t NewNode = ( EndPtr)/ElemPerNode; if(OldNode != NewNode) { size_t BeginNode = (BeginPtr)/ElemPerNode; if(BeginNode != OldNode) { Nodes[OldNode]->Detach(); } } } } private: DataType* GetBuffer(size_t ElemNo) { size_t NodeNo = ElemNo / ElemPerNode; while(Nodes.size() <= NodeNo) { Nodes.push_back(boost::shared_ptr<NodeType> (new NodeType)); } size_t NodeElem = ElemNo % ElemPerNode; return Nodes[NodeNo]->GetElemWindow(NodeElem); } size_t BeginPtr, EndPtr; size_t Size; public: Deque() { clear(); } ~Deque() { } void clear() { Nodes.clear(); BeginPtr=EndPtr=0; Size=0; } void push_back(const DataType& t) { DoDetachTest(); *GetBuffer(EndPtr) = t; ++Size; ++EndPtr; } void push_front(const DataType& t) { while(BeginPtr <= 0) { InsertBeginning(); } --BeginPtr; *GetBuffer(BeginPtr) = t; ++Size; // DoDetachTest(); - not tuned for push_front() yet. } void insert(size_t pos, const DataType& t) { if(pos <= 0) { push_front(t); return; } if(pos >= size()) { push_back(t); return; } size_t middle = size() / 2; if(pos < middle) { // grow the beginning push_front( (*this)[0]); for(size_t n=1; n<pos; ++n) { (*this)[n] = (*this)[n+1]; } } else { // grow the later part push_back( (*this)[size()-1]); for(size_t n=size()-2; n>pos; --n) { (*this)[n] = (*this)[n-1]; } } (*this)[pos] = t; } void push_somewhere(const DataType& t) { static unsigned char flag=0; if(++flag || BeginPtr==0) push_back(t); else push_front(t); } const DataType& operator[] (size_t index) const { return *GetBuffer(BeginPtr+index); } const DataType& back() const { return operator[] ( size()-1 ); } const DataType& front() const { return operator[] ( 0 ); } DataType& operator[] (size_t index) { return *GetBuffer(BeginPtr+index); } DataType& back() { return operator[] ( size()-1 ); } DataType& front() { return operator[] ( 0 ); } void pop_front(size_t amount) { BeginPtr += amount; EatBeginning(BeginPtr / ElemPerNode); Size -= amount; } void pop_back(size_t amount) { EndPtr -= amount; EatEnd((EndPtr+ElemPerNode-1) / ElemPerNode); Size -= amount; } DataType pop_front() { DataType result = front(); ++BeginPtr; EatBeginning(BeginPtr / ElemPerNode); --Size; return result; } DataType pop_back() { DataType result = back(); --EndPtr; EatEnd((EndPtr+ElemPerNode-1) / ElemPerNode); --Size; return result; } size_t size() const { return Size; } bool empty() const { return !Size; } void Report(std::ostream& out) const { out << "// Deque has " << size() << " (" << (size()*sizeof(DataType)) << " bytes" ", Unit=" << sizeof(DataType) << ", Begin=" << BeginPtr << ", End=" << EndPtr << ", Files=" << Nodes.size() << ")" "\n"; } }; #endif