-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add back compactds as source code instead of submodule
- Loading branch information
Showing
58 changed files
with
15,381 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
#ifndef _MOURISL_COMPACTDS_DS_ALPHABET | ||
#define _MOURISL_COMPACTDS_DS_ALPHABET | ||
|
||
#include "Utils.hpp" | ||
#include "HuffmanCode.hpp" | ||
#include "FixedSizeElemArray.hpp" | ||
|
||
// Character type of the actual (external) alphabet; its byte width also sizes the per-character code tables in Alphabet. | ||
typedef char ALPHABET ; | ||
|
||
// Coding methods stored in Alphabet::_method. | ||
// NOCODE: no mapping, Encode/Decode return the character/code value unchanged (the state after default construction). | ||
#define ALPHABET_CODE_NOCODE 0 | ||
// PLAIN: every character is coded as its index in the list given to InitFromList. | ||
#define ALPHABET_CODE_PLAIN 1 | ||
// HUFFMAN: per-character codes and code lengths are obtained from HuffmanCode (set by InitHuffman). | ||
#define ALPHABET_CODE_HUFFMAN 2 | ||
|
||
// The data structure for mapping the alphabet. | ||
// Conceptually, all the other data structures regard the alphabet as {0,...,|sigma|-1}. | ||
// This class maps that numeric alphabet to the actual alphabet (char by default). | ||
namespace compactds { | ||
class Alphabet | ||
{ | ||
private: | ||
size_t _space ; | ||
int _method ; | ||
ALPHABET *_alphabetList ; | ||
int _alphabetCode[1<<(sizeof(ALPHABET) * 8)] ; | ||
short _alphabetCodeLen[1<<(sizeof(ALPHABET) * 8)] ; // the length of encoded bits. | ||
size_t _n ; | ||
|
||
HuffmanCode huffmanCode ; | ||
public: | ||
Alphabet() | ||
{ | ||
_n = _space = 0 ; | ||
_method = ALPHABET_CODE_NOCODE ; | ||
} | ||
|
||
~Alphabet() { Free() ; } | ||
|
||
void Free() | ||
{ | ||
if (_n != 0) | ||
free(_alphabetList) ; | ||
} | ||
|
||
size_t GetSpace() { return _space + sizeof(*this); } | ||
|
||
// Use plain binary number sequentially for the characters in s. | ||
// @return: code length | ||
int InitFromList(const ALPHABET *s, size_t n) | ||
{ | ||
size_t i ; | ||
this->_n = n ; | ||
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * n) ; | ||
_space = sizeof(ALPHABET) * n ; | ||
memset(_alphabetCode, 0, sizeof(_alphabetCode)) ; | ||
memset(_alphabetCodeLen, 0, sizeof(_alphabetCodeLen)) ; | ||
|
||
int codeLen = Utils::Log2Ceil(n) ; | ||
for (i = 0 ; i < n ; ++i) | ||
{ | ||
_alphabetList[i] = s[i] ; | ||
_alphabetCode[ (int)s[i] ]= i ; | ||
_alphabetCodeLen[ (int)s[i] ] = codeLen ; | ||
} | ||
_method = ALPHABET_CODE_PLAIN ; | ||
return codeLen ; | ||
} | ||
|
||
// s: list of the characters | ||
// freq: list of the frequencies for each character | ||
// n: number of character | ||
void InitHuffman(const ALPHABET *s, const uint64_t *freq, size_t n) | ||
{ | ||
size_t i ; | ||
this->_n = n ; | ||
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * n) ; | ||
for (i = 0 ; i < n ; ++i) | ||
_alphabetList[i] = s[i] ; | ||
|
||
huffmanCode.InitFromFrequency(freq, n) ; | ||
|
||
for (i = 0 ; i < n ; ++i) | ||
{ | ||
int l ; | ||
_alphabetCode[i] = huffmanCode.Encode(i, l) ; | ||
_alphabetCodeLen[i] = l ; | ||
} | ||
_method = ALPHABET_CODE_HUFFMAN ; | ||
} | ||
|
||
size_t GetAlphabetCapacity() const | ||
{ | ||
if (ALPHABET_CODE_NOCODE) | ||
return 0 ; | ||
else if (ALPHABET_CODE_PLAIN) | ||
return 1<<(Utils::Log2Ceil(_n)) ; | ||
else if (ALPHABET_CODE_HUFFMAN) | ||
return _n ; | ||
return 0 ; | ||
} | ||
|
||
size_t GetSize() const | ||
{ | ||
return _n ; | ||
} | ||
|
||
// l: how many bits used in the coding | ||
ALPHABET Decode(WORD c, int l) const | ||
{ | ||
//l = _alphabetCodeLen[ (int)_alphabetList[i] ] ; | ||
size_t i ; | ||
if (_method == ALPHABET_CODE_NOCODE) | ||
{ | ||
return c ; | ||
} | ||
|
||
if (_method == ALPHABET_CODE_PLAIN) | ||
i = c ; | ||
else | ||
i = huffmanCode.Decode(c, l) ; | ||
return _alphabetList[i] ; | ||
} | ||
|
||
WORD Encode(ALPHABET c, int &l) const | ||
{ | ||
if (_method == ALPHABET_CODE_NOCODE) | ||
{ | ||
//l = Utils::CountBits(c) ; | ||
l = 0 ; | ||
return c ; | ||
} | ||
else | ||
{ | ||
l = _alphabetCodeLen[(int)c] ; | ||
return _alphabetCode[(int)c] ; | ||
} | ||
} | ||
|
||
WORD Encode(ALPHABET c) const | ||
{ | ||
if (_method == ALPHABET_CODE_NOCODE) | ||
return c ; | ||
else | ||
return _alphabetCode[(int)c] ; | ||
} | ||
|
||
// test whether the alphabet c is in the list | ||
bool IsIn(ALPHABET c) const | ||
{ | ||
size_t i ; | ||
for (i = 0 ; i < _n ; ++i) | ||
if (_alphabetList[i] == c) | ||
return true ; | ||
return false ; | ||
} | ||
|
||
Alphabet& operator=(const Alphabet &in) | ||
{ | ||
Free() ; | ||
_n = in._n ; | ||
_space = in._space ; | ||
_method = in._method ; | ||
|
||
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * _n) ; | ||
_space = sizeof(ALPHABET) * _n ; | ||
memcpy(_alphabetList, in._alphabetList, sizeof(ALPHABET) * _n ) ; | ||
memcpy(_alphabetCode, in._alphabetCode, sizeof(_alphabetCode)) ; | ||
memcpy(_alphabetCodeLen, in._alphabetCodeLen, sizeof(_alphabetCodeLen)) ; | ||
huffmanCode = in.huffmanCode ; | ||
return *this ; | ||
} | ||
|
||
void Save(FILE *fp) | ||
{ | ||
SAVE_VAR(fp, _space) ; | ||
SAVE_VAR(fp, _method) ; | ||
SAVE_VAR(fp, _n) ; | ||
fwrite(_alphabetList, sizeof(ALPHABET), _n, fp) ; | ||
fwrite(_alphabetCode, sizeof(_alphabetCode[0]), 1<<(sizeof(ALPHABET) * 8), fp) ; | ||
fwrite(_alphabetCodeLen, sizeof(_alphabetCodeLen[0]), 1<<(sizeof(ALPHABET) * 8), fp) ; | ||
} | ||
|
||
void Load(FILE *fp) | ||
{ | ||
Free() ; | ||
LOAD_VAR(fp, _space) ; | ||
LOAD_VAR(fp, _method) ; | ||
LOAD_VAR(fp, _n) ; | ||
|
||
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * _n) ; | ||
fread(_alphabetList, sizeof(ALPHABET), _n, fp) ; | ||
fread(_alphabetCode, sizeof(_alphabetCode[0]), 1<<(sizeof(ALPHABET) * 8), fp) ; | ||
fread(_alphabetCodeLen, sizeof(_alphabetCodeLen[0]), 1<<(sizeof(ALPHABET) * 8), fp) ; | ||
} | ||
} ; | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#ifndef _MOURISL_COMPACTDS_BITVECTOR | ||
#define _MOURISL_COMPACTDS_BITVECTOR | ||
|
||
#include "Utils.hpp" | ||
|
||
// NOTE(review): not referenced in this header; presumably the default speed/space trade-off level for select support in derived bitvectors - confirm. | ||
#define BITVECTOR_DEFAULT_SELECT_SPEED 3 | ||
|
||
// The overall functionality of bitvector | ||
namespace compactds { | ||
class Bitvector | ||
{ | ||
protected: | ||
size_t _space ; | ||
public: | ||
Bitvector() {_space = 0 ;} | ||
~Bitvector() {} | ||
|
||
// W is the plain bit vector | ||
virtual void Init(const WORD *W, const size_t n) = 0 ; | ||
virtual void Free() = 0 ; | ||
virtual size_t GetSpace() = 0; | ||
|
||
// Return the ith bits (0-based) | ||
virtual int Access(size_t i) const = 0 ; | ||
// Return the number of 1s before i | ||
virtual size_t Rank1(size_t i, int inclusive = 1) const = 0 ; | ||
// Return the index of th i-th (i is 1-based, so rank and select are inversible) 1 | ||
// it is for 1 only for now | ||
virtual size_t Select(size_t i) const = 0 ; | ||
|
||
// Return the rightmost 1 in [0..i] | ||
// TODO: Handle the boundary cases | ||
size_t Pred(size_t i) const | ||
{ | ||
return Select( Rank1(i) ) ; | ||
} | ||
|
||
// Return the leftmost 1 in [i..n-1] | ||
size_t Succ(size_t i) const | ||
{ | ||
return Select( Rank1(i, /*inclusive=*/0) + 1 ) ; | ||
} | ||
|
||
// Return the number of 0s before i | ||
size_t Rank0(size_t i, int inclusive = 1) const | ||
{ | ||
// There are i+1 elements in [0..i], and Rank(i) of them are 1's | ||
return i + inclusive - Rank1(i, inclusive) ; | ||
} | ||
|
||
size_t Rank(int type, size_t i, int inclusive = 1) const | ||
{ | ||
if (type == 1) | ||
return Rank1(i, inclusive) ; | ||
else | ||
return Rank0(i, inclusive) ; | ||
} | ||
|
||
virtual void Save(FILE *fp) | ||
{ | ||
SAVE_VAR(fp, _space) ; | ||
} | ||
|
||
virtual void Load(FILE *fp) | ||
{ | ||
LOAD_VAR(fp, _space) ; | ||
} | ||
} ; | ||
} | ||
#endif |
Oops, something went wrong.