Skip to content

Commit

Permalink
Add back compactds as source code instead of submodule
Browse files Browse the repository at this point in the history
  • Loading branch information
mourisl committed Nov 19, 2023
1 parent 9c0a733 commit 266f341
Show file tree
Hide file tree
Showing 58 changed files with 15,381 additions and 0 deletions.
196 changes: 196 additions & 0 deletions compactds/Alphabet.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#ifndef _MOURISL_COMPACTDS_DS_ALPHABET
#define _MOURISL_COMPACTDS_DS_ALPHABET

#include "Utils.hpp"
#include "HuffmanCode.hpp"
#include "FixedSizeElemArray.hpp"

typedef char ALPHABET ;

#define ALPHABET_CODE_NOCODE 0
#define ALPHABET_CODE_PLAIN 1
#define ALPHABET_CODE_HUFFMAN 2

// The data structure for mapping the alphabet.
// Conceptually, all the other data structures regard the alphabet as {0,...,|sigma|-1}.
// This class maps that numeric alphabet to the actual alphabet (char by default).
namespace compactds {
class Alphabet
{
private:
size_t _space ;
int _method ;
ALPHABET *_alphabetList ;
int _alphabetCode[1<<(sizeof(ALPHABET) * 8)] ;
short _alphabetCodeLen[1<<(sizeof(ALPHABET) * 8)] ; // the length of encoded bits.
size_t _n ;

HuffmanCode huffmanCode ;
public:
Alphabet()
{
_n = _space = 0 ;
_method = ALPHABET_CODE_NOCODE ;
}

~Alphabet() { Free() ; }

void Free()
{
if (_n != 0)
free(_alphabetList) ;
}

size_t GetSpace() { return _space + sizeof(*this); }

// Use plain binary number sequentially for the characters in s.
// @return: code length
int InitFromList(const ALPHABET *s, size_t n)
{
size_t i ;
this->_n = n ;
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * n) ;
_space = sizeof(ALPHABET) * n ;
memset(_alphabetCode, 0, sizeof(_alphabetCode)) ;
memset(_alphabetCodeLen, 0, sizeof(_alphabetCodeLen)) ;

int codeLen = Utils::Log2Ceil(n) ;
for (i = 0 ; i < n ; ++i)
{
_alphabetList[i] = s[i] ;
_alphabetCode[ (int)s[i] ]= i ;
_alphabetCodeLen[ (int)s[i] ] = codeLen ;
}
_method = ALPHABET_CODE_PLAIN ;
return codeLen ;
}

// s: list of the characters
// freq: list of the frequencies for each character
// n: number of character
void InitHuffman(const ALPHABET *s, const uint64_t *freq, size_t n)
{
size_t i ;
this->_n = n ;
_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * n) ;
for (i = 0 ; i < n ; ++i)
_alphabetList[i] = s[i] ;

huffmanCode.InitFromFrequency(freq, n) ;

for (i = 0 ; i < n ; ++i)
{
int l ;
_alphabetCode[i] = huffmanCode.Encode(i, l) ;
_alphabetCodeLen[i] = l ;
}
_method = ALPHABET_CODE_HUFFMAN ;
}

size_t GetAlphabetCapacity() const
{
if (ALPHABET_CODE_NOCODE)
return 0 ;
else if (ALPHABET_CODE_PLAIN)
return 1<<(Utils::Log2Ceil(_n)) ;
else if (ALPHABET_CODE_HUFFMAN)
return _n ;
return 0 ;
}

size_t GetSize() const
{
return _n ;
}

// l: how many bits used in the coding
ALPHABET Decode(WORD c, int l) const
{
//l = _alphabetCodeLen[ (int)_alphabetList[i] ] ;
size_t i ;
if (_method == ALPHABET_CODE_NOCODE)
{
return c ;
}

if (_method == ALPHABET_CODE_PLAIN)
i = c ;
else
i = huffmanCode.Decode(c, l) ;
return _alphabetList[i] ;
}

WORD Encode(ALPHABET c, int &l) const
{
if (_method == ALPHABET_CODE_NOCODE)
{
//l = Utils::CountBits(c) ;
l = 0 ;
return c ;
}
else
{
l = _alphabetCodeLen[(int)c] ;
return _alphabetCode[(int)c] ;
}
}

WORD Encode(ALPHABET c) const
{
if (_method == ALPHABET_CODE_NOCODE)
return c ;
else
return _alphabetCode[(int)c] ;
}

// test whether the alphabet c is in the list
bool IsIn(ALPHABET c) const
{
size_t i ;
for (i = 0 ; i < _n ; ++i)
if (_alphabetList[i] == c)
return true ;
return false ;
}

Alphabet& operator=(const Alphabet &in)
{
Free() ;
_n = in._n ;
_space = in._space ;
_method = in._method ;

_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * _n) ;
_space = sizeof(ALPHABET) * _n ;
memcpy(_alphabetList, in._alphabetList, sizeof(ALPHABET) * _n ) ;
memcpy(_alphabetCode, in._alphabetCode, sizeof(_alphabetCode)) ;
memcpy(_alphabetCodeLen, in._alphabetCodeLen, sizeof(_alphabetCodeLen)) ;
huffmanCode = in.huffmanCode ;
return *this ;
}

void Save(FILE *fp)
{
SAVE_VAR(fp, _space) ;
SAVE_VAR(fp, _method) ;
SAVE_VAR(fp, _n) ;
fwrite(_alphabetList, sizeof(ALPHABET), _n, fp) ;
fwrite(_alphabetCode, sizeof(_alphabetCode[0]), 1<<(sizeof(ALPHABET) * 8), fp) ;
fwrite(_alphabetCodeLen, sizeof(_alphabetCodeLen[0]), 1<<(sizeof(ALPHABET) * 8), fp) ;
}

void Load(FILE *fp)
{
Free() ;
LOAD_VAR(fp, _space) ;
LOAD_VAR(fp, _method) ;
LOAD_VAR(fp, _n) ;

_alphabetList = (ALPHABET *)malloc(sizeof(ALPHABET) * _n) ;
fread(_alphabetList, sizeof(ALPHABET), _n, fp) ;
fread(_alphabetCode, sizeof(_alphabetCode[0]), 1<<(sizeof(ALPHABET) * 8), fp) ;
fread(_alphabetCodeLen, sizeof(_alphabetCodeLen[0]), 1<<(sizeof(ALPHABET) * 8), fp) ;
}
} ;
}
#endif
70 changes: 70 additions & 0 deletions compactds/Bitvector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#ifndef _MOURISL_COMPACTDS_BITVECTOR
#define _MOURISL_COMPACTDS_BITVECTOR

#include "Utils.hpp"

#define BITVECTOR_DEFAULT_SELECT_SPEED 3

// The overall functionality of bitvector
namespace compactds {
// Abstract interface shared by the bitvector implementations: access, rank
// and select, plus helpers (Pred/Succ/Rank0/Rank) built on top of them.
class Bitvector
{
protected:
  size_t _space ;
public:
  Bitvector() {_space = 0 ;}
  // Virtual because the class is used polymorphically: deleting a derived
  // bitvector through a Bitvector pointer must run the derived destructor.
  virtual ~Bitvector() {}

  // W is the plain bit vector
  // n is passed through to the implementation along with W
  virtual void Init(const WORD *W, const size_t n) = 0 ;
  virtual void Free() = 0 ;
  virtual size_t GetSpace() = 0;

  // Return the ith bits (0-based)
  virtual int Access(size_t i) const = 0 ;
  // Return the number of 1s before i (position i itself is counted when inclusive is non-zero)
  virtual size_t Rank1(size_t i, int inclusive = 1) const = 0 ;
  // Return the index of the i-th (i is 1-based, so rank and select are inverses) 1
  // it is for 1 only for now
  virtual size_t Select(size_t i) const = 0 ;

  // Return the rightmost 1 in [0..i]
  // TODO: Handle the boundary cases (when there is no 1 in [0..i] this calls Select(0))
  size_t Pred(size_t i) const
  {
    return Select( Rank1(i) ) ;
  }

  // Return the leftmost 1 in [i..n-1]
  // The case where there is no 1 in [i..n-1] is not handled here either.
  size_t Succ(size_t i) const
  {
    return Select( Rank1(i, /*inclusive=*/0) + 1 ) ;
  }

  // Return the number of 0s before i
  size_t Rank0(size_t i, int inclusive = 1) const
  {
    // There are i+inclusive elements in the counted range, and Rank1 of them are 1's
    return i + inclusive - Rank1(i, inclusive) ;
  }

  // Rank of the given bit type: type == 1 counts 1s, any other value counts 0s.
  size_t Rank(int type, size_t i, int inclusive = 1) const
  {
    if (type == 1)
      return Rank1(i, inclusive) ;
    else
      return Rank0(i, inclusive) ;
  }

  // Write the base-class state (_space) to fp.
  virtual void Save(FILE *fp)
  {
    SAVE_VAR(fp, _space) ;
  }

  // Read the base-class state (_space) from fp.
  virtual void Load(FILE *fp)
  {
    LOAD_VAR(fp, _space) ;
  }
} ;
}
#endif
Loading

0 comments on commit 266f341

Please sign in to comment.