Skip to content

Commit

Permalink
add similarity method to nodegraph and bitstorage
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Feb 15, 2018
1 parent a6e1189 commit ef4e0ed
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/oxli/hashgraph.hh
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ public:
: Hashgraph(ksize, new BitStorage(sizes)) { } ;

void update_from(const Nodegraph &other);
// Compare this nodegraph with `other` and return a similarity score as a
// double. Throws oxli_exception when the two graphs have different k sizes
// or when either graph's storage is not a BitStorage.
double similarity(const Nodegraph &other);
};

}
Expand Down
1 change: 1 addition & 0 deletions include/oxli/storage.hh
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ public:
}

void update_from(const BitStorage&);
// Return (bits set in both storages) / (bits set in either storage),
// accumulated over all tables; yields 0 when no bit is set in either.
// Throws oxli_exception when the table sizes of the two storages differ.
double similarity(const BitStorage&);
};


Expand Down
1 change: 1 addition & 0 deletions khmer/_oxli/graphs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:
CpNodegraph(WordLength, vector[uint64_t])

void update_from(const CpNodegraph &) except +oxli_raise_py_error
# C++ Nodegraph::similarity; returns a double. Any C++ exception raised by
# the call is handed to oxli_raise_py_error for translation into a Python
# exception (Cython `except +handler` form).
double similarity(const CpNodegraph &) except +oxli_raise_py_error


cdef extern from "oxli/labelhash.hh" namespace "oxli":
Expand Down
3 changes: 3 additions & 0 deletions khmer/_oxli/graphs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -866,3 +866,6 @@ cdef class Nodegraph(Hashgraph):

# Python-facing wrapper: forwards to the C++ update_from() of the wrapped
# nodegraph, passing the C++ object wrapped by `other`.
# NOTE(review): presumably merges `other` into this graph in place -- the
# update_from implementation is not visible here; confirm.
def update(self, Nodegraph other):
deref(self._ng_this).update_from(deref(other._ng_this))

# Python-facing wrapper: calls the C++ similarity() of the wrapped nodegraph
# against the C++ object wrapped by `other` and returns the resulting double.
def similarity(self, Nodegraph other):
return deref(self._ng_this).similarity(deref(other._ng_this))
17 changes: 17 additions & 0 deletions src/oxli/hashgraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,23 @@ void Nodegraph::update_from(const Nodegraph &otherBASE)
}
}

double Nodegraph::similarity(const Nodegraph &otherBASE)
{
if (_ksize != otherBASE._ksize) {
throw oxli_exception("both nodegraphs must have same k size");
}
BitStorage * myself = dynamic_cast<BitStorage *>(this->store);
const BitStorage * other;
other = dynamic_cast<const BitStorage*>(otherBASE.store);

// if dynamic_cast worked, then the pointers will be not null.
if (myself && other) {
return myself->similarity(*other);
} else {
throw oxli_exception("similarity failed with incompatible objects");
}
}

template void Hashgraph::consume_seqfile_and_tag<read_parsers::FastxReader>(
std::string const &filename,
unsigned int &total_reads,
Expand Down
27 changes: 27 additions & 0 deletions src/oxli/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,33 @@ void BitStorage::update_from(const BitStorage& other)
}
}

double BitStorage::similarity(const BitStorage& other)
{
if (_tablesizes != other._tablesizes) {
throw oxli_exception("both nodegraphs must have same table sizes");
}

uint64_t intersection = 0;
uint64_t union_size = 0;
for (unsigned int table_num = 0; table_num < _n_tables; table_num++) {
Byte * me = _counts[table_num];
Byte * ot = other._counts[table_num];
uint64_t tablesize = _tablesizes[table_num];
uint64_t tablebytes = tablesize / 8 + 1;

for (uint64_t index = 0; index < tablebytes; index++) {
// First, get how many values in common we have
intersection += __builtin_popcountll(me[index] & ot[index]);
union_size += __builtin_popcountll(me[index] | ot[index]);
}
}

if (union_size == 0) {
union_size = 1;
}

return double(intersection) / double(union_size);
}

void BitStorage::save(std::string outfilename, WordLength ksize)
{
Expand Down
20 changes: 20 additions & 0 deletions tests/test_nodegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,26 @@ def test_update_from_diff_num_tables():
print(str(err))


def test_similarity_1():
    """Similarity is 0 until both graphs hold the same k-mers, then 1."""
    first = khmer.Nodegraph(5, 1000, 4)
    second = khmer.Nodegraph(5, 1000, 4)

    # Two empty graphs.
    assert first.similarity(second) == 0

    # Only the second graph holds a k-mer.
    second.count('AAAAA')
    assert first.similarity(second) == 0

    # Each graph holds a different k-mer.
    first.count('GCGCG')
    assert first.similarity(second) == 0

    # Both graphs now hold the same two k-mers.
    first.count('AAAAA')
    second.count('GCGCG')
    assert first.similarity(second) == 1


def test_n_occupied_1():
filename = utils.get_test_data('random-20-a.fa')

Expand Down

0 comments on commit ef4e0ed

Please sign in to comment.