From f12548780f91453a0f957059003f0711986be026 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Mon, 7 May 2018 20:18:11 +0000 Subject: [PATCH] Add containment --- include/oxli/hashgraph.hh | 1 + include/oxli/storage.hh | 1 + khmer/_oxli/graphs.pxd | 1 + khmer/_oxli/graphs.pyx | 3 +++ src/oxli/hashgraph.cc | 17 +++++++++++++++++ src/oxli/storage.cc | 28 ++++++++++++++++++++++++++++ 6 files changed, 51 insertions(+) diff --git a/include/oxli/hashgraph.hh b/include/oxli/hashgraph.hh index f0d0d4e2db..d26d745156 100644 --- a/include/oxli/hashgraph.hh +++ b/include/oxli/hashgraph.hh @@ -294,6 +294,7 @@ public: void update_from(const Nodegraph &other); double similarity(const Nodegraph &other); + double containment(const Nodegraph &other); }; } diff --git a/include/oxli/storage.hh b/include/oxli/storage.hh index 37a69d9df8..1228f69b7a 100644 --- a/include/oxli/storage.hh +++ b/include/oxli/storage.hh @@ -227,6 +227,7 @@ public: void update_from(const BitStorage&); double similarity(const BitStorage&); + double containment(const BitStorage&); }; diff --git a/khmer/_oxli/graphs.pxd b/khmer/_oxli/graphs.pxd index 16e8627591..6a275d7729 100644 --- a/khmer/_oxli/graphs.pxd +++ b/khmer/_oxli/graphs.pxd @@ -205,6 +205,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil: void update_from(const CpNodegraph &) except +oxli_raise_py_error double similarity(const CpNodegraph &) except +oxli_raise_py_error + double containment(const CpNodegraph &) except +oxli_raise_py_error cdef extern from "oxli/labelhash.hh" namespace "oxli": diff --git a/khmer/_oxli/graphs.pyx b/khmer/_oxli/graphs.pyx index bf9c9b4f5e..41212702a8 100644 --- a/khmer/_oxli/graphs.pyx +++ b/khmer/_oxli/graphs.pyx @@ -869,3 +869,6 @@ cdef class Nodegraph(Hashgraph): def similarity(self, Nodegraph other): return deref(self._ng_this).similarity(deref(other._ng_this)) + + def containment(self, Nodegraph other): + return deref(self._ng_this).containment(deref(other._ng_this)) diff --git a/src/oxli/hashgraph.cc b/src/oxli/hashgraph.cc index cc8aa0a3fe..3dd79d2be3 100644 --- a/src/oxli/hashgraph.cc +++ b/src/oxli/hashgraph.cc @@ -923,6 +923,23 @@ double Nodegraph::similarity(const Nodegraph &otherBASE) } } +double Nodegraph::containment(const Nodegraph &otherBASE) +{ + if (_ksize != otherBASE._ksize) { + throw oxli_exception("both nodegraphs must have same k size"); + } + BitStorage * myself = dynamic_cast(this->store); + const BitStorage * other; + other = dynamic_cast(otherBASE.store); + + // if dynamic_cast worked, then the pointers will be not null. + if (myself && other) { + return myself->containment(*other); + } else { + throw oxli_exception("similarity failed with incompatible objects"); + } +} + template void Hashgraph::consume_seqfile_and_tag( std::string const &filename, unsigned int &total_reads, diff --git a/src/oxli/storage.cc b/src/oxli/storage.cc index 8e9f559b20..83d45df95a 100644 --- a/src/oxli/storage.cc +++ b/src/oxli/storage.cc @@ -123,6 +123,34 @@ double BitStorage::similarity(const BitStorage& other) return double(intersection) / double(union_size); } +double BitStorage::containment(const BitStorage& other) +{ + if (_tablesizes != other._tablesizes) { + throw oxli_exception("both nodegraphs must have same table sizes"); + } + + uint64_t intersection = 0; + uint64_t self_size = 0; + for (unsigned int table_num = 0; table_num < _n_tables; table_num++) { + Byte * me = _counts[table_num]; + Byte * ot = other._counts[table_num]; + uint64_t tablesize = _tablesizes[table_num]; + uint64_t tablebytes = tablesize / 8 + 1; + + for (uint64_t index = 0; index < tablebytes; index++) { + // First, get how many values in common we have + intersection += __builtin_popcountll(me[index] & ot[index]); + self_size += __builtin_popcountll(me[index]); + } + } + + if (self_size == 0) { + self_size = 1; + } + + return double(intersection) / double(self_size); +} + void BitStorage::save(std::string outfilename, WordLength ksize) { if (!_counts[0]) {