diff --git a/include/oxli/storage.hh b/include/oxli/storage.hh index 1228f69b7a..52f7ad91a4 100644 --- a/include/oxli/storage.hh +++ b/include/oxli/storage.hh @@ -43,6 +43,7 @@ Contact: khmer-project@idyll.org #include #include using MuxGuard = std::lock_guard; +#include #include "gqf.h" @@ -137,6 +138,7 @@ public: _counts[i] = new Byte[tablebytes]; memset(_counts[i], 0, tablebytes); } + std::cout << "DEBUG: " << _counts[0] << std::endl << std::flush; } // Accessors for protected/private table info members diff --git a/khmer/_oxli/graphs.pyx b/khmer/_oxli/graphs.pyx index a054f8f687..0721f0028c 100644 --- a/khmer/_oxli/graphs.pyx +++ b/khmer/_oxli/graphs.pyx @@ -904,3 +904,17 @@ cdef class Nodegraph(Hashgraph): def containment(self, Nodegraph other): return deref(self._ng_this).containment(deref(other._ng_this)) + + def get_raw_tables(self): + cdef uint8_t ** table_ptrs = deref(self._ng_this).get_raw_tables() + cdef vector[uint64_t] sizes = deref(self._ng_this).get_tablesizes() + + new_raw_tables = [] + for table_idx, table_size in enumerate(sizes): + table = table_ptrs[table_idx] + counter = table[0] + new_table = [] + for i in range(table_size): + new_table.append(counter >> i) + new_raw_tables.append(new_table) + return new_raw_tables diff --git a/notebooks/debug_graphs.ipynb b/notebooks/debug_graphs.ipynb index 9b2168c580..0f99b03dd2 100644 --- a/notebooks/debug_graphs.ipynb +++ b/notebooks/debug_graphs.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -20,27 +20,31 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[0, 171, 134, 226, 57, 127, 0]" + "[0, 0, 0, 0, 0, 0, 0]" ] }, - "execution_count": 13, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ng.get_raw_tables()[0].tolist()" + "tables = []\n", + "counter = ng.get_raw_tables()[0][0]\n", + "for i, _ in enumerate(ng.get_raw_tables()[0].tolist()):\n", + " tables.append(counter >> i)\n", + "tables" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -58,7 +62,7 @@ "[0, 0, 0, 0, 0, 0, 0]" ] }, - "execution_count": 15, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -66,6 +70,13 @@ "source": [ "cg.get_raw_tables()[0].tolist()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/nodegraph_similarity.ipynb b/notebooks/nodegraph_similarity.ipynb index d566318206..62e178fd5b 100644 --- a/notebooks/nodegraph_similarity.ipynb +++ b/notebooks/nodegraph_similarity.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 38, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -11,70 +11,67 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 79, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[0, 139, 170, 240, 110, 127, 0]" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 235, 90, 145, 27, 127, 0]\n", + "[[0, 0, 0, 0, 0, 0, 0]]\n" + ] } ], "source": [ "ng = Nodegraph(3, 10, 1)\n", - "ng.get_raw_tables()[0].tolist()" + "print(ng.get_raw_tables()[0].tolist())\n", + "print(fixed_raw(ng))" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 80, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[1, 139, 170, 240, 110, 127, 0]" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 235, 90, 145, 27, 127, 0]\n", + "[[0, 0, 0, 0, 0, 0, 1]]\n" + ] } ], "source": [ "ng.count('AAA')\n", - "ng.get_raw_tables()[0].tolist()" + "print(ng.get_raw_tables()[0].tolist())\n", + "print(fixed_raw(ng))" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 81, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[17, 139, 170, 240, 110, 127, 0]" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[17, 235, 90, 145, 27, 127, 0]\n", + "[[0, 0, 0, 0, 0, 0, 17]]\n" + ] } ], "source": [ "ng.count('CGC')\n", - "ng.get_raw_tables()[0].tolist()" + "print(ng.get_raw_tables()[0].tolist())\n", + "print(fixed_raw(ng))" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 82, "metadata": {}, "outputs": [ { @@ -83,7 +80,7 @@ "0.5" ] }, - "execution_count": 41, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -96,61 +93,78 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 83, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[1, 139, 170, 240, 110, 127, 0]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 235, 90, 145, 27, 127, 0]\n", + "[[0, 0, 0, 0, 0, 0, 1]]\n" + ] } ], "source": [ - "other_ng.get_raw_tables()[0].tolist()" + "print(other_ng.get_raw_tables()[0].tolist())\n", + "print(fixed_raw(other_ng))" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 84, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[17, 139, 170, 240, 110, 127, 0]" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[17, 235, 90, 145, 27, 127, 0]\n", + "[[0, 0, 0, 0, 0, 0, 17]]\n" + ] } ], "source": [ - "ng.get_raw_tables()[0].tolist()" + "print(ng.get_raw_tables()[0].tolist())\n", + "print(fixed_raw(ng))" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "def fixed_raw(ng):\n", + " raw_tables = ng.get_raw_tables()\n", + " new_raw_tables = []\n", + " for table in raw_tables:\n", + " counter = table[0]\n", + " new_table = []\n", + " for i in table:\n", + " new_table.append(counter >> i)\n", + " new_raw_tables.append(new_table)\n", + " return new_raw_tables" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "def py_similarity(a, b):\n", - " a_tables = a.get_raw_tables()\n", - " b_tables = b.get_raw_tables()\n", + " a_tables = fixed_raw(a)\n", + " b_tables = fixed_raw(b)\n", " \n", " intersection = 0\n", " union_size = 0\n", " for i, (me, other) in enumerate(zip(a_tables, b_tables)):\n", - " for v, t in zip(me.tolist(), other.tolist()):\n", + " for v, t in zip(me, other):\n", " intersection += bin(v & t).count('1')\n", " union_size += bin(v | t).count('1')\n", " print(v, t, intersection, union_size)\n", + "\n", " if union_size == 0:\n", " union_size = 1\n", " \n", @@ -159,29 +173,29 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 87, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "17 1 1 2\n", - "139 139 5 6\n", - "170 170 9 10\n", - "240 240 13 14\n", - "110 110 18 19\n", - "127 127 25 26\n", - "0 0 25 26\n" + "0 0 0 0\n", + "0 0 0 0\n", + "0 0 0 0\n", + "0 0 0 0\n", + "0 0 0 0\n", + "0 0 0 0\n", + "17 1 1 2\n" ] }, { "data": { "text/plain": [ - "0.9615384615384616" + "0.5" ] }, - "execution_count": 49, + "execution_count": 87, "metadata": {}, "output_type": "execute_result" } @@ -192,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ @@ -202,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -211,7 +225,7 @@ "0.0" ] }, - "execution_count": 18, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } @@ -223,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -232,7 +246,7 @@ "0.0" ] }, - "execution_count": 19, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -244,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 91, "metadata": {}, "outputs": [ { @@ -253,7 +267,7 @@ "0.0" ] }, - "execution_count": 20, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -265,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 92, "metadata": {}, "outputs": [ { @@ -274,7 +288,7 @@ "1.0" ] }, - "execution_count": 21, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } @@ -286,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -295,7 +309,7 @@ "0.5" ] }, - "execution_count": 22, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -307,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ @@ -317,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 95, "metadata": {}, "outputs": [ { @@ -326,7 +340,7 @@ "0.0" ] }, - "execution_count": 24, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -338,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 96, "metadata": {}, "outputs": [ { @@ -347,7 +361,7 @@ "0.0" ] }, - "execution_count": 25, + "execution_count": 96, "metadata": {}, "output_type": "execute_result" } @@ -359,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 97, "metadata": {}, "outputs": [ { @@ -368,7 +382,7 @@ "0.0" ] }, - "execution_count": 26, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -380,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 98, "metadata": {}, "outputs": [ { @@ -389,7 +403,7 @@ "1.0" ] }, - "execution_count": 27, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -401,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -410,7 +424,7 @@ "1.0" ] }, - "execution_count": 28, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/test_nodegraph.py b/tests/test_nodegraph.py index a5d1d42a7b..fbc18c833b 100755 --- a/tests/test_nodegraph.py +++ b/tests/test_nodegraph.py @@ -632,6 +632,24 @@ def test_extract_unique_paths_4(): assert x == ['TGGAGAGACACAGATAGACAGG', 'TAGACAGGAGTGGCGAT'] +def test_get_tiny_raw_tables(): + kh = khmer.Nodegraph(3, 7, 1) + tables = kh.get_raw_tables() + for size, table in zip(kh.hashsizes(), tables): + assert isinstance(table, memoryview) + assert size == len(table) + assert all(t == 0 for t in table.tolist()) + + kh.consume('ATG') + kh.consume('AGT') + kh.consume('ATA') + tables = kh.get_raw_tables() + + for size, table in zip(kh.hashsizes(), tables): + assert isinstance(table, memoryview) + assert size == len(table) + + def test_get_raw_tables(): kh = khmer.Nodegraph(10, 1e6, 4) kh.consume('ATGGAGAGAC')