Skip to content

Commit

Permalink
[MRG] output unassigned hashes when there are no gather matches (#613)
Browse files Browse the repository at this point in the history
Fixes #612 - gather fails to output unassigned hashes when there are no matches. Adjusts the behavior so that the full query signature is output as unassigned instead.
  • Loading branch information
ctb authored Jan 9, 2019
1 parent 1d1f61b commit 314bd8e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
15 changes: 6 additions & 9 deletions sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,7 @@ def gather(args):
sourmash_args.get_moltype(query))

# verify signature was computed right.
if query.minhash.max_hash == 0:
if query.minhash.scaled == 0:
error('query signature needs to be created with --scaled')
sys.exit(-1)

Expand All @@ -1043,6 +1043,8 @@ def gather(args):

found = []
weighted_missed = 1
new_max_hash = query.minhash.max_hash
next_query = query
for result, weighted_missed, new_max_hash, next_query in gather_databases(query, databases, args.threshold_bp, args.ignore_abundance):
# print interim result & save in a list for later use
pct_query = '{:.1f}%'.format(result.f_orig_query*100)
Expand Down Expand Up @@ -1086,10 +1088,7 @@ def gather(args):
(1 - weighted_missed) * 100)
print_results('')

if not found:
sys.exit(0)

if args.output:
if found and args.output:
fieldnames = ['intersect_bp', 'f_orig_query', 'f_match',
'f_unique_to_query', 'f_unique_weighted',
'average_abund', 'median_abund', 'std_abund', 'name', 'filename', 'md5']
Expand All @@ -1100,15 +1099,13 @@ def gather(args):
del d['leaf'] # actual signature not in CSV.
w.writerow(d)

if args.save_matches:
if found and args.save_matches:
outname = args.save_matches.name
notify('saving all matches to "{}"', outname)
sig.save_signatures([ r.leaf for r in found ], args.save_matches)

if args.output_unassigned:
if not found:
notify('nothing found - entire query signature unassigned.')
elif not len(query.minhash):
if not len(query.minhash):
notify('no unassigned hashes! not saving.')
else:
outname = args.output_unassigned.name
Expand Down
1 change: 1 addition & 0 deletions tests/test-data/2.fa.sig

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2976,6 +2976,25 @@ def test_gather_metagenome_output_unassigned():
assert all(('1.3 Mbp 13.6% 28.2%' in out,
'NC_011294.1' in out))


@utils.in_tempdir
def test_gather_metagenome_output_unassigned_nomatches(c):
    """Check --output-unassigned when gather reports zero matches.

    Runs ``gather`` with the 2.fa.sig test signature as the query against
    47.fa.sig, asks for unassigned hashes to be written to ``foo.sig``, and
    verifies that the saved minhash equals the query's own minhash.
    """
    query_path = utils.get_test_data('2.fa.sig')
    database_path = utils.get_test_data('47.fa.sig')

    c.run_sourmash('gather', query_path, database_path,
                   '--output-unassigned', 'foo.sig')

    stdout = c.last_result.out
    print(stdout)
    assert 'found 0 matches total;' in stdout

    # with nothing assigned, the unassigned output should be the whole query
    expected = sourmash.load_one_signature(query_path, ksize=31)
    unassigned = sourmash.load_one_signature(c.output('foo.sig'))

    assert expected.minhash == unassigned.minhash


def test_gather_metagenome_downsample():
with utils.TempDirectory() as location:
testdata_glob = utils.get_test_data('gather/GCF*.sig')
Expand Down

0 comments on commit 314bd8e

Please sign in to comment.