Skip to content

Commit

Permalink
[MRG] minor cleanup in sourmash_args & sig submodules (#1586)
Browse files (browse the repository at this point in the history)
* various cleanups of sourmash_args

* clean up flakes errors

* clean up sourmash.sig submodule
  • Loading branch information
ctb authored Jun 14, 2021
1 parent 05d0df2 commit ff75ec0
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 52 deletions.
51 changes: 14 additions & 37 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import sourmash
from sourmash.sourmash_args import FileOutput

from sourmash.logging import set_quiet, error, notify, set_quiet, print_results, debug
from sourmash.logging import set_quiet, error, notify, print_results, debug
from sourmash import sourmash_args
from sourmash.minhash import _get_max_hash_for_scaled

Expand Down Expand Up @@ -122,7 +122,6 @@ def split(args):

progress = sourmash_args.SignatureLoadingProgress()

total = 0
for sigfile in args.signatures:
# load signatures from input file:
this_siglist = sourmash_args.load_file_as_signatures(sigfile,
Expand Down Expand Up @@ -175,9 +174,8 @@ def split(args):

notify('loaded {} signatures from {}...', n_signatures, sigfile,
end='\r')
total += n_signatures

notify('loaded and split {} signatures total.', total)
notify(f'loaded and split {len(progress)} signatures total.')


def describe(args):
Expand All @@ -201,14 +199,11 @@ def describe(args):
# load signatures and display info.
progress = sourmash_args.SignatureLoadingProgress()

n_loaded = 0
for signature_file in args.signatures:
try:
loader = sourmash_args.load_file_as_signatures(signature_file,
progress=progress)
for sig in loader:
n_loaded += 1

# extract info, write as appropriate.
mh = sig.minhash
ksize = mh.ksize
Expand Down Expand Up @@ -245,7 +240,7 @@ def describe(args):
error('(continuing)')
raise

notify('loaded {} signatures total.', n_loaded)
notify(f'loaded {len(progress)} signatures total.')

if csv_fp:
csv_fp.close()
Expand Down Expand Up @@ -377,7 +372,7 @@ def merge(args):
if this_n:
notify('loaded and merged {} signatures from {}...', this_n, sigfile, end='\r')

if not total_loaded:
if not len(progress):
error("no signatures to merge!?")
sys.exit(-1)

Expand All @@ -386,7 +381,7 @@ def merge(args):
with FileOutput(args.output, 'wt') as fp:
sourmash.save_signatures([merged_sigobj], fp=fp)

notify('loaded and merged {} signatures', total_loaded)
notify(f'loaded and merged {len(progress)} signatures')


def intersect(args):
Expand All @@ -400,7 +395,6 @@ def intersect(args):

first_sig = None
mins = None
total_loaded = 0

progress = sourmash_args.SignatureLoadingProgress()

Expand All @@ -419,10 +413,9 @@ def intersect(args):
sys.exit(-1)

mins.intersection_update(sigobj.minhash.hashes)
total_loaded += 1
notify('loaded and intersected signatures from {}...', sigfile, end='\r')

if total_loaded == 0:
if len(progress) == 0:
error("no signatures to merge!?")
sys.exit(-1)

Expand Down Expand Up @@ -454,7 +447,7 @@ def intersect(args):
with FileOutput(args.output, 'wt') as fp:
sourmash.save_signatures([intersect_sigobj], fp=fp)

notify('loaded and intersected {} signatures', total_loaded)
notify(f'loaded and intersected {len(progress)} signatures')


def subtract(args):
Expand All @@ -478,7 +471,6 @@ def subtract(args):

progress = sourmash_args.SignatureLoadingProgress()

total_loaded = 0
for sigfile in args.subtraction_sigs:
for sigobj in sourmash_args.load_file_as_signatures(sigfile,
ksize=args.ksize,
Expand All @@ -495,9 +487,8 @@ def subtract(args):
subtract_mins -= set(sigobj.minhash.hashes)

notify('loaded and subtracted signatures from {}...', sigfile, end='\r')
total_loaded += 1

if not total_loaded:
if not len(progress):
error("no signatures to subtract!?")
sys.exit(-1)

Expand All @@ -510,7 +501,7 @@ def subtract(args):
with FileOutput(args.output, 'wt') as fp:
sourmash.save_signatures([subtract_sigobj], fp=fp)

notify('loaded and subtracted {} signatures', total_loaded)
notify(f'loaded and subtracted {len(progress)} signatures')


def rename(args):
Expand Down Expand Up @@ -538,7 +529,7 @@ def rename(args):

save_sigs.close()

notify("set name to '{}' on {} signatures", args.name, len(save_sigs))
notify(f"set name to '{args.name}' on {len(save_sigs)} signatures")


def extract(args):
Expand All @@ -553,16 +544,13 @@ def extract(args):
save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
save_sigs.open()

total_loaded = 0
for filename in args.signatures:
siglist = sourmash_args.load_file_as_signatures(filename,
ksize=args.ksize,
select_moltype=moltype,
progress=progress)
siglist = list(siglist)

total_loaded += len(siglist)

# select!
if args.md5 is not None:
siglist = [ ss for ss in siglist if args.md5 in ss.md5sum() ]
Expand All @@ -572,8 +560,7 @@ def extract(args):
for ss in siglist:
save_sigs.add(ss)

notify("loaded {} total that matched ksize & molecule type",
total_loaded)
notify(f"loaded {len(progress)} total that matched ksize & molecule type")
if not save_sigs:
error("no matching signatures!")
sys.exit(-1)
Expand All @@ -596,16 +583,13 @@ def filter(args):
save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
save_sigs.open()

total_loaded = 0
for filename in args.signatures:
siglist = sourmash_args.load_file_as_signatures(filename,
ksize=args.ksize,
select_moltype=moltype,
progress=progress)
siglist = list(siglist)

total_loaded += len(siglist)

# select!
if args.md5 is not None:
siglist = [ ss for ss in siglist if args.md5 in ss.md5sum() ]
Expand Down Expand Up @@ -636,8 +620,7 @@ def filter(args):

save_sigs.close()

notify("loaded {} total that matched ksize & molecule type",
total_loaded)
notify(f"loaded {len(progress)} total that matched ksize & molecule type")
notify("extracted {} signatures from {} file(s)", len(save_sigs),
len(args.signatures))

Expand All @@ -654,16 +637,13 @@ def flatten(args):
save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
save_sigs.open()

total_loaded = 0
for filename in args.signatures:
siglist = sourmash_args.load_file_as_signatures(filename,
ksize=args.ksize,
select_moltype=moltype,
progress=progress)
siglist = list(siglist)

total_loaded += len(siglist)

# select!
if args.md5 is not None:
siglist = [ ss for ss in siglist if args.md5 in ss.md5sum() ]
Expand All @@ -676,8 +656,7 @@ def flatten(args):

save_sigs.close()

notify("loaded {} total that matched ksize & molecule type",
total_loaded)
notify(f"loaded {len(progress)} total that matched ksize & molecule type")
notify("extracted {} signatures from {} file(s)", len(save_sigs),
len(args.signatures))

Expand All @@ -702,7 +681,6 @@ def downsample(args):

progress = sourmash_args.SignatureLoadingProgress()

total_loaded = 0
for sigfile in args.signatures:
siglist = sourmash_args.load_file_as_signatures(sigfile,
ksize=args.ksize,
Expand All @@ -713,7 +691,6 @@ def downsample(args):
mh = sigobj.minhash

notify('loading and downsampling signature from {}...', sigfile, end='\r')
total_loaded += 1
if args.scaled:
if mh.scaled:
mh_new = mh.downsample(scaled=args.scaled)
Expand Down Expand Up @@ -743,7 +720,7 @@ def downsample(args):

save_sigs.close()

notify("loaded and downsampled {} signatures", total_loaded)
notify(f"loaded and downsampled {len(progress)} signatures")


def sig_import(args):
Expand Down
28 changes: 13 additions & 15 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,22 @@
"""
import sys
import os
import argparse
import itertools
from enum import Enum
import traceback
import gzip
import zipfile

import screed
import sourmash

from sourmash.sbtmh import load_sbt_index
from sourmash.lca.lca_db import load_single_database
import sourmash.exceptions

from . import signature
from .logging import notify, error, debug_literal

from .index import (LinearIndex, ZipFileLinearIndex, MultiIndex)
from . import signature as sig
from .sbt import SBT
from .sbtmh import SigLeaf
from .lca import LCA_Database
import sourmash
from . import signature as sigmod

DEFAULT_LOAD_K = 31

Expand Down Expand Up @@ -304,7 +298,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
db = load_fn(filename,
traverse_yield_all=traverse_yield_all,
cache_size=cache_size)
except ValueError as exc:
except ValueError:
debug_literal(f"_load_databases: FAIL on fn {desc}.")
debug_literal(traceback.format_exc())

Expand All @@ -321,7 +315,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
# CTB: could be kind of time consuming for a big record, but at the
# moment screed doesn't expose format detection cleanly.
with screed.open(filename) as it:
record = next(iter(it))
_ = next(iter(it))
successful_screed_load = True
except:
pass
Expand All @@ -338,7 +332,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
return db


def load_file_as_index(filename, yield_all_files=False):
def load_file_as_index(filename, *, yield_all_files=False):
"""Load 'filename' as a database; generic database loader.
If 'filename' contains an SBT or LCA indexed database, or a regular
Expand All @@ -356,7 +350,7 @@ def load_file_as_index(filename, yield_all_files=False):
return _load_database(filename, yield_all_files)


def load_file_as_signatures(filename, select_moltype=None, ksize=None,
def load_file_as_signatures(filename, *, select_moltype=None, ksize=None,
yield_all_files=False,
progress=None):
"""Load 'filename' as a collection of signatures. Return an iterable.
Expand All @@ -382,7 +376,7 @@ def load_file_as_signatures(filename, select_moltype=None, ksize=None,
db = db.select(moltype=select_moltype, ksize=ksize)
loader = db.signatures()

if progress:
if progress is not None:
return progress.start_file(filename, loader)
else:
return loader
Expand Down Expand Up @@ -501,6 +495,9 @@ def __init__(self, reporting_interval=10):
self.interval = reporting_interval
self.screen_width = 79

def __len__(self):
return self.n_sig

def short_notify(self, msg_template, *args, **kwargs):
"""Shorten the notification message so that it fits on one line.
Expand Down Expand Up @@ -626,7 +623,7 @@ def add(self, ss):
i += 1

with gzip.open(outname, "wb") as fp:
sig.save_signatures([ss], fp, compression=1)
sigmod.save_signatures([ss], fp, compression=1)


class SaveSignatures_SigFile(_BaseSaveSignaturesToLocation):
Expand Down Expand Up @@ -689,7 +686,8 @@ def _exists(self, name):
return False

def add(self, ss):
assert self.zf
if not self.zf:
raise ValueError("this output is not open")
super().add(ss)

md5 = ss.md5sum()
Expand Down

0 comments on commit ff75ec0

Please sign in to comment.