Skip to content

Commit

Permalink
Merge pull request #4079 from vgteam/no-translation
Browse files Browse the repository at this point in the history
Convert GBWTGraph to GFA without using translation
  • Loading branch information
jltsiren authored Sep 8, 2023
2 parents bcd5712 + 699a1de commit 01ecaa3
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 9 deletions.
31 changes: 24 additions & 7 deletions src/subcommand/convert_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ int main_convert(int argc, char** argv) {
bool rgfa_pline = false;
bool wline = true;
algorithm_type gfa_output_algorithm = ALGORITHM_DEFAULT;
int num_threads = omp_get_max_threads(); // For GBWTGraph to GFA.

// For GBWTGraph to GFA.
int num_threads = omp_get_max_threads();
bool use_translation = true;

if (argc == 2) {
help_convert(argv);
Expand All @@ -74,6 +77,7 @@ int main_convert(int argc, char** argv) {
constexpr int OPT_REF_SAMPLE = 1000;
constexpr int OPT_GBWTGRAPH_ALGORITHM = 1001;
constexpr int OPT_VG_ALGORITHM = 1002;
constexpr int OPT_NO_TRANSLATION = 1003;

int c;
optind = 2; // force optind past command positional argument
Expand All @@ -98,6 +102,7 @@ int main_convert(int argc, char** argv) {
{"no-wline", no_argument, 0, 'W'},
{"gbwtgraph-algorithm", no_argument, 0, OPT_GBWTGRAPH_ALGORITHM},
{"vg-algorithm", no_argument, 0, OPT_VG_ALGORITHM},
{"no-translation", no_argument, 0, OPT_NO_TRANSLATION},
{"gam-to-gaf", required_argument, 0, 'G'},
{"gaf-to-gam", required_argument, 0, 'F'},
{"threads", required_argument, 0, 't'},
Expand Down Expand Up @@ -173,6 +178,9 @@ int main_convert(int argc, char** argv) {
case OPT_VG_ALGORITHM:
gfa_output_algorithm = algorithm_vg;
break;
case OPT_NO_TRANSLATION:
use_translation = false;
break;
case 'G':
no_multiple_inputs(input);
input = input_gam;
Expand Down Expand Up @@ -411,6 +419,7 @@ int main_convert(int argc, char** argv) {

gbwtgraph::GFAExtractionParameters parameters;
parameters.num_threads = num_threads;
parameters.use_translation = use_translation;
gbwtgraph::gbwt_to_gfa(*gbwt_graph, std::cout, parameters);
} else if (gfa_output_algorithm == algorithm_vg) {
// Use HandleGraph GFA conversion code
Expand Down Expand Up @@ -468,14 +477,22 @@ void help_convert(char** argv) {
<< " -p, --packed-out output in PackedGraph format [default]" << endl
<< " -x, --xg-out output in XG format" << endl
<< " -f, --gfa-out output in GFA format" << endl
<< " -H, --drop-haplotypes do not include haplotype paths in the output (useful with GBWTGraph / GBZ inputs)" << endl
<< " -H, --drop-haplotypes do not include haplotype paths in the output" << endl
<< " (useful with GBWTGraph / GBZ inputs)" << endl
<< "gfa output options (use with -f):" << endl
<< " -P, --rgfa-path STR write given path as rGFA tags instead of lines (multiple allowed, only rank-0 supported)" << endl
<< " -Q, --rgfa-prefix STR write paths with given prefix as rGFA tags instead of lines (multiple allowed, only rank-0 supported)" << endl
<< " -P, --rgfa-path STR write given path as rGFA tags instead of lines" << endl
<< " (multiple allowed, only rank-0 supported)" << endl
<< " -Q, --rgfa-prefix STR write paths with given prefix as rGFA tags instead of lines" << endl
<< " (multiple allowed, only rank-0 supported)" << endl
<< " -B, --rgfa-pline paths written as rGFA tags also written as lines" << endl
<< " -W, --no-wline write all paths as GFA P-lines instead of W-lines. Allows handling multiple phase blocks and subranges used together." << endl
<< " --gbwtgraph-algorithm Always use the GBWTGraph library GFA algorithm. Not compatible with other GBWT output options or non-GBWT graphs." << endl
<< " --vg-algorithm Always use the VG GFA algorithm. Works with all options and graph types, but can't preserve original GFA coordinates." << endl
<< " -W, --no-wline Write all paths as GFA P-lines instead of W-lines." << endl
<< " Allows handling multiple phase blocks and subranges used together." << endl
<< " --gbwtgraph-algorithm Always use the GBWTGraph library GFA algorithm." << endl
<< " Not compatible with other GFA output options or non-GBWT graphs." << endl
<< " --vg-algorithm Always use the VG GFA algorithm. Works with all options and graph types," << endl
<< " but can't preserve original GFA coordinates." << endl
<< " --no-translation When using the GBWTGraph algorithm, convert the graph directly to GFA." << endl
<< " Do not use the translation to preserve original coordinates." << endl
<< "alignment options:" << endl
<< " -G, --gam-to-gaf FILE convert GAM FILE to GAF" << endl
<< " -F, --gaf-to-gam FILE convert GAF FILE to GAM" << endl
Expand Down
12 changes: 11 additions & 1 deletion test/t/48_vg_convert.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 102
plan tests 106

vg construct -r complex/c.fa -v complex/c.vcf.gz > c.vg
cat <(vg view c.vg | grep ^S | sort) <(vg view c.vg | grep L | uniq | wc -l) <(vg paths -v c.vg -E) > c.info
Expand Down Expand Up @@ -361,13 +361,23 @@ vg convert -f components.gbz | sort > sorted.gfa
cmp sorted.gfa correct.gfa
is $? 0 "GBZ to GFA conversion works with multiple threads"

# GFA extraction from GBZ with/without translation.
vg gbwt --gbz-format -g chopping.gbz --max-node 2 -G graphs/chopping_walks.gfa
vg convert -f -t 1 chopping.gbz > with-translation.gfa
is $? 0 "GBZ to GFA with translation"
is "$(grep -c "^S" with-translation.gfa)" "8" "8 segments"
vg convert -f -t 1 --no-translation chopping.gbz > no-translation.gfa
is $? 0 "GBZ to GFA without translation"
is "$(grep -c "^S" no-translation.gfa)" "9" "9 segments"

rm -f components.gbwt components.gg components.gbz
rm -f direct.hg correct_paths.gaf correct_haplotypes.gaf
rm -f components.hg hg_paths.gaf hg_haplotypes.gaf gbz_hg_paths.gaf gbz_hg_haplotypes.gaf
rm -f components.xg xg_paths.gaf xg_haplotypes.gaf gbz_xg_paths.gaf gbz_xg_haplotypes.gaf
rm -f no_haplotypes.xg no_haplotypes.hg
rm -f extracted.gfa gbz.gfa extracted.hg
rm -f sorted.gfa correct.gfa
rm -f chopping.gbz with-translation.gfa no-translation.gfa

#####
# Reference path conversion
Expand Down

0 comments on commit 01ecaa3

Please sign in to comment.