-
Notifications
You must be signed in to change notification settings - Fork 6
/
mixup-by-region.sh
executable file
·73 lines (68 loc) · 3.85 KB
/
mixup-by-region.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Written 2013 by Peter Ralph and Graham Coop
#
# contact: [email protected]
#
# To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Usage:
# mixup-by-region.sh (chromosome number)
# Numbers of individuals we have:
#        Denmark         Latvia       Slovakia       Slovenia       Bulgaria
#              1              1              1              1              2
#        Finland         Norway        Ukraine        Albania         Cyprus
#              2              2              2              3              4
#         Russia         Greece         Turkey Czech Republic         Sweden
#              5              6              7             12             12
#        Austria        Romania    Netherlands        Hungary         Poland
#             13             16             17             20             20
#        Belgium        Ireland     Yugoslavia        Germany         France
#             36             60             70             75             98
#          Spain       Portugal          Italy United Kingdom    Switzerland
#            128            129            193            394           1050
#
# ... we restrict to countries with at least as many individuals as Hungary (20 or more).
# Locate the data tree: use the shared copy under /home/ibd when that
# directory exists, otherwise fall back to the copy under the user's home.
BASEDIR=/home/ibd/data
[ -d "$BASEDIR" ] || BASEDIR=$HOME/projects/ibd/data
# Directory that holds the helper scripts called later on.
SCRIPTDIR=/home/peter/projects/genome
# Note: the countries must be listed in the same order as they appear in Euro-samples-info.tsv.
# COUNTRIES='"France" "Spain" "Poland" "Hungary" "Yugoslavia" "Switzerland" "Italy" "Germany" "Portugal" "United.Kingdom" "Belgium" "Ireland"'
# COUNTRYPATT='\(France\)\|\(Spain\)\|\(Poland\)\|\(Hungary\)\|\(Yugoslavia\)\|\(Switzerland\)\|\(Italy\)\|\(Germany\)\|\(Portugal\)\|\(United Kingdom\)\|\(Belgium\)\|\(Ireland\)'
# COUNTRY_CSPLIT="/France/ /Spain/ /Poland/ /Hungary/ /Yugoslavia/ /Switzerland/ /Italy/ /Germany/ /Portugal/ /United.Kingdom/ /Belgium/ /Ireland/"
# Build the three country lists from a single ordered set of names so they
# cannot drift apart:
#   COUNTRIES       each name in double quotes, space separated
#   COUNTRYPATT     grep alternation of \(name\) groups joined by \|
#   COUNTRY_CSPLIT  one /name/ csplit pattern per country, space separated
# "United.Kingdom" is spelled with a "." in COUNTRIES and COUNTRY_CSPLIT and
# with a literal space in COUNTRYPATT.
COUNTRIES=
COUNTRYPATT=
COUNTRY_CSPLIT=
_or='\|'
_open='\('
_close='\)'
for _name in Belgium France Germany Hungary Ireland Italy Poland Portugal Spain Switzerland United.Kingdom Yugoslavia
do
    case $_name in
        United.Kingdom) _spaced='United Kingdom' ;;
        *) _spaced=$_name ;;
    esac
    COUNTRIES="${COUNTRIES:+$COUNTRIES }\"$_name\""
    COUNTRYPATT="${COUNTRYPATT:+$COUNTRYPATT$_or}$_open$_spaced$_close"
    COUNTRY_CSPLIT="${COUNTRY_CSPLIT:+$COUNTRY_CSPLIT }/$_name/"
done
# Drop the scratch variables so only the three lists remain defined.
unset _name _spaced _or _open _close
# Make one file per country (country00, country01, ...) holding the first two
# columns of the sample-info rows whose text matches a selected country.
# csplit starts a new piece at the next line matching each pattern in turn,
# which is why the country lists must follow the row order of the input file.
SAMPLEINFO="$BASEDIR/POPRES/european_labels/Euro-samples-info.tsv"
if [ ! -r "$SAMPLEINFO" ]
then
    echo "Cannot read sample information file: $SAMPLEINFO" >&2
    exit 1
fi
# $COUNTRY_CSPLIT is left unquoted on purpose: it has to split into one
# csplit pattern argument per country.
if ! cut -f 1,2 "$SAMPLEINFO" | grep "$COUNTRYPATT" | csplit -f country -z - $COUNTRY_CSPLIT
then
    # Without this check the script would carry on with no country files.
    echo "csplit could not split $SAMPLEINFO by country" >&2
    exit 1
fi
# Newline-separated list of the pieces csplit wrote, in glob (sorted) order.
CFILES=$(printf '%s\n' country??)
# Get the order of columns from the input file.
# The single required argument is the chromosome number; it selects both the
# Beagle input file and the genetic map.
if [ -z "${1:-}" ]
then
    echo "Usage: $0 (chromosome number)" >&2
    exit 2
fi
CHROM=$1
BFILE=POPRES_chr$CHROM.beagle
BDIR=$BASEDIR/POPRES/beagle-input
MAPFILE=$BASEDIR/genetic_maps/marker.genetic$CHROM.gmap
# Stop here rather than continue with an empty header if the input is missing.
if [ ! -r "$BDIR/$BFILE.gz" ]
then
    echo "Cannot read Beagle input file: $BDIR/$BFILE.gz" >&2
    exit 1
fi
# columns_all gets one header field per line, so line N names column N.
zcat "$BDIR/$BFILE.gz" | head -n 1 | tr ' ' '\n' > columns_all
# sample_column_numbers SAMPLES_FILE COLUMNS_FILE
# Print the comma-separated, 1-based line numbers of the lines of COLUMNS_FILE
# that exactly equal a value in the first tab-separated field of SAMPLES_FILE.
# IDs are matched as fixed strings (-F), so a character such as "." in an ID
# is not treated as a regular-expression wildcard.  Prints nothing when no
# line matches.
sample_column_numbers () {
    cut -f 1 "$1" | grep -n -x -F -f - "$2" | cut -d : -f 1 | tr '\n' ',' | sed -e 's/,$//'
}

# For each country file: find the Beagle columns of its samples, record them
# in colnums_<country file>, and pass columns 1,2 plus those columns through
# mixup-genomes.py.  The first two columns of the script's output are dropped
# again before saving, so each mixedup.* file holds sample columns only.
# $CFILES is unquoted on purpose: it is a whitespace-separated list of names.
for CFILE in $CFILES
do
    COLNUMS=$(sample_column_numbers "$CFILE" columns_all)
    printf '%s\n' "$COLNUMS" > "colnums_$CFILE"
    if [ -z "$COLNUMS" ]
    then
        echo "Warning: no columns of $BFILE match the samples in $CFILE" >&2
    fi
    zcat "$BDIR/$BFILE.gz" \
        | cut -f "1,2,$COLNUMS" -d ' ' \
        | python "$SCRIPTDIR/mixup-genomes.py" -i - -m "$MAPFILE" -g "reorder.$CFILE.$BFILE.gz" -o - \
        | cut -f 3- -d ' ' > "mixedup.$CFILE.$BFILE"
done
# Put the first two columns of the original Beagle file next to every
# mixedup.* file for this chromosome (in sorted glob order) and compress the
# result.  The glob is used directly instead of parsing the output of ls.
zcat "$BDIR/$BFILE.gz" | cut -f 1,2 -d ' ' | paste -d ' ' - mixedup.*."$BFILE" | gzip -c > "RANDOM.REGIONAL.$BFILE.gz"
# Save log files and whatnot, then clean up.
# The intermediates are deleted only after tar has succeeded, so a failed
# archive (missing file, full disk, ...) does not destroy the only copy of
# the logs.  Reads the globals BFILE and CFILES; returns non-zero, leaving all
# intermediate files in place, when tar fails.
save_logs_and_clean_up () {
    # $CFILES is unquoted on purpose: it is a whitespace-separated list.
    if ! tar -cvzf "reorder.REGIONAL.$BFILE.tar.gz" reorder.*."$BFILE".gz colnums_* columns_all $CFILES
    then
        echo "tar failed; keeping intermediate files" >&2
        return 1
    fi
    for CFILE in $CFILES
    do
        rm "colnums_$CFILE" "$CFILE" "mixedup.$CFILE.$BFILE" "reorder.$CFILE.$BFILE.gz"
    done
    rm columns_all
}
# Last command of the script, so its status becomes the script's exit status.
save_logs_and_clean_up