#!/usr/bin/perl use strict; # This scripts reads the Bowtie .aln output and determines the insert size from mapped mate-pair reads. # # usage: perl get_insert_sizes_from_bowtie_aln.pl ## # @author - Matthew Conte @ARGV == 1 or die "usage: perl get_insert_sizes_from_bowtie_aln.pl \n"; my $bowtie_output = @ARGV[0]; open (BOWTIE_OUTPUT, "< $bowtie_output") or die "Can't open $bowtie_output\n"; my $prev_seq_name; my $prev_pos; my $prev_base_name; my $prev_lane; my $prev_dir; my $first_bool = 0; my $count = 0; my @diffs; while () { next if (/^@/); my @line = split(/\t/, $_); die "not enough fields in line $_\n" if (@line < 11); my $seq_name = @line[0]; my $pos = @line[3]; my @tmp = split(/\//, $seq_name); my $base_name = @tmp[0]; my $dir = @line[1]; if ($dir eq $prev_dir){ die "$_ read pairs were mapped in the same orientation, check on this\n"; } #get the length of the sequence b/c we are going to add it to the difference since #bowtie reports the starts of both pairs in reference space, but velvet wants the #length of the sequenced fragment which includes the reads my $sequence = @line[9]; my $seq_length = length($sequence); if ($base_name eq $prev_base_name) { $count++; my $diff; if ($prev_pos > $pos) { $diff = $prev_pos - $pos + $seq_length; } elsif ($pos > $prev_pos) { $diff = $pos - $prev_pos + $seq_length; } else { die "something looks wrong with the positions of the mapped reads, $seq_name\n"; } push (@diffs, $diff); } $prev_base_name = $base_name; $prev_dir = $dir; #$prev_lane = $lane; $prev_pos = $pos; } for my $i (@diffs) { print "$i\n"; } #print "@diffs\n"; #print "$count\n";