Predicted piRNA cluster no. '."$id".'
Show proTRAC run info
Hide proTRAC run info
'."$proTRAC_runinfo_html".'
Show proTRAC cluster info
Hide proTRAC cluster info
Location |
'."$cl_location_for_image".' |
Coordinates |
'."$start-$end".' |
Size [bp] |
'."$print_clustersize".' |
Sequence hit loci |
'."$hits_abs".' |
Mapped reads (normalized) |
'."$print_hits_normalized".' |
Mapped reads (normalized) per kb |
'."$print_density".' |
Normalized reads with 1T (1U) |
'."$print_1T".'% |
Normalized reads with 10A |
'."$print_10A".'% |
Normalized reads with length'." $min_piRNAsize-$max_piRNAsize".' nt |
'."$print_size".'% |
Normalized reads on the main strand(s) |
'."$print_strandbias".'% |
Predicted directionality |
'."#REPLACEDIRECTIONALITY#".' |
100%
0%
1T (1U)
reads
10A reads
'."$min_piRNAsize-$max_piRNAsize nt
reads".'
reads on mainstrand
Either the amount of reads with 1T (1U) OR 10A has to exceed '."$html_perc_1Uor10A".'% (set with option: -1Tor10A)
Alternatively the amount of reads with 1T (1U) AND 10A has to exceed '."$html_perc_1Uand10A".'% (set with option: -1Tand10A)
Minimum amount of reads with preferred size is '."$html_perc_size".'% (set with option: -pisize)
Minimum amount of reads on the main strand(s) is '."$html_perc_strand".'% (set with option: -clstrand)
Show read coverage
Hide read coverage
#CONTAINER1#
#CONTAINER2#
';
# Template for the annotation container. It is inserted into $html in place of
# #CONTAINER2#; afterwards the three placeholders below are substituted on $html
# with $html_GTFannotation_box, $html_RMannotation_box and $html_tfbs_list.
$html_annotation='
Gene Set Annotation
#REPLACEGENESET#
RepeatMasker Annotation
#REPLACEREPEATMASKER#
Transcription Factor Binding Sites
#REPLACETFBS#
';
# draw RepeatMasker annotation
# Both accumulators are appended to by draw_RMelement and stay empty when
# $RM_index is not 1.
$html_RMline="";
$html_RMannotation_box="";
if($RM_index==1)
{
# Two 8-entry color ramps, one per strand.
# NOTE(review): presumably indexed by $rm_color_code (0-7) set in
# draw_RMelement; the lookup itself is not visible in this section - confirm.
@html_RM_colors_plus=("#0035C6","#2656D8","#4F77E5","#7394EE","#90ABF6","#ADC2FD","#C0D1FF","#D0DDFF");
@html_RM_colors_minus=("#D40000","#E22B2B","#E64A4A","#EC6A6A","#F58888","#FCA5A5","#FFBDBD","#FFD1D1");
# %RM is keyed by "<location>#<n>" where n is int(coordinate/1000000) of the
# cluster start; draw_RM reads $RM{$loc}{1}, $RM{$loc}{2}, ... from that bin.
$s=int($start/1000000);
$loc="$cl_location#$s";
# Counters used by draw_RM (record number) and draw_RMelement (running number
# of repeats drawn for this cluster).
$repeat_id=0;
$repeat_in_cl=0;
draw_RM();
# Draw every RepeatMasker record of the current bin ($RM{$loc}) that overlaps
# the piRNA cluster [$start,$end].
#
# Reads globals : %RM, $loc, $repeat_id, $start, $end, $flank_size
# Writes globals: $repeat_id, $ref, @rm, $cluster_plus_flank, $rm_start, $rm_end
#                 (plus everything draw_RMelement writes)
#
# Records are read as $RM{$loc}{1}, $RM{$loc}{2}, ... (continuing from the
# current $repeat_id); the first missing/false entry ends the scan. Each record
# is an array reference used as: [0] start, [1] end, [2] name shown in the
# annotation box, [3] strand (matched against "+" or "C"), [4] divergence in %.
#
# The drawn interval is clipped to the cluster on BOTH sides. Previously a
# repeat that started upstream of the cluster and ended downstream of it kept
# its original end coordinate and was therefore drawn into the 3' flank.
# Assumes $rm[0] <= $rm[1] and $start <= $end.
sub draw_RM
{
    # Width (bp) of the drawn region: cluster plus both flanks. Loop-invariant.
    $cluster_plus_flank=$end-$start+1+($flank_size*2);
    while(1)
    {
        $repeat_id++;
        last unless $RM{$loc}{$repeat_id};
        $ref=$RM{$loc}{$repeat_id};
        @rm=@$ref;
        # repeats that do not touch the cluster are not drawn
        next if($rm[0]>$end||$rm[1]<$start);
        # clip the repeat to the cluster boundaries (5' and 3' end)
        $rm_start=($rm[0]<$start)?$start:$rm[0];
        $rm_end=($rm[1]>$end)?$end:$rm[1];
        draw_RMelement();
    }
}

# Append one repeat (current @rm, clipped to $rm_start..$rm_end) to the
# RepeatMasker annotation list and annotation line.
#
# Reads globals : @rm, $rm_start, $rm_end, $start, $flank_size, $cluster_plus_flank
# Writes globals: $repeat_in_cl, $rm_color_code, $relative_position, $pixel_pos,
#                 $pixel_width, $html_RMannotation_box, $html_RMline
sub draw_RMelement
{
    $repeat_in_cl++;
    # Divergence class 0..7: index of the first upper bound that $rm[4] stays
    # below; 7 when the divergence is 30 or more.
    my @divergence_bounds=(2,5,10,15,20,25,30);
    $rm_color_code=7;
    foreach my $class(0..$#divergence_bounds)
    {
        if($rm[4]<$divergence_bounds[$class])
        {
            $rm_color_code=$class;
            last;
        }
    }
    # html
    # Linear mapping of the drawn region onto x = 80..580.
    $relative_position=$rm_start-$start+1+$flank_size;
    $pixel_pos=($relative_position/$cluster_plus_flank)*500+80;
    $pixel_width=((($rm_end-$start+1+$flank_size)/$cluster_plus_flank)*500+80)-$pixel_pos;
    # very short elements still get a width of one pixel
    if($pixel_width<=0.5)
    {
        $pixel_width=1;
    }
    # never draw beyond x = 581
    if($pixel_pos+$pixel_width>581)
    {
        $pixel_width=581-$pixel_pos;
    }
    # The annotation text always shows the unclipped record coordinates.
    if($rm[3]=~/\+/)
    {
        $html_RMannotation_box.='
'."$repeat_in_cl. $rm[2]".': '."$rm[0]-$rm[1] (+)".', Divergence to consensus: '."$rm[4]".'%'."\n";
        $html_RMline.='
'."\n";
        $html_RMline.='
'."\n";
    }
    # a strand field containing "C" is reported as minus strand
    elsif($rm[3]=~/C/)
    {
        $html_RMannotation_box.='
'."$repeat_in_cl. $rm[2]".': '."$rm[0]-$rm[1] (-)".', Divergence to consensus: '."$rm[4]".'%'."\n";
        $html_RMline.='
'."\n";
        $html_RMline.='
'."\n";
    }
}
if(int($start/1000000)
=$start&&$gtf[1]<=$end)
{
$gtf_start=$gtf[0];
$gtf_end=$gtf[1];
draw_GTFelement();
# Append one gene (current @gtf, drawn from $gtf_start to $gtf_end) to the gene
# set annotation list and annotation line.
#
# Reads globals : @gtf ([0] start, [1] end, [2] label/biotype text, [3] strand),
#                 $gtf_start, $gtf_end, $start, $end, $flank_size,
#                 $cluster_plus_flank
# Writes globals: $gene_in_cl, $gtf_start, $gtf_end, $relative_position,
#                 $pixel_pos, $pixel_width, $html_color,
#                 $html_GTFannotation_box, $html_GTFline
#
# The drawn interval is clamped to the cluster [$start,$end] here, so a gene
# that covers the whole cluster is no longer drawn into the 3' flank (callers
# clip only one side per overlap case).
sub draw_GTFelement
{
    $gene_in_cl++;
    # clip the drawn interval to the cluster boundaries
    $gtf_start=$start if($gtf_start<$start);
    $gtf_end=$end if($gtf_end>$end);
    # html
    # Linear mapping of the drawn region onto x = 80..580.
    $relative_position=$gtf_start-$start+1+$flank_size;
    $pixel_pos=($relative_position/$cluster_plus_flank)*500+80;
    $pixel_width=((($gtf_end-$start+1+$flank_size)/$cluster_plus_flank)*500+80)-$pixel_pos;
    # very short elements still get a width of one pixel
    if($pixel_width<=0.5)
    {
        $pixel_width=1;
    }
    # never draw beyond x = 581
    if($pixel_pos+$pixel_width>581)
    {
        $pixel_width=581-$pixel_pos;
    }
    # Gene class decides the color; the pseudogene test has priority over
    # protein_coding, everything else is "other".
    my $gene_class='other';
    if($gtf[2]=~/pseudogene/||$gtf[2]=~/pseudo_gene/)
    {
        $gene_class='pseudo';
    }
    elsif($gtf[2]=~/protein_coding/)
    {
        $gene_class='coding';
    }
    my %plus_color=(pseudo=>"#ADC2FD",coding=>"#0035C6",other=>"#162757");
    my %minus_color=(pseudo=>"#FCA5A5",coding=>"#D40000",other=>"#57163D");
    # The annotation text always shows the unclipped gene coordinates.
    if($gtf[3]=~/\+/)
    {
        $html_color=$plus_color{$gene_class};
        $html_GTFannotation_box.=''."$gene_in_cl. $gtf[2]".': '."$gtf[0]-$gtf[1] (+)".'
'."\n";
        $html_GTFline.=''."\n";
        $html_GTFline.=''."\n";
    }
    elsif($gtf[3]=~/-/)
    {
        $html_color=$minus_color{$gene_class};
        $html_GTFannotation_box.=''."$gene_in_cl. $gtf[2]".': '."$gtf[0]-$gtf[1] (-)".'
'."\n";
        $html_GTFline.=''."\n";
        $html_GTFline.=''."\n";
    }
}
}
# gene overlaps the 5' end of the piRNA cluster
elsif($gtf[0]<=$start&&$gtf[1]>=$start)
{
$gtf_start=$start;
$gtf_end=$gtf[1];
draw_GTFelement();
}
# gene overlaps the 3' end of the piRNA cluster
elsif($gtf[0]<=$end&&$gtf[1]>=$end)
{
$gtf_start=$gtf[0];
$gtf_end=$end;
draw_GTFelement();
}
}
else
{
last;
}
}
}
if(int($start/1000000)39)
{
$cl_location_for_image=substr($cl_location,0,34);
$cl_location_for_image.='[...]';
}
# A split was found: remember the two coordinates enclosing it and append
# them to the directionality label.
if($best_split>-1)
{
    ($split1,$split2)=@coordinates_for_split[$best_split,$best_split+1];
    $directionality.=" (split between $split1 and $split2)";
}
$html=~s{#REPLACEDIRECTIONALITY#}{$directionality};
# One tab-separated row per cluster in the results table. The row is left
# without a line break here; the binding-site code further down terminates it.
if($results_table==1)
{
    print RESULTS_TEXT join("\t",
        "Cluster $id",
        "Location: $cl_location",
        "Coordinates: $start-$end",
        "Size [bp]: $print_clustersize",
        "Hits (absolute): $hits_abs",
        "Hits (normalized): $hits_norm",
        "Hits (normalized) per kb: $print_density",
        "Normalized hits with 1T: $print_1T%",
        "Normalized hits with 10A: $print_10A%",
        "Normalized hits $min_piRNAsize-$max_piRNAsize nt: $print_size%",
        "Normalized hits on the main strand(s): $print_strandbias%",
        "Predicted directionality: $directionality");
}
# outer coordinates of the drawn region (cluster plus flanks)
($start_with_flank,$end_with_flank)=($start-$flank_size,$end+$flank_size);
# Template for the topology/coverage chart of one cluster. It interpolates the
# (possibly shortened) location label and the outer coordinates of the drawn
# region, and carries placeholders that are filled in afterwards:
#   on $html_topo itself: #REPLACESPLITQUBES#, #REPLACEFLANK1#, #REPLACEFLANK2#,
#                         #REPLACESCALE#, #REPLACECOVERAGE#, #REPLACETOPO#
#   on $html, after this template replaced #CONTAINER1#:
#                         #REPLACERMLINE#, #REPLACEGTFLINE#, #REPLACETFBSPOINTS#
$html_topo='
WHAT DO I SEE HERE?
This chart shows the location of mapped sequence reads within a predicted piRNA cluster. The color refers to the number of genomic hits produced by the sequence read in question. A dark red bar indicates that this sequence read produces many other hits elsewhere in the genome. Many adjacent red or yellow bars can indicate the presence of a multi-copy element such as transposons or rRNA genes. A dark green bar indicates that this sequence read maps uniquely to this locus.
1 hit
2-5 hits
6-10 hits
11-20 hits
21-50 hits
51-100 hits
> 100 hits
'."$cl_location_for_image".'
'."$start_with_flank".'
'."$end_with_flank".'
#REPLACEFLANK1#
#REPLACEFLANK2#
Gene Set
RepeatMasker
#REPLACESPLITQUBES#
#REPLACESCALE#
#REPLACETOPO#
#REPLACECOVERAGE#
#REPLACERMLINE#
#REPLACEGTFLINE#
#REPLACETFBSPOINTS#
RepeatMasker Color Code
+
100-98% Identity
<98-95% Identity
<95-90% Identity
<90-85% Identity
<85-80% Identity
<80-75% Identity
<75-70% Identity
<70% Identity
-
Gene Set Color Code
+
Gene
Pseudogene
Other
-
Topology/Coverage Color Code
Coverage Plus Strand
Coverage Minus Strand
Mainstrand: Plus
Mainstrand: Minus
Complementary Strand
Flanking Region
(if option -flank >0)
';
# Strand/split boxes of the topology chart.
# Width (bp) of the drawn region: cluster plus both flanks. The region is
# mapped linearly onto x = 80..580 (500 pixels).
$cluster_plus_flank=$end-$start+1+($flank_size*2);
# width of one flank in pixels
my $split_flank_px=($flank_size/$cluster_plus_flank*500);
# Default to an empty box set so that the placeholder is always removed, also
# for directionality values that match none of the cases below (it used to
# stay in the generated page in that situation).
$html_splitqubes='';
if($best_split>-1)
{
    # the split is placed midway between the two enclosing coordinates
    $split_coordinate=(($split1+$split2)/2)-$start+$flank_size;
    $html_split_coordinate=($split_coordinate/$cluster_plus_flank)*500+80;
    if($directionality=~/plus-minus/)
    {
        # calculate some html values
        # blue box from the end of the 5' flank to the split, red box from
        # the split to the start of the 3' flank
        $html_start_blue=80+$split_flank_px;
        $html_width_blue=$html_split_coordinate-80-$split_flank_px;
        $html_width_red=580-$html_split_coordinate-$split_flank_px;
        $html_splitqubes='
';
    }
    elsif($directionality=~/minus-plus/)
    {
        # calculate some html values
        # mirrored case: red box first, blue box after the split
        $html_start_red=80+$split_flank_px;
        $html_width_red=$html_split_coordinate-80-$split_flank_px;
        $html_width_blue=580-$html_split_coordinate-$split_flank_px;
        $html_splitqubes='
';
    }
}
elsif($directionality eq"mono:plus")
{
    # one blue box covering the cluster without the flanks
    $html_start_blue=80+$split_flank_px;
    $html_width_blue=500-(2*$split_flank_px);
    $html_splitqubes='
';
}
elsif($directionality eq"mono:minus")
{
    # one red box covering the cluster without the flanks
    $html_start_red=80+$split_flank_px;
    $html_width_red=500-(2*$split_flank_px);
    $html_splitqubes='
';
}
$html_topo=~s/#REPLACESPLITQUBES#/$html_splitqubes/;
# Flank boxes of the topology chart: both stay empty unless a flank is drawn.
$html_flank1=$html_flank2="";
if($flank_size>0)
{
    # width of one flank in pixels (drawn region = 500 pixels, x = 80..580)
    my $flank_block_px=($flank_size/$cluster_plus_flank*500);
    $flank_block_up_width=$flank_block_px;
    # x position at which the downstream flank starts
    $flank_block_pos_down=580-$flank_block_px;
    $html_flank1="\n";
    $html_flank2="\n";
}
$html_topo=~s{#REPLACEFLANK1#}{$html_flank1};
$html_topo=~s{#REPLACEFLANK2#}{$html_flank2};
# Accumulate per-position read coverage for both strands from the FASTA file
# of the current cluster and track the highest value in $extreme.
#
# Only header lines (starting with ">") are evaluated. They are split on tabs
# and everything up to the last ":" is removed from the first six fields;
# afterwards field 1 is used as coordinate, field 4 as read count and field 5
# as strand (anything without "+" counts as minus strand).
%transcription_plus=();
%transcription_minus=();
$extreme=0;
if(open(IN,'<',"$out_folder/$id.fasta"))
{
    # The loop has to read from IN: an empty while() condition is always true
    # in Perl and would never consume the file.
    while(<IN>)
    {
        next unless $_=~/^>/;
        @d=split("\t",$_);
        foreach$element(0..5)
        {
            $d[$element]=~s/.+://;
        }
        # strand selects the coverage table
        my $strand_coverage=($d[5]=~/\+/)?\%transcription_plus:\%transcription_minus;
        # raw read count, or reads per million mapped reads (rpm)
        my $read_weight;
        if($normalize_by_total_number_of_mapped_reads==0)
        {
            $read_weight=$d[4];
        }
        else
        {
            $read_weight=($d[4]/$total_reads)*1000000;
        }
        # NOTE(review): the covered span is length($d[1]) positions, i.e. the
        # number of characters of the coordinate field, not the length of the
        # read sequence. Kept unchanged because the header layout is not
        # visible here - TODO confirm which field/line holds the sequence.
        foreach$pos($d[1]..($d[1]+length$d[1])-1)
        {
            $strand_coverage->{$pos}+=$read_weight;
            if($strand_coverage->{$pos}>$extreme)
            {
                $extreme=$strand_coverage->{$pos};
            }
        }
    }
    close IN;
}
else
{
    # same reporting style as the second pass over this file further down
    print"\nUnable to open $out_folder/$id.fasta to calculate read coverage.\n$!\n\n";
}
# Axis label for the coverage plot: highest coverage rounded to two decimals.
my $extreme_in_hundredths=int(($extreme*100)+0.5);
$print_extreme=$extreme_in_hundredths/100;
# horizontal correction of the label: 5 units per character of the number
$length_extremestring=length($print_extreme);
$extreme_poscorr=5*$length_extremestring;
# The scale shows the same maximum for the plus and the minus strand axis.
$html_coverage_scale=join("\n",
    '',
    'Mapped',
    'Reads',
    $print_extreme,
    'plus strand',
    'minus strand',
    $print_extreme,
    '');
$html_topo=~s{#REPLACESCALE#}{$html_coverage_scale};
# Turn the per-position coverage into one entry per pixel column of the chart.
# Positions of the drawn region (cluster plus flanks) are mapped onto
# x = 80..580; for every column the highest plus and minus strand coverage seen
# since the previous column is reported, scaled to a height of at most 100.
$prev_pos=-1;
$html_coverage='';
$html_max_plus=0;
$html_max_minus=0;
# The first label has to start at the first drawn position. Without this
# initialisation it continued from whatever value an earlier cluster left.
$last_cluster_pos=$start-$flank_size-1;
# Last drawn position. Needed after the loop, because a foreach loop variable
# regains its previous value once the loop is left.
my $coverage_final_pos=$end+$flank_size;
# Emit the column ending at the given position from the running maxima, then
# reset the maxima and remember where the next column starts.
my $paint_coverage_column=sub
{
    my($column_end)=@_;
    $last_cluster_pos++;
    $html_max_plus_round=(int(($html_max_plus*100)+0.5))/100;
    $html_max_minus_round=(int(($html_max_minus*100)+0.5))/100;
    $html_coverage.=''." Region: $cl_location_for_image $last_cluster_pos-$column_end. Max. coverage (+): $html_max_plus_round. Max coverage (-): $html_max_minus_round".'
';
    $html_coverage.='';
    # $extreme is the overall maximum, so it is > 0 whenever a column
    # maximum is > 0
    if($html_max_plus>0)
    {
        $html_height=int((($html_max_plus/$extreme)*100)+0.5);
        if($html_height>0)
        {
            $html_y_coord=201-$html_height;
            $html_coverage.=''."\n";
        }
    }
    if($html_max_minus>0)
    {
        $html_height=int((($html_max_minus/$extreme)*100)+0.5);
        if($html_height>0)
        {
            $html_coverage.=''."\n";
        }
    }
    $html_max_plus=0;
    $html_max_minus=0;
    $last_cluster_pos=$column_end;
};
foreach$pos($start-$flank_size..$coverage_final_pos)
{
    $relative_position=$pos-$start+1+$flank_size;
    $html_pixel=int((($relative_position/$cluster_plus_flank)*500+80)+0.5);
    # save highest value per pixel for html
    if($transcription_plus{$pos}&&$transcription_plus{$pos}>$html_max_plus)
    {
        $html_max_plus=$transcription_plus{$pos};
    }
    if($transcription_minus{$pos}&&$transcription_minus{$pos}>$html_max_minus)
    {
        $html_max_minus=$transcription_minus{$pos};
    }
    # check if pixel changes for html output
    if($html_pixel>$prev_pos&&$prev_pos!=-1)
    {
        $paint_coverage_column->($pos);
    }
    $prev_pos=$html_pixel;
}
# paint last html pixel
# Only needed when positions are left after the last column change. The running
# maxima are used for the height, as for every other column.
if($last_cluster_pos<$coverage_final_pos)
{
    $paint_coverage_column->($coverage_final_pos);
}
$html_topo=~s/#REPLACECOVERAGE#/$html_coverage/;
undef%transcription_plus;
undef%transcription_minus;
# search binding motifs
# Scans $cluster_sequence with every pattern in %binding_motifs (value = regular
# expression, key = motif name; a name ending in " rc" marks the reverse
# complement pattern and is reported as minus strand). Each hit is added to
# the binding site list/points for the html page and to the results table row.
$html_tfbs_list="";
$html_tfbs_points="";
if($search_bindingsites==1)
{
    # hits already reported, keyed by "<matched sequence>-<1-based position>"
    %sites_in_cluster=();
    $sites_in_cluster="";
    $found_motifs=0;
    $found_motifs_rc=0;
    foreach$motif_name(keys%binding_motifs)
    {
        $check_sequence=$cluster_sequence;
        $print_motiv_name=$motif_name;
        $print_motiv_name=~s/ rc$//;
        my $motif_is_rc=($motif_name=~/ rc$/)?1:0;
        while($check_sequence=~/$binding_motifs{$motif_name}/)
        {
            $hit_motif=$&;
            my $hit_offset=$-[0];
            # a pattern matching the empty string would never advance
            last if(length($hit_motif)==0);
            $position=$hit_offset+1;
            # Mask the hit in place. Cutting it out and prepending the mask
            # instead joins the sequence before and after the hit, which can
            # create motifs that do not exist in the cluster.
            $replace="X" x length($hit_motif);
            substr($check_sequence,$hit_offset,length($hit_motif))=$replace;
            # same sequence at the same position was already reported
            next if($sites_in_cluster{"$hit_motif-$position"});
            $sites_in_cluster{"$hit_motif-$position"}=1;
            # position relative to the cluster start, for the results table
            $position=$position-$flank_size;
            $sites_in_cluster.="$print_motiv_name ($hit_motif: $position) ";
            $pixel_pos=int(((($position/$cluster_plus_flank)*500)+76)+0.5);
            # position as coordinate on the reference, for the html list
            $position+=$start-1;
            if($motif_is_rc)
            {
                $found_motifs_rc++;
                $html_tfbs_list.=''."$print_motiv_name (Sequence: $hit_motif (-): $position)".'
'."\n";
                $html_tfbs_points.=''."\n";
            }
            else
            {
                $found_motifs++;
                $html_tfbs_list.=''."$print_motiv_name (Sequence: $hit_motif (+): $position)".'
'."\n";
                $html_tfbs_points.=''."\n";
            }
        }
    }
    # Terminate the results table row of this cluster; nothing is written
    # when no results table was requested.
    if($results_table==1)
    {
        if($found_motifs+$found_motifs_rc>0)
        {
            $sites_in_cluster=~s/ $//;
            print RESULTS_TEXT"\tBinding sites: $sites_in_cluster\n";
        }
        else
        {
            print RESULTS_TEXT"\n";
        }
    }
    undef%sites_in_cluster;
}
elsif($results_table==1)
{
    print RESULTS_TEXT"\n";
}
# Topology bars: one bar per pixel column and hit class for the reads of the
# cluster. Classes are processed from the highest number of genomic hits
# ("101-999999999", color index 6) down to unique reads ("1-1", color index 0),
# so the output order is the same as with one file pass per class.
@redundancy_code=("1-1","2-5","6-10","11-20","21-50","51-100","101-999999999");
$html_topo_bars="";
# every entry carries the leading "#" (the last three lacked it)
@html_colors=("#0D9011","#05B70B","#6AE102","#CAE102","#E1CA02","#E17902","#E10202");
# Read the header lines once instead of re-opening the file for each class.
# The loop reads from IN explicitly: an empty while() condition is always true
# in Perl and would never consume the file.
my @topo_read_headers;
if(open(IN,'<',"$out_folder/$id.fasta"))
{
    while(<IN>)
    {
        next unless $_=~/^>/;
        $_=~s/\s*$//;
        push(@topo_read_headers,$_);
    }
    close IN;
}
else
{
    print"\nUnanble to open $out_folder/$id.fasta to generate html image file.\n$!\n\n";
}
foreach$redundancy(0..6)
{
    $color_id=6-$redundancy;
    # lower and upper bound of genomic hits for this class
    $range=pop@redundancy_code;
    @range=split('-',$range);
    $html_prev_pixel=-1;
    foreach my $topo_header(@topo_read_headers)
    {
        @d=split("\t",$topo_header);
        $d[1]=~s/Coordinate://;
        $d[3]=~s/Hits://;
        $d[5]=~s/Strand://;
        if($d[3]>=$range[0]&&$d[3]<=$range[1])
        {
            $relative_position=$d[1]-$start+1+$flank_size;
            # html
            $html_pixel=int((($relative_position/$cluster_plus_flank)*500+80)+0.5);
            # at most one bar per pixel column: a read is only drawn when it
            # lies right of the previous read of this class
            if($html_pixel>$html_prev_pixel)
            {
                if($d[5]=~/\+/)
                {
                    $html_topo_bars.=''."\n";
                }
                else
                {
                    $html_topo_bars.=''."\n";
                }
            }
            $html_prev_pixel=$html_pixel;
        }
    }
}
$html_topo=~s/#REPLACETOPO#/$html_topo_bars/;
# Assemble and write the html page of the current cluster.
if($html_files==1)
{
    # The containers have to be inserted first: they bring in the
    # placeholders that are filled by the substitutions that follow.
    $html=~s/#CONTAINER1#/$html_topo/;
    $html=~s/#CONTAINER2#/$html_annotation/;
    $html=~s/#REPLACEREPEATMASKER#/$html_RMannotation_box/;
    $html=~s/#REPLACERMLINE#/$html_RMline/;
    $html=~s/#REPLACEGENESET#/$html_GTFannotation_box/;
    $html=~s/#REPLACEGTFLINE#/$html_GTFline/;
    $html=~s/#REPLACETFBSPOINTS#/$html_tfbs_points/;
    $html=~s/#REPLACETFBS#/$html_tfbs_list/;
    $html.=' ';
    # Report a page that cannot be written instead of silently printing to
    # a filehandle that was never opened.
    if(open(HTML,'>',"$out_folder/$id.html"))
    {
        print HTML $html;
        close HTML;
    }
    else
    {
        print"\nUnable to write $out_folder/$id.html.\n$!\n\n";
    }
}
}
# The FASTA file of the cluster is deleted when $fasta_piRNA_files is 0.
unlink("$out_folder/$id.fasta") if($fasta_piRNA_files==0);
}
}
undef%seqs_in_candidate;
}
}
$stat=0;
}
}
}
close OUT;
close MAP;
#add buttons to html files
if($html_files==1&&$id>0)
{
print"\nFinalizing html output files...";
$file_id=0;
while(1)
{
$file_id++;
last if(!-e"$out_folder/$file_id.html");
open(HTML,"$out_folder/$file_id.html");
@html=;
close HTML;
$next=$file_id+1;
$previous=$file_id-1;
if($next>$id)
{
$next=1;
}
if($previous<1)
{
$previous=$id;
}
open(FINALHTML,">$out_folder/$file_id.html");
$lines=@html;
foreach(@html)
{
if($_=~/