#!/usr/local/bin/perl
use Getopt::Long;
use LWP::Simple;
use LWP::Online 'online';
$|=1;

# Parse the command line into package variables.
#  - Specifications ending in "{1,}" accept one or more values per option
#    (e.g. "-id SRR1 SRR2"); "=s" takes strings, "=i" takes integers.
#  - Names separated by "|" are aliases of the same option.
# GetOptions reports unknown options and values of the wrong type on STDERR
# and returns false. Stop in that case: continuing would silently drop the
# offending option and could trigger a different download than intended.
GetOptions
	(
	"sra_accession|id=s{1,}"=>\@SRA_id,
	"cl_sequence|s=i{1,}"=>\@cluster_seq,
	"cl_reads|r=i{1,}"=>\@cluster_reads,
	"cl_image|i=i{1,}"=>\@cluster_image,
	"sr|rs=i{1,}"=>\@sr,
	"si|is=i{1,}"=>\@si,
	"ri|ir=i{1,}"=>\@ri,
	"sri|sir|isr|irs|rsi|ris=i{1,}"=>\@sri,
	"clipped_filtered"=>\$clipped_filtered,
	"miR"=>\$miR,
	"miRprec"=>\$miRprec,
	"directory|d=s"=>\$target_directory,
	"help|h|?"=>\$show_help
	)
	or die "\nError! Invalid command line. Use option -h to show all available options.\n";

# Program banner, printed on every run. The here-document is not
# interpolated and starts with two empty lines, like the output it replaces.
print <<'BANNER';


           ------------------------------------------------------
             piFETCH: Fetching data from piRNA cluster database

BANNER

# Print the usage text and stop when -h, -? or -help was given
# ($show_help is set by GetOptions).
if($show_help)
	{
	print"
 ------------------------------------------------------------------------------
  OPTION          VALUE          DESCRIPTION

  -h                             Will print this information
  -?
  -help

  -id             SRA accession  Specifies the desired SRA dataset(s). You can
  -sra_accession  SRA accession  use multiple SRA accessions at once. When
                                 providing more than one SRA accession piFETCH
                                 will download the complete proTRAC results.
                                 The options -s -r and -i will be ignored.
  
  -s              integer        Specifies the desired cluster id (or number)
  -cl_sequence    integer        and will download the genomic cluster sequence
                                 Applies only if exactly one SRA accession was
                                 provided. You can use multiple cluster id's
                                 as desired.

  -r              integer        Specifies the desired cluster id (or number)
  -cl_reads       integer        and will download a FASTA file comprising
                                 the reads that were mapped to the cluster
                                 Applies only if exactly one SRA accession was
                                 provided. You can use multiple cluster id's
                                 as desired.

  -i              integer        Specifies the desired cluster id (or number)
  -cl_image       integer        and will download the proTRAC image file.
                                 Applies only if exactly one SRA accession was
                                 provided. You can use multiple cluster id's
                                 as desired.

  -sr             integer        A combination of the options -s and -r.
  -si             integer        A combination of the options -s and -i.
  -ri             integer        A combination of the options -r and -i.
  -sri            integer        A combination of the options -s, -r and -i.

  -clipped_filtered              Will download the clipped and filtered
                                 sequence data for the specified SRA datasets.

  -miR                           Will download sequence reads that produced
                                 perfect full-length matches to mature miRNA
                                 sequences deposited at miRBase.

  -miRprec                       Will download sequence reads that produced
                                 perfect full-length matches to miRNA hairpin
                                 sequences deposited at miRBase.

  -d              directory      Specifies the target directory for downloads.
  -directory      directory      By default piFETCH will save the files in the
                                 current working directory.


  HERE ARE SOME EXAMPLE COMMANDS HOW TO USE piFETCH:
  
  1.
  Download complete proTRAC results:
  perl piFETCH.pl -id SRR1755255
  
  You can use multiple SRA accessions at once like this:
  perl piFETCH.pl -id SRR1755255 SRR1654828 SRR1184429
  
  2.
  Custom download information for specific clusters
  (Only if exactly one SRA accession is provided)
   -s [piRNA cluster #] -> Sequence of specified cluster(s)
   -r [piRNA cluster #] -> Reads mapped to specified cluster(s) 
   -i [piRNA cluster #] -> proTRAC image file for specified cluster(s)
  You can use multiple cluster # at once:

  perl piFETCH.pl -id SRR1755255 -s 1 -r 1 2 3 4 5 -i 20 30
  
  You can combine the options -s -r and -i as follows:
  perl piFETCH.pl -id SRR1755255 -sr 1 2 3 -ri 10 11 12 -sri 20 30 40
  

";
	exit;
	}

# Stop right away when online() (imported from LWP::Online) reports that the
# web cannot be reached; otherwise acknowledge and continue.
print "Check http access: ";
online() or die "No basic http access to the web.\n";
print "OK.";


# Distribute the cluster ids given via the combined options to the three
# per-file-type lists:
#   -sr  -> sequence + reads          -si  -> sequence + image
#   -ri  -> reads + image             -sri -> sequence + reads + image
# Ids are appended after the ones given via -s / -r / -i, in the order
# sr, si, ri, sri.
push @cluster_seq,   @sr, @si, @sri;
push @cluster_reads, @sr, @ri, @sri;
push @cluster_image, @si, @ri, @sri;


 # Remove redundant values from arrays
# Replace each option list by the list handed back by remove_redundancies().
# The sub copies its input before building the result, so assigning the
# result back to the same array is safe.
for my $list (\@SRA_id, \@cluster_seq, \@cluster_reads, \@cluster_image) {
	@$list = remove_redundancies($list);
}

# Remove duplicate entries from a list and sort what is left.
#
# Argument: reference to the array to process (the array itself is not
#           modified).
# Returns:  the list of unique values. The same list is also stored in the
#           package variable @nonred, which callers in this script read.
#
# Values are ordered numerically. Values that compare equal as numbers - this
# is the case for all non-numeric strings such as SRA accessions - are
# ordered alphabetically, so the result does not depend on the (random)
# order in which a hash returns its keys.
sub remove_redundancies
	{
	my($list)=@_;
	my %seen;
	@seen{@$list}=();	# hash slice: every distinct value becomes one key
	@nonred=sort{$a<=>$b||$a cmp $b}keys%seen;
	return@nonred;
	}


# Check list of SRA IDs
#
# Downloads the table of available datasets and fills
#   %SRA2species  SRA accession -> species name (lower case, first blank
#                 replaced by "_")
#   %SRA2clnum    SRA accession -> number of clusters
# Every requested accession that is listed in the table is collected in
# @valid_IDs. The script stops when no accession was given at all or when
# the table cannot be loaded.
if(@SRA_id>0)
	{
	# Get species name and number of clusters for each SRA ID
	print"\nCheck available data: ";
	$SRA_table=get("http://www.smallrnagroup.uni-mainz.de/piRNAclusterDB/SRAsets_table.html");
	# get() returns undef on failure. Pass the message to die itself;
	# "die print ..." would die with the return value of print ("1").
	die "\nError! Session has timed out. Waiting too long for server answer.\n" unless $SRA_table;
	@SRA_table=split("\n",$SRA_table);
	%SRA2species=();
	%SRA2clnum=();
	foreach my $row(@SRA_table)
		{
		# Work on a copy and consume the row piece by piece. Each field is
		# taken from a capture group of a match that is known to have
		# succeeded; rows that do not provide all three fields are skipped.
		my $line=$row;
		next unless $line=~s/^\t<tr><td><i>//;
		next unless $line=~s/^([^<]+)//;
		my $species=lc($1);
		$species=~s/ /_/;
		$line=~s/<\/i><\/td><td>//;
		next unless $line=~s/^([^<]+)//;
		my $accession=$1;
		next unless $line=~/(\d+)<\/td><\/tr>/;
		$SRA2species{$accession}=$species;
		$SRA2clnum{$accession}=$1;
		}
	$available_SRA_sets=keys%SRA2species;
	print"Found data for $available_SRA_sets SRA sets.";
	
	# Check if SRA dataset is present in piRNA cluster database
	print"\nCheck whether specified SRA ID(s) are available:";
	@valid_IDs=();
	my %checked;
	foreach my $id(@SRA_id)
		{
		$id=uc$id;	# $id aliases the element, so @SRA_id is upper-cased as well
		# Accessions that differ only in case are the same dataset; handle
		# each of them once.
		next if $checked{$id}++;
		print"\n $id\t-> ";
		if(exists$SRA2species{$id})
			{
			push(@valid_IDs,$id);
			print"OK.";
			}
		else
			{
			print"not found (skip).";
			}
		}
	}
else
	{
	# Exit if no SRA ID was specified
	die "\nError! No SRA IDs specified. Use command -SRA [SRA accession(s)] to specify one or multiple datasets.\n";
	}


# Check/Create target directory
#
# Sets $file_prefix, the string that is put in front of every local file
# name: "<target directory>/" when option -d was given, otherwise the empty
# string, i.e. files are written to the current working directory.
$dir_info="";
$file_prefix="";
if($target_directory)
	{
	print"\n\nCheck/Create target directory for download files: ";
	if(-d$target_directory)
		{
		print"Exists.";
		}
	else
		{
		# Pass the message to die itself ("die print ..." dies with "1")
		# and include the reason reported by the system.
		mkdir($target_directory)or die "\nError! Not able to create $target_directory: $!\n";
		print"Created.";
		}
	$file_prefix=$target_directory.'/';
	$file_prefix=~s/^\/+/\//;	# collapse a run of leading slashes
	$file_prefix=~s/\/+$/\//;	# "dir/" given by the user must not become "dir//"
	}
else
	{
	$dir_info="current working directory";
	print"\n\nDestination folder: '$dir_info$file_prefix'";
	}

# Decide what to download:
#  - no valid SRA ID                      -> nothing can be fetched, stop here
#  - more than one valid SRA ID, or no
#    cluster selected                     -> complete proTRAC results
#                                            (zip-compressed folders)
#  - exactly one valid SRA ID together
#    with cluster ids                     -> files of the selected clusters
# The empty case is tested first; otherwise a run without cluster options
# would call SIMPLE_DOWNLOAD with nothing to download and finish as if it
# had succeeded.
if(@valid_IDs==0)
	{
	print"\nNo downloadable data.\n\n";
	exit;
	}
elsif(@valid_IDs>1||(@cluster_seq==0&&@cluster_reads==0&&@cluster_image==0))
	{
	SIMPLE_DOWNLOAD();
	}
else
	{
	CUSTOM_DOWNLOAD();
	}

# Fetch the zip archive holding the complete proTRAC results for every
# accession in @valid_IDs and store it as
# "<file prefix>proTRAC_<accession>.zip". The species part of the URL is
# looked up in %SRA2species. One progress line per accession is printed.
sub SIMPLE_DOWNLOAD {
	print "\n\nStart downloading proTRAC results for available SRA IDs.";
	for my $SRA_id (@valid_IDs) {
		print "\n $SRA_id\t-> ";
		my $archive = "proTRAC_$SRA_id.zip";
		my $url = "http://www.smallrnagroup.uni-mainz.de/piRNAclusterDB/$SRA2species{$SRA_id}/proTRAC_$SRA_id/$archive";
		$status = getstore($url, $file_prefix.$archive);
		print(is_success($status) ? "Finished." : "FAILED ($status)!");
	}
}

# Download information for specified clusters only
#
# Called when exactly one valid SRA accession was given together with
# cluster ids. For the first entry of @valid_IDs it fetches
#   cl<id>.fasta  for every id in @cluster_seq    (cluster sequence)
#   <id>.fasta    for every id in @cluster_reads  (reads mapped to cluster)
#   <id>.png      for every id in @cluster_image  (proTRAC image file)
# and stores each file under the same name, prefixed with $file_prefix.
sub CUSTOM_DOWNLOAD
	{
	my $accession=$valid_IDs[0];	# scalar element, not the slice @valid_IDs[0]
	print"\n\nStart downloading selected files.";
	_download_cluster_files($accession,"\n\nCluster sequence (FASTA file):",\@cluster_seq,'cl','.fasta');
	_download_cluster_files($accession,"\n\nReads mapped to cluster (FASTA file):",\@cluster_reads,'','.fasta');
	_download_cluster_files($accession,"\n\nproTRAC image file:",\@cluster_image,'','.png');
	}

# Download one kind of per-cluster file for a list of cluster ids.
#
# Arguments: SRA accession, heading to print, reference to the list of
#            cluster ids, file name part before the id, file name part
#            after the id.
# Ids are processed in ascending order. Each id is checked on its own: ids
# below 1 and ids above the cluster count stored in %SRA2clnum are skipped
# with a message, so one unusable id does not suppress the remaining ones.
sub _download_cluster_files
	{
	my($accession,$heading,$ids,$name_prefix,$extension)=@_;
	print($heading);
	my @wanted=sort{$a<=>$b}@$ids;
	if(@wanted==0)
		{
		print" None specified.";
		return;
		}
	my $cluster_count=$SRA2clnum{$accession};
	my $base_url="http://www.smallrnagroup.uni-mainz.de/piRNAclusterDB/$SRA2species{$accession}/proTRAC_$accession";
	my $local_prefix=defined($file_prefix)?$file_prefix:'';	# unset when no -d was given
	foreach my $id(@wanted)
		{
		if($id<1)
			{
			print"\nSkip $id: not a valid cluster id.";
			next;
			}
		if($id>$cluster_count)
			{
			print"\nSkip $id: $accession has only $cluster_count clusters.";
			next;
			}
		my $file_name=$name_prefix.$id.$extension;
		my $status=getstore("$base_url/$file_name",$local_prefix.$file_name);
		if(is_success($status))
			{
			print" $id";
			}
		else
			{
			print" ($id FAILED: $status)";
			}
		}
	return;
	}

# Download processed sequence data
#
# For every accession in @valid_IDs the archives selected with
# -clipped_filtered, -miR and -miRprec are fetched (in this order) from the
# "seqsets" folder of the database; the outcome of each transfer is printed.
if ($clipped_filtered || $miR || $miRprec) {
	# Announce one archive, download it to "<file prefix><archive>" and
	# report success or the returned status code.
	my $fetch_set = sub {
		my ($announcement, $archive) = @_;
		print($announcement);
		$status = getstore("http://www.smallrnagroup.uni-mainz.de/piRNAclusterDB/seqsets/$archive", $file_prefix.$archive);
		print(is_success($status) ? "Finished." : "FAILED ($status)!");
	};

	print "\n\nStart downloading sequence sets:";
	for my $SRA_id (@valid_IDs) {
		$fetch_set->("\n Filtered sequences for $SRA_id -> ", "$SRA_id.clipped.nr.ns.zip") if $clipped_filtered;
		$fetch_set->("\n miR sequences for $SRA_id -> ", "$SRA_id.miR-mature.zip") if $miR;
		$fetch_set->("\n miR precursor sequences for $SRA_id -> ", "$SRA_id.miR-hairpin.zip") if $miRprec;
	}
}

print "\n\nFINISHED FETCHING\n\n";