#!/usr/bin/perl
#___________________________________________________________________________________
# Title     : Bio::Bioinf (for Bio Perlogical) or bio_lib.pl
# Usage     : require "Bioinf.pl"; ##<-- This is very slow, so you'd better
#             use Bio::Bioinf; <- When you have Bioinf.pm module installed under Bio
#              or
#             use Bioinf;
#
#             or, copy the subroutines in your prog. or make a smaller lib files
#               which are classified according to functions(like, Bio_Seq.pl
#               for sequence handling, Bio_Array.pl for various array
#               subroutines..), or make your own module out of this, do whatever
#               you want....
#
# Function  : 1) This is a comprehensive perl subroutine library developed
#                 under Bioperl project and others.
#                 URL: http://cyrah.med.harvard.edu/Proj/Comp/Perl/Sub_DB/
#
#             2) This module is designed as a simple, single-layer biological
#                 module rather than a multilevel object-oriented one.
#
#             3) This also serves as the depository database for various
#                 perl subroutines or algorithms developed in
#                 Bioinformatics and Genome projects.
#
#             4) You can copy any of the sub routines in this file, modify, use
#                 in yours...
#                 PLEASE MODIFY AS FREELY AS YOU WANT !! All has the
#                 same PERL copyright
#
#             5) All the subroutines are tested in small files
#                 If you want to have such single example program
#                 to see how they really work, pls contact me( A Biomatic )
#                 For example, a file called  'handle_arguments.pl' exists to
#                 test the subroutine 'handle_arguments'. Usually you can find them
#                 in  http://cyrah.med.harvard.edu/Proj/Bio/Bioperl/Bioinf.html
#
# Example   : require "Bioinf.pl"; BUT, I recommend you take subroutines out and
#             use them directly or modify them in your programs.
#             use Bioinf;
#
# Warning   : For the enhancement of Biology, Biomatics, and Science.
#             This is a development companion.
#             Class is for classification of my subroutines. If it is Bio, it can
#             be useful for biological sequence data handling. If it's Utility,
#             it can also be used for general purpose file handling stuff.
#             File, Array, Hash,... are my classification items.
# Keywords  : Biology, perl library, sequence handling lib
# Options   : nothing (used as subroutine library or as Bioinf.pm module)
#
# Author    : J. Park, Andreas Heger, Jason Johnson, Sarah Teichmann, Alex Bateman,
#               Astrid Reinhardt, and anybody contributed AND YOU!!
#               jong@biosophy.org
# Category  : Bioinf Bioperl
# Version   : 3    (Dec/31/1999)
#------------------------------------------------------------------


print "\n ################################################################\n";
print   " #                                                              #\n";
print   " # Using Bioperl subroutine Module for Bioinformatics & Biology #\n";
print   " #                                                              #\n";
print   " ################################################################\n";


## The following box is used as the header for any subroutines developed to
##  give information on the subroutines. It is used by Jong as a template.


#______________________________________________________________________________
# Title     :
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------


## The variables declared with 'my' below are very commonly used ones. They are
## collected here so that they can be copied into any new subroutine.
## This is because, in Perl, every variable is global unless you declare it
## inside the subroutine. Many BUGs come from not localizing variables.
## These variables are used as the default header for the subroutine
## 'handle_arguments'. If you add this box to any sub, the 'handle_arguments'
## subroutine will be called and any arguments passed to the subroutine will
## be classified as file, dir, string, hash(as reference), array(as reference),
## pure number, or option(with '-' prefix), etc. For more detail look at
## the header of handle_arguments.


sub IGNORE_THIS{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
}


#________________________________________________________________________
# Title     : handle_arguments
# Usage     : Just put the whole box delimited by the two '###..' lines below
#             to inside of your subroutines. It will call 'handle_arguments'
#             subroutine and parse all the given input arguments.
#             To use, claim the arguments, just use the variable in the box.
#             For example, if you had passed 2 file names for files existing
#             in your PWD(or if the string looks like this: xxxx.ext),
#             you can claim them by $file[0], $file[1] in
#             your subroutine.
# Function  : Sorts input arguments going into subroutines and returns default
#             arrays of references for various types (file, dir, hash, array,,,,)
#             If you give (\@out, @file), it will put @out into @array as a ref
#             and also the contents of @out will be dereferenced and put to
#             raw_string regardless what is in it).
#
# Example   : 'handle_arguments(\@array, $string, \%hash, 8, 'any_string')
# Warning   :
# Keywords  : handling arguments, parsing arguments,
# Options   :
# Returns   : Following GLOBAL variables
#
#             $num_opt,    @num_opt     @file          @dir
#             $char_opt,   @char_opt    %vars          @array,
#             @hash        @string,     @raw_string    @range,
#
#             $num_opt has 10,20
#             @num_opt has (10, 20)
#             @file has  xxxx.ext
#             @dir has  dir  or /my/dir
#             $char_opt has 'A,B'
#             @char_opt has (A, B)
#             @array has  (\@ar1, \@ar2)
#             @hash has (\%hash1, \%hash2)
#             @string  ('sdfasf', 'dfsf')
#             @raw_string (file.ext, dir_name, 'strings',,)
#             @range has values like  10-20
#             %vars deals with x=2, y=3 stuff.
#
# Argument  : any type, any amount
# Version   : 5.0
#--------------------------------------------------------------------
sub handle_arguments{
    my($c, $d, $e, $f, $i, $j, $k, $l, $s, $t, $x, $y, $z, $char_opt, $dir, @hash,
        $file, $in_dir, $num_opt, @char_opt, @dir, @file, @string, @file_dir, @k,
        @num_opt, @raw_string, @array, %vars, @range, @temp, $temp,
        @char_options, %duplicate);

    &set_debug_option;
    if(@_<1){ print chr(7),"\n This is handle_arguments. No args Passed, Error?\n"}
    elsif( (@_ ==1)&& (ref($_[0]) eq 'ARRAY') ){ # when there is only 1 argument
      push(@array, $_[0]);
      push(@k, $_[0]);
    }elsif( (@_==1)&&( !ref($_[0]) ) ){
      if(-f $_[0]){ push(@file, $_[0]);   push(@string, $_[0]) }
      elsif(-d $_[0]){ push(@dir, $_[0]); push(@string, $_[0]) }
      elsif($_[0]=~/^\d+$/){ push(@num_opt, $_[0]); $num_opt.=$_[0] }
      elsif($_[0]=~/^\w+$/){ push(@string, $_[0]); }
    }elsif(@_ >=1){ @k = @_ }

    #####______Start of  general argument handling______######
    for($k=0; $k < @k ;$k++){
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # Check if the input was REFERENCE
      #________________________________________________
      if( !ref($k[$k]) ){
          if($k[$k]=~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){  push(@char_opt, $1); $char_opt .= "$1\,";
          }elsif($k[$k]=~ /^\-([a-zA-Z]+)$/){          ## When multiple option is given,
                          @char_options = split(/\,|/, $1);  push(@char_opt, @char_options);
                          $char_opt .= join("\,", @char_options); ## '-' should be used. eg. '-HEGI'
          }elsif($k[$k]=~ /^(\w+)\=(\S*\s*)$/){  $vars{$1}=$2;  $vars .= "$1\,";
          }elsif($k[$k]=~ /^(\-?\d+)$/){ push(@num_opt, $1);  $num_opt .= "$1\,";
          }elsif($k[$k]=~ /^\d+\.?\d*\-\d+\.?\d*$/){  push(@range,  $k[$k] );
          }elsif(-f $k[$k]){                          push(@file,   $k[$k] );
          }elsif(-d $k[$k]){                          push(@dir,    $k[$k] ); }
          if(    $k[$k]=~ /\/[\w\d\.\-]+\/.+?\/$/){   push(@dir,    $k[$k] );
          }elsif($k[$k]=~ /\/.+?\/\w+\.\w+$/){        push(@file,    $k[$k] );
          }elsif($k[$k]=~ /^\/[\w\d\.\-]+[\/]*$/){    push(@dir,    $k[$k] );
          }elsif($k[$k]=~ /^[\/\w\d\-\.]+\.\w+$/){    push(@file,   $k[$k] );
          }elsif($k[$k]=~ /\S\/[\/\w\d\-\.]+\.\w+$/){ push(@file,   $k[$k] );
          }elsif($k[$k]=~/^\w+[\/\\\w\d\.\-]+$/){     push(@string, $k[$k] );
                      # string does not have space, but includes '\', '/', '.'
          }else{                                      push(@raw_string, $k[$k] ); }
      }elsif( ref($k[$k]) ){
          if( ref($k[$k]) eq "SCALAR"){
              if(${$k[$k]} =~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){ push(@char_opt, $1); $char_opt  .= "$1\,";
              }elsif(${$k[$k]}=~ /^\-([a-zA-Z]+)$/){ push(@char_opt, @char_options);
                              $char_opt  .= join("\,", @char_options);  ## as an option string.
              }elsif(${$k[$k]}=~ /^(\w+)\=(\S*\s*)$/){  $vars{$1}=$2;  $vars .= "$1\,";
              }elsif(${$k[$k]}=~ /^(\-?\d+)$/){ $num_opt .= "$1\,";  push(@num_opt, $1);
              }elsif(${$k[$k]}=~ /^\d+\.?\d*\-\d+\.?\d*$/){    push(@range,  $k[$k] );
              }elsif(-f ${$k[$k]}){                            push(@file,   ${$k[$k]} );
              }elsif(-d ${$k[$k]}){                            push(@dir,    ${$k[$k]} );
              }elsif(${$k[$k]}=~ /\/[\/\w\d\.\-]+[\/].+[\/]$/){ push(@dir,    ${$k[$k]} );
              }elsif(${$k[$k]}=~ /\/\S*\.\w+\s*$/){   push(@file,    ${$k[$k]} );
              }elsif(${$k[$k]}=~/^\/[\/\w\d\.\-]+[\/]*$/){     push(@dir,    ${$k[$k]} );
              }elsif($k[$k]=~ /\/[\w\d\.\-]+\/.+?\/\w+\.\w+$/){   push(@file,    $k[$k] );
              }elsif(${$k[$k]}=~ /^[\/\w\d\-\.]+\.\w+$/){      push(@file,   ${$k[$k]} );
              }elsif(${$k[$k]}=~/^\w+[\w\d\.\-]+$/){           push(@string, ${$k[$k]} );
              }else{                                           push(@raw_string, ${$k[$k]}) }
          }elsif(ref($k[$k]) eq "ARRAY"){
              my @temp_arr = @{$k[$k]}; push(@array, $k[$k]);
              for ($i=0; $i<@temp_arr; $i++){
                 if(-f $temp_arr[$i]){                            push(@file, $temp_arr[$i]); next;
                 }elsif($temp_arr[$i]=~/^\d+\.?\d*\-\d+\.?\d*$/){ push(@range,$temp_arr[$i] ); next;
                 }elsif(-d $temp_arr[$i]){                        push(@dir , $temp_arr[$i]);  next;
                 }
                 if($temp_arr[$i]=~ /\/[\/\w\d\.\-]+[\/].+\/$/){ push(@dir,    ${$k[$k]} );
                 }elsif($temp_arr[$i]=~ /\/[\w\d\.\-]+\/.+?\/\w+\.\w+$/){   push(@file,    $k[$k] );
                 }elsif($temp_arr[$i]=~/^\/[\/\w\d\.\-]+[\/]*$/){ push(@dir, $temp_arr[$i] );
                 }elsif($temp_arr[$i]=~/^[\/\w\d\-\.]+\.\w+$/){   push(@file,$temp_arr[$i] );
                                                                  push(@string,$temp_arr[$i] );
                 }elsif($temp_arr[$i]=~/^\w+[\w\d\.\-]+$/){       push(@string,$temp_arr[$i]);
                 }else{                                           push(@raw_string, $temp_arr[$i]);
              }
          }
      }elsif(ref($k[$k]) eq "HASH"){
          push(@hash,   $k[$k] ); }
      }
    }
    @raw_string=(@raw_string, @string);
    @file = grep { ! $duplicate{$_}++ } @file;
    return(\@hash, \@array, \@string, \@dir, \@file, \@num_opt,
            \@char_opt, \$num_opt, \$char_opt, \@raw_string, \%vars, \@range );
}



#________________________________________________________________________
# Title     : set_debug_option
# Usage     : &set_debug_option;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug_option #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Category  :
# Version   : 1.8
#--------------------------------------------------------------------
sub set_debug_option{
    my($j, $i, $level);
    unless( defined($debug) ){
        for($j=0; $j < @ARGV; $j ++){
            if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
                 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
                 $debug=1;
                                  print chr(7);
                 print __LINE__," \$debug  is set to ", $debug, "\n";
                 splice(@ARGV,$j,1); $j-- ;
                 $level = length($1)+1;
                 for($i=0; $i < $level; $i++){
                      ${"debug$i"}=1;
                      print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
                 }
            }
        }
    }
}

#______________________________________________________________________________
# Title     : set_options_for_DALI
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub set_options_for_DALI {
        local ($fsspfile)=@_;
print "<FORM METHOD=GET ACTION=$qz3>\n<INPUT TYPE=hidden NAME=filename VALUE=$fsspfile>\n<H2>Family alignments</H2>\n";
print "Maximum pairwise identity:\n",$query->popup_menu(-name=>'idecut',-default=>'80 %',-values=>['100 %','90 %','80 %','70 %','60 %','50
%','40 %','30 %']);
print "<H2>3D
superimpositions</H2>\nStereo:\n",$query->popup_menu(-name=>'stereo',-default=>'mono',-values=>['mono','left-right','right-left']);
print "<HR><INPUT TYPE=submit Value='Use these settings'><INPUT TYPE=Reset Value=Reset><HR></FORM>\n";
print "<H2>Viewers</H2><A HREF=http://www.umass.edu/microbio/rasmol/>RasMol2.6</A><BR>\n";

EOS
}


#______________________________________________________________________________
# Title     : align_intermediate_alignments
# Usage     : @aligned_alignments=@{&align_intermediate_alignments($hash[$i], $hash[$i+1])};
#             %hash1=%{$aligned_alignments[0]};
#             %hash2=%{$aligned_alignments[1]};
#             $common_interm_seq_name=${$aligned_alignments[2]};
#             $non_common_seq_entry1=${$aligned_alignments[3]};
#             $non_common_seq_entry2=${$aligned_alignments[4]};
# Function  :
# Example   :
# Keywords  : align_multiple_alignments
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.3
#------------------------------------------------------------------------------
sub align_intermediate_alignments{
		my ($i, %hash1, %hash2, $pair1_name1_orig, $seq1_name_orig, @seq_names,
				$INT_seq2_name_orig, $seq2_name_orig, @aligned_alignments, %temp2,
				$common_INT_seq_name, $non_common_seq_entry1, $non_common_seq_entry2,
				%lookup_table1, %lookup_table2, $inter_temp_name, @splited1_interm,
				@splited1_seq, @splited2_interm, @splited2_seq,
				$gap_char);
		$gap_char='-';

		%hash1=%{$_[0]};
		%hash2=%{$_[1]};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# GEtting the Common seq (INTERMs)
		#_____________________________________________________________
		@names1= keys %hash1;
		@names2= keys %hash2;
		%lookup_table1=($names1[0]=>$names1[1], $names1[1]=> $names1[0]);
		%lookup_table2=($names2[0]=>$names2[1], $names2[1]=> $names2[0]);

		@seq_names=(@names1, @names2);
		($INT_seq1_name_orig, $INT_seq2_name_orig, $seq1_name_orig, $seq2_name_orig)=@{&find_common_seq_names(\@seq_names)};


		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Following is necessary to know which keys belong to which hash
		#___________________________________________________________________
		if($lookup_table1{$INT_seq1_name_orig} and $lookup_table1{$seq1_name_orig}){
		    # Default situation. No change in key and value in hash1, hash2
		}elsif($lookup_table2{$INT_seq1_name_orig} and $lookup_table2{$seq1_name_orig}){
				%temp=%hash1; %hash1=%hash2; %hash2=%temp; ## total reverse case
				#$temp=$INT_seq2_name_orig; $INT_seq2_name_orig=$INT_seq1_name_orig; $INT_seq1_name_orig=$temp;
				#$temp=$seq1_name_orig; $seq1_name_orig=$seq2_name_orig; $seq2_name_orig=$temp;
		}elsif($lookup_table1{$INT_seq1_name_orig} and $lookup_table2{$seq1_name_orig}){
				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# When Query seqs are swapped.
				#___________________________________________________________________________
				print "\n      > seq1_name_orig seq2_name_orig  were swapped\n";
				%hash1=($INT_seq1_name_orig, $hash1{$INT_seq1_name_orig},
								$seq2_name_orig, $hash1{$seq2_name_orig});
				%hash2=($INT_seq2_name_orig, $hash2{$INT_seq2_name_orig},
								$seq1_name_orig, $hash2{$seq1_name_orig});
				$temp=$seq1_name_orig; $seq1_name_orig=$seq2_name_orig; $seq2_name_orig=$temp;
		}elsif($lookup_table2{$INT_seq1_name_orig} and $lookup_table2{$seq2_name_orig}){
				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# When Interm seqs are swapped.
				#___________________________________________________________________________
				print "\n      > INT_seq1_name_orig INT_seq2_name_orig  were swapped\n";
				%hash1=($INT_seq1_name_orig, $hash1{$INT_seq2_name_orig},
								$seq1_name_orig,     $hash1{$seq1_name_orig});
				%hash2=($INT_seq2_name_orig, $hash2{$INT_seq1_name_orig},
								$seq2_name_orig,     $hash2{$seq2_name_orig});
		}else{
				print "\n========================================================\n";
				&show_hash(\%hash1);
				&show_hash(\%hash2);
		    print "\nError ??  \n$INT_seq1_name_orig => $seq1_name_orig\n$INT_seq2_name_orig => $seq2_name_orig\n\n\a";
				#sleep(1);
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Check if it is an MSF like alignment (saves time)
		#________________________________________________________________________________
		if( $INT_seq1_name_orig eq $INT_seq2_name_orig
				and ($hash1{$seq1_name_orig}=~/\./ or $hash1{$INT_seq1_name_orig}=~/\./)){
					$MSF_input=1;
					return([\%hash1, \%hash2, \$INT_seq1_name_orig, \$seq1_name_orig, \$seq2_name_orig]);
		}


		#print "\n22222222--------------------------------------\n";
		#&show_hash(\%hash1);
		#&show_hash(\%hash2);
		#print "\n So far I have hash1 and hash2";


		if($INT_seq1_name_orig=~/(\S+)_(\d+)\-(\d+)/){
		    $INT_seq1_name=$1;
				@INT_seq1_range=($2, $3);
				$INT_seq1_leng=$INT_seq1_range[1]-$INT_seq1_range[0];
		}else{ $INT_seq1_leng=length($hash1{$INT_seq1_name_orig}); }
		if($INT_seq2_name_orig=~/(\S+)_(\d+)\-(\d+)$/){
		    $INT_seq2_name=$1;
				@INT_seq2_range=($2, $3);
				$INT_seq2_leng=$INT_seq2_range[1]-$INT_seq2_range[0];
		}else{ $INT_seq2_leng=length($hash2{$INT_seq2_name_orig}); }


		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (1) sequ1  ----------
		#     inter  ---------- . . . . .
		#     inter                      ------------
		#     sequ2                      ------------
		#____________________________________________________________
		if($INT_seq2_range[0] >= $INT_seq1_range[1]){
				if($seq1_name_orig    =~/(\S+)_(\d+)\-(\d+)/){ $seq1_name=$1;     @seq1_range    =($2, $3);
						$seq1_leng    =length($hash1{$seq1_name_orig});}
				if($seq2_name_orig    =~/(\S+)_(\d+)\-(\d+)/){ $seq2_name=$1;     @seq2_range    =($2, $3);
						$seq2_leng    =length($hash2{$seq2_name_orig});}
				$INT_seq1_leng=length($hash1{$INT_seq1_name_orig});
				$INT_seq2_leng=length($hash2{$INT_seq2_name_orig});

				$leng_between_matched_segments=$INT_seq2_range[0]-$INT_seq1_range[1];
				$hash1{$seq1_name_orig}=$hash1{$seq1_name_orig}."$gap_char"x($leng_between_matched_segments+$INT_seq2_leng);
				#print "\n(1) 1. $seq1_name_orig\t$hash1{$seq1_name_orig}\n";
				$hash1{$INT_seq1_name_orig}=$hash1{$INT_seq1_name_orig}."$gap_char"x$leng_between_matched_segments.$hash2{$INT_seq2_name_orig};
				$hash1{$INT_seq1_name}     =$hash1{$INT_seq1_name_orig};
				#print "\n(1) 2. $INT_seq1_name\t$hash1{$INT_seq1_name}\n";
				$hash2{$seq2_name_orig}    ="$gap_char"x($INT_seq1_leng+$leng_between_matched_segments).$hash2{$seq2_name_orig};
				$hash2{$INT_seq2_name_orig}="$gap_char"x($INT_seq1_leng+$leng_between_matched_segments).$hash2{$INT_seq2_name_orig};
				#print "\n(1) 3. $seq2_name_orig\t$hash2{$seq2_name_orig}\n $INT_seq1_leng $leng_between_matched_segments \n";
				$hash2{$INT_seq2_name}=$hash1{$INT_seq1_name};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (2) sequ1  -------------------
		#     inter  -------------------
		#     inter            ----------------------
		#     sequ2            ----------------------
		#____________________________________________________________
		}else{ # when there is a overlap
				if($seq1_name_orig    =~/(\S+)_(\d+)\-(\d+)/){ $seq1_name=$1;
				    @seq1_range    =($2, $3);
						$seq1_leng    =$3-$2;
				}else{ $seq1_leng=length($hash1{$seq1_name_orig}); }
				if($seq2_name_orig    =~/(\S+)_(\d+)\-(\d+)/){ $seq2_name=$1;
				    @seq2_range    =($2, $3);
						$seq2_leng    =$3-$2;
				}else{ $seq2_leng=length($hash2{$seq2_name_orig}); }

				$overlap_between_matched_segments=$INT_seq1_range[1]-$INT_seq2_range[0];

				#$hash1{$seq1_name_orig}=$hash1{$seq1_name_orig}."$gap_char"x($seq2_leng - $overlap_between_matched_segments);
				#print "\n \$seq2_leng : $seq2_leng $overlap_between_matched_segments\n";

				#$hash1{$INT_seq1_name_orig}=$hash1{$INT_seq1_name_orig}."$gap_char"x($seq2_leng - $overlap_between_matched_segments);
				#print "\n(2) 1. $seq1_name_orig  $hash1{$seq1_name_orig} $seq2_leng $overlap_between_matched_segments $INT_seq1_range[1]-$INT_seq2_range[0]\n";

				#$hash1{$INT_seq1_name}=substr($hash1{$INT_seq1_name_orig}, 0, $INT_seq1_leng-$overlap_between_matched_segments).$hash2{$INT_seq2_name_orig};
				#print "\n(2) 2. $INT_seq1_name       $hash1{$INT_seq1_name}\n";
				#print "(2) 2.$INT_seq1_name_orig  $hash1{$INT_seq1_name_orig}\n";

				$len=$INT_seq2_range[0]-$INT_seq1_range[0];
				$leading_gap_seq="$gap_char"x$len;

				$hash2{$seq2_name_orig}=$leading_gap_seq.$hash2{$seq2_name_orig};
				$hash2{$INT_seq2_name_orig}="$gap_char"x($INT_seq2_range[0]-$INT_seq1_range[0]).$hash2{$INT_seq2_name_orig};
				#print "\n(2) 3. $seq2_name_orig  $hash2{$seq2_name_orig}\n";
				$hash2{$INT_seq2_name}=$hash1{$INT_seq1_name};

				@splited1_interm=split(//, $hash1{$INT_seq1_name_orig} );
				@splited1_seq   =split(//, $hash1{$seq1_name_orig} );
				@splited2_interm=split(//, $hash2{$INT_seq2_name_orig} );
				@splited2_seq   =split(//, $hash2{$seq2_name_orig} );

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				#  Core algorithm. Splits seq. If gap char is found in One interm seq, it puts gaps in the other seq and interm
				#_______________________________________________________________________________________________________________________
				for($i=$len; $i< (@splited1_interm+@splited2_interm); $i++){
						if($splited1_interm[$i] ne $splited2_interm[$i]){
								if($splited1_interm[$i] =~/[\-\.]/ and $splited2_interm[$i]=~/\w/){
										splice(@splited2_interm, $i, 0, $gap_char);
										splice(@splited2_seq,    $i, 0, $gap_char);
								}elsif($splited2_interm[$i] =~/[\-\.]/ and $splited1_interm[$i]=~/\w/){
										splice(@splited1_interm, $i, 0, $gap_char);
										splice(@splited1_seq,    $i, 0, $gap_char);
								}elsif( !$splited1_interm[$i] and $splited2_interm[$i]=~/\S/){
										$splited1_interm[$i]=$splited2_interm[$i];
										$splited1_seq[$i]=$gap_char;
								}elsif( !$splited2_interm[$i] and $splited1_interm[$i]=~/\S/){
										$splited2_interm[$i]=$splited1_interm[$i];
								    $splited2_seq[$i]=$gap_char;
								}
						}
				}

				$hash1{$INT_seq1_name_orig}=join('', @splited1_interm);
				$hash1{$seq1_name_orig}    =join('', @splited1_seq);
				$hash2{$INT_seq2_name_orig}=join('', @splited2_interm);
				$hash2{$seq2_name_orig}    =join('', @splited2_seq);

				$hash1{$INT_seq1_name}=$hash2{$INT_seq2_name}=$hash1{$INT_seq1_name_orig};
				#print "\n\n===========44444444444-------------------------------------\n";
				#&show_hash(\%hash1);
				#&show_hash(\%hash2);
				#print "\n So far I have hash1 and hash2";

		}
		$common_INT_seq_name  =$INT_seq1_name; # <--- This never has ranges in the names
		$non_common_seq_entry1=$seq1_name_orig;
		$non_common_seq_entry2=$seq2_name_orig;

		return([\%hash1, \%hash2, \$common_INT_seq_name, \$non_common_seq_entry1, \$non_common_seq_entry2]);
}





#__________________________________________________________________________
# Title     : sort_by_E_values
# Usage     : @out=@{&sort_by_E_values(\@input_line_array)};
# Function  : it sorts by the 2nd column(E-value, in mspa file), small comes top
# Example   :
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Authors   : jong@ebi.ac.uk
# Version   : 1.0
#----------------------------------------------------------------------------
sub sort_by_E_values{
    my (@in);
    if(ref $_[0] eq 'ARRAY'){
               @in = @{$_[0]};
    }else{
               @in = @_;
    }
    @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map {/^ {0,3}\S+\s+(\S+)/ && [$_, $1] } @in;
    return(\@in);
}

#__________________________________________________________________________
# Title     : sort_sequence_ranges
# Usage     : @out=@{&sort_sequence_ranges(\@input_line_array)};
# Function  :
# Example   : 10-20, 38-40, 1-9, 20-34
#             => 1-9, 10-20, 20-34, 38-40
# Keywords  : sort_ranges sort_seq_ranges sort_by_sequence_ranges
#             sort_ranges_by_start_point
# Options   :
# Returns   :
# Argument  :
# Category  :
# Authors   : jong@ebi.ac.uk
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_sequence_ranges{
    my (@in);
    if(ref $_[0] eq 'ARRAY'){
        @in = @{$_[0]};
    }else{
        @in = @_;
    }
    @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map {/(\d+)\-\d+/ && [$_, $1] } @in;
    return(\@in);
}



#__________________________________________________________________________
# Title     : sort_hash_value_by_column
# Usage     : @out=@{&sort_hash_value_by_column(\%input_line_hash, <column num>)};
# Function  : it sorts values of hash by the given column , small comes top. Unless a number
#             is given, it sorts by the first column.
#             It returns an ARRAY of the keys of the input HASH!!!
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : Above will sort the file xxxx.mspa by its 3rd column(numerically)
#               small numbers will come to the top.
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues, sort_hash_by_column, sort_value_by_column,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
#   $sort_numerically_RV=r by r
#
#   numerically  an alias of n
#
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub sort_hash_value_by_column{
    my (%in, $i, $col, $sort_numerically, $sort_numerically_RV,
        $sort_non_numerically, @keys);
    $sort_numerically=1;
    if(@_ < 2  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; die }
    for (@_){
            if(ref $_ eq 'HASH'){ %in =%{$_}; }
            elsif( ref $_ eq 'SCALAR'){ $col=${$_}; }
            elsif(/^\d+$/){ $col=$_ }
            elsif(/^\s*r\s*$/i){ $sort_numerically=1; $sort_numerically_RV='r'; $sort_non_numerically=0; }
            elsif(/^\s*[nd]\s*$/i){ $sort_numerically=1; $sort_non_numerically=0; }
            elsif(/^\s*n[umerically]*\s*$/i){ $sort_numerically=1; $sort_non_numerically=0; }
            elsif(/^\s*s\s*$/i){ $sort_non_numerically=1; $sort_numerically=0; }
    }
    $col--;

    @keys= keys %in;
    if($sort_numerically ){
         if($sort_numerically_RV){
             @keys= map {$_->[0]} sort { $b->[1] <=> $a->[1] } map { [$_, ($in{$_}=~/(\S+)/g)[$col] ] } @keys;
         }else{
             @keys= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($in{$_}=~/(\S+)/g)[$col] ] } @keys;
         }
    }else{ # here let's do the sring sort
             @keys= map {$_->[0]} sort { $a->[1] cmp $b->[1] } map { [$_, ($in{$_}=~/(\S+)/g)[$col] ] } @keys;
    }
    return(\@keys);
}


#__________________________________________________________________________
# Title     : sort_by_column
# Usage     : @out=@{&sort_by_column(\@input_line_array, <column num>)};
# Function  : it sorts by the given column, small comes top. Unless a number
#             is given, it sorts by the first column.
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : sort_by_column.pl 3 xxxx.mspa
#               Above will sort the file xxxx.mspa by its 3rd column(numerically)
#               small numbers will come to the top.
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues, sort_columns
# Options   :
#   $sort_numerically=n by n -n ## s is NOT numerically, n is numerically
#   $sort_by_string=s by s -s
#   $column_to_use= by c=
#   $Columns_to_use= by C=
#   $write_output_file=w by w
#   $NO_write_on_SCREEN=q by q
#   $bigger_num_first=b by b
#   $reverse_order=r by r
#
# Version   : 2.2
#----------------------------------------------------------------------------
sub sort_by_column{
    # Purpose : sort text lines by one whitespace-separated column.
    # Args    : (in any order)
    #             ARRAY ref            the lines to sort
    #             column (1-based)     as a plain integer, a SCALAR ref,
    #                                  'c=N', 'C=N' or 'C=N,M,...' (only the
    #                                  first listed column is used, as no
    #                                  multi-column comparison is implemented)
    #             'n' or 'd'           numeric sort (the default)
    #             's'                  string sort (always ascending)
    #             'b' / 'bigger_num_first' or 'r'   numeric sort, descending
    #             'Q'                  do not print the informational message
    # Returns : ARRAY ref of the sorted lines.  With fewer than two arguments
    #           it prints a FATAL line and calls exit.
    # Details : when both the first and the last input line contain a dotted
    #           number such as 10.10.2.1 (SCOP classification style), the
    #           column is split into its digit groups and up to 11 groups are
    #           compared numerically one after the other.
    # Fixes   : - a plain integer argument now selects that column (it used to
    #             be replaced by a hard-coded 1);
    #           - $bigger_num_first is lexical, so a descending call no longer
    #             makes every later call descending as well;
    #           - with no column given the first column is used, as documented
    #             (the undefined column used to decrement to -1 = last column);
    #           - the comparator chains on elements 2..14 were dropped: each
    #             record carries a single key there, so they never decided
    #             anything (and contained index typos).
    my (@in, $col, $sort_numerically, $sort_by_string, $bigger_num_first,
        $NO_write_on_SCREEN, @keyed);
    unless(@_ > 1  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; exit }
    $sort_numerically='n';
    for (@_){
        if(ref $_ eq 'ARRAY'){ @in=@{$_}; }
        elsif(ref $_ eq 'SCALAR'){ $col=${$_}; }
        elsif(/c=(\d+)/){ $col=$1; }
        elsif(/^\d+$/){ $col=$_; }
        elsif(/C=(\d+.*)/){
            my $spec=$1;
            if($spec=~/\w\W\w/){             # a list such as 2,3
                ($col)=split(/\W/, $spec);
                $col=1 if $col < 1;
            }elsif($spec=~/^\d+$/){
                $col=$spec;
            }
        }
        elsif(/^\s*b[igger_num_first]*\s*$/i){ $bigger_num_first=1; }
        elsif(/^\s*[nd]\s*$/i){ $sort_numerically=1; }
        elsif(/^\s*r\s*$/i){ $bigger_num_first='b'; }
        elsif(/^\s*s\s*$/i){ $sort_by_string='s'; $sort_numerically=0; }
        elsif(/^\s*Q\s*$/i){ $NO_write_on_SCREEN='Q'; }
    }
    $col=1 unless defined $col;
    $col--;    # callers count columns from 1, the list slice below from 0

    # Pair every line with the text of the requested column.
    @keyed= map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;

    if($sort_numerically){
        if(@in and $in[0]=~/\d+\.\d+\./ and $in[$#in]=~/\d+\.\d+\./){ ## things like:  10.10.2.1
            print "\n# (i) sort_by_column: The column $col contains $in[0], sorting reasonably\n" unless $NO_write_on_SCREEN;
            my $sign= $bigger_num_first ? -1 : 1;
            @in= map {$_->[0]}
                 sort {
                     my $order=0;
                     for my $part (1..11){    # digit groups live in elements 1..11
                         $order= ($a->[$part] || 0) <=> ($b->[$part] || 0);
                         last if $order;
                     }
                     $sign * $order;
                 }
                 map { [$_->[0], ($_->[1])=~/(\d+)/g] } @keyed;
        }elsif($bigger_num_first){
            @in= map {$_->[0]} sort { $b->[1] <=> $a->[1] } @keyed;
        }else{
            @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } @keyed;
        }
    }elsif($sort_by_string){
        @in= map {$_->[0]} sort { $a->[1] cmp $b->[1] } @keyed;
    }
    return(\@in);
}



#__________________________________________________________________________
# Title     : sort_by_scop_classification
# Usage     : @out=@{&sort_by_scop_classification(\@input_line_array, <classification column no>)};
# Function  : it sorts by the given column, small comes top. Unless a number
#             is given, it sorts by the first column.
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : sort_by_scop_classification.pl 3 xxxx.mpfa
#               Above will sort the file xxxx.mpfa by its 3rd column(numerically)
#               small numbers will come to the top.
#  sorts things like:
#     >dsfsf 1.2.3.1.4
#     >dsfsa 1.2.10.1.5
#
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_by_scop_classification{
    # Purpose : sort lines by a column holding a dotted classification such
    #           as 1.2.10.1.5, comparing the digit groups numerically so that
    #           1.2.3 comes before 1.2.10.
    # Args    : (in any order)
    #             ARRAY ref            the lines to sort
    #             integer/SCALAR ref   1-based column (default: 1)
    #             'n' or 'd'           numeric comparison of the digit groups
    #                                  (the default)
    #             's'                  plain string comparison of the column
    # Returns : ARRAY ref of the sorted lines.
    # Details : up to 11 digit groups are compared; missing groups count as 0.
    # Fixes   : - the string mode now picks the column with the same \S+
    #             tokenisation as the numeric mode (it used /(\d+)/g, i.e. the
    #             N-th digit run anywhere in the line);
    #           - with no column given the first column is used, as documented
    #             (the undefined column used to decrement to -1 = last column).
    my (@in, $col, $sort_numerically, $sort_by_string, @keyed);
    unless(@_ > 1  ){ print "\n# FATAL: sort_by_scop_classification needs 2 arguments\n"; die }
    $sort_numerically='n'; # for SCOP it is n
    for (@_){
        if(ref $_ eq 'ARRAY'){ @in=@{$_}; }
        elsif(ref $_ eq 'SCALAR'){ $col=${$_}; }
        elsif(/^\d+$/){ $col=$_ }
        elsif(/^\s*[nd]\s*$/i){ $sort_numerically=1; }
        elsif(/^\s*s\s*$/i){ $sort_by_string='s'; $sort_numerically=0; }
    }
    $col=1 unless defined $col;
    $col--;    # callers count columns from 1, the list slice below from 0

    # Pair every line with the text of the requested column.
    @keyed= map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;

    if($sort_numerically){
        @in= map {$_->[0]}
             sort {
                 my $order=0;
                 for my $part (1..11){    # digit groups live in elements 1..11
                     $order= ($a->[$part] || 0) <=> ($b->[$part] || 0);
                     last if $order;
                 }
                 $order;
             }
             map { [$_->[0], ($_->[1])=~/(\d+)/g] } @keyed;
    }elsif($sort_by_string){
        @in= map {$_->[0]} sort { $a->[1] cmp $b->[1] } @keyed;
    }
    return(\@in);
}



#__________________________________________________________________________
# Title     : sort_by_cluster_size
# Usage     : @out=@{&sort_by_cluster_size(\@input_line_array)};
# Function  : it sorts by the 1st digit before '-'  as in 2-183_cluster, 2-140_cluster,
#               etc.
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#             sort_by_pattern
# Options   :
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub sort_by_cluster_size{
    # Purpose : order names such as 2-183_cluster by the text in front of the
    #           '-' at the start of the name, compared numerically, smallest
    #           first.
    # Args    : an ARRAY ref of names, or the names as a plain list.
    # Returns : ARRAY ref of the names in sorted order.
    my (@names, @tagged, @ordered);
    if(@_ < 1  ){ print "\n# FATAL: sort_by_cluster_size needs 1 argument\n"; die }
    @names = (ref $_[0] eq 'ARRAY') ? @{$_[0]} : @_;

    # Tag each name with its leading size field (undef when the pattern does
    # not match), sort on the tag, then drop the tag again.
    for my $name (@names){
        my ($size) = $name=~/^(\S+)\-/;
        push(@tagged, [$size, $name]);
    }
    for my $pair (sort { $a->[0] <=> $b->[0] } @tagged){
        push(@ordered, $pair->[1]);
    }
    return(\@ordered);
}


#__________________________________________________________________________
# Title     : sort_by_column_bigger_first
# Usage     : @out=@{&sort_by_column_bigger_first(\@input_line_array, 1)};
# Function  : it sorts by the given column (e.g. the 2nd column, E-value, in mspa file), bigger comes top
#             by the help of  ts <decoux@moulon.inra.fr>
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#
# Options   :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_by_column_bigger_first{
    # Purpose : sort lines numerically by one whitespace-separated column,
    #           largest value first.
    # Args    : $_[0]  ARRAY ref of lines (a single plain line is accepted too)
    #           $_[1]  1-based column number, plain or as a SCALAR ref
    # Returns : ARRAY ref of the sorted lines.
    # Fixes   : - $col is now lexical; it used to overwrite the package
    #             variable $col of the caller's package on every call;
    #           - when the first argument is not an ARRAY ref only that
    #             argument is treated as data (before, the column number itself
    #             ended up in the list being sorted).
    my (@in, $col);
    unless(@_ ==2  ){ print "\n# FATAL: sort_by_column_bigger_first needs 2 arguments\n"; die }
    if(ref $_[0] eq 'ARRAY'){ @in = @{$_[0]}; }else{ @in = ($_[0]); }
    if(ref $_[1] eq 'SCALAR'){ $col=${$_[1]}; }else{ $col=$_[1]; }
    $col--;    # callers count columns from 1, the list slice below from 0
    @in= map {$_->[0]} sort { $b->[1] <=> $a->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
    return(\@in);
}

#______________________________________________________________________________
# Title     : make_2_seq_comparison_matrix
# Usage     :
# Function  :
# Example   :
# Keywords  : make_protein_seq_comparison_matrix, make_matrix,
#             make_sequence_comparison_matrix
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_2_seq_comparison_matrix{
    # Purpose : look up the exchange-matrix score for every residue pair
    #           formed by one residue of the first sequence and one residue of
    #           the second sequence.
    # Args    : $_[0]  HASH ref, two-level exchange matrix: {res1}{res2} => score
    #           $_[1]  HASH ref, name => sequence string; the two sequences
    #                  used are the first two names in sorted order
    # Returns : HASH ref {res_of_seq1}{res_of_seq2} => score.
    # Side effect : every looked-up score is printed to STDOUT, one per line.
    # Fixes   : - the sequence names are sorted, so which sequence is "first"
    #             no longer depends on hash ordering (this matters for an
    #             asymmetric matrix and for the shape of the result);
    #           - all working variables are lexical instead of package globals.
    my(%exchange_matrix, %seq, %comparison_matrix, @seq_names, @seq1, @seq2);
    %exchange_matrix=%{$_[0]};
    %seq=%{$_[1]};

    @seq_names=sort keys %seq;
    @seq1=split(//, $seq{$seq_names[0]});
    @seq2=split(//, $seq{$seq_names[1]});
    for my $res1 (@seq1){
        for my $res2 (@seq2){
            $comparison_matrix{$res1}{$res2}=$exchange_matrix{$res1}{$res2};
            print "\n$comparison_matrix{$res1}{$res2}";
        }
    }
    return(\%comparison_matrix);
}

#______________________________________________________________________________
# Title     : make_Fold_domain_interact_network_PML_file
# Usage     : &make_Fold_domain_interact_network_PML_file($PSDIP_file, $PDBG_file);
# Function  : generates various analysis files
# Example   :
#   PSDIP file is like:
#   PROTEIN_NONINTERACT     d168lb_ d168lc_ 168l  4.0 0 b: c:
#   PROTEIN_NONINTERACT     d168lc_ d168ld_ 168l  4.7 0 c: d:
#   PROTEIN_INTERACT        d168ld_ d168le_ 168l  4.2 5 d: e:
#   PROTEIN_INTERACT        d175la_ d175lb_ 175l a: b:
#   PROTEIN_INTERACT        d176la_ d176lb_ 176l a: b:
#  PML file looks like:
#   node pwgel_106 650 675 H2O                             compound        comp_C00001     8
#   node pwgel_108 650 735 Orthophosphate  compound        comp_C00009     8
#   node pwgel_110 650 755 Pyrophosphate   compound        comp_C00013     8
#   node pwgel_112 650 775 S-Adenosyl-L-methionine compound        comp_C00019     8
#
#   arc  pwgel_13 pwgel_12 pwgel_10 expression   expression      expr_AK3_ECOLI  1
#   arc  pwgel_11 pwgel_10 pwgel_1  catalysis    catalysis       cat_AK3_ECOLI_2_7_2_4   1
#   arc  pwgel_17 pwgel_16 pwgel_14 expression   expression      expr_AK1H_ECOLI 1
#   arc  pwgel_15 pwgel_14 pwgel_1  catalysis    catalysis       cat_AK1H_ECOLI_2_7_2_4  1
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub make_Fold_domain_interact_network_PML_file{
    # Purpose : build a fold-level interaction network file (PML) from a PDBG
    #           domain classification file and a PSDIP domain interaction file.
    # Args    : two file names (plain strings or SCALAR refs) in either order;
    #           the PDBG file is recognised by '.pdbg' or '.mpfa' in its name,
    #           the PSDIP file by '.psdip'.
    # Output  : writes PSIFA_<type>_<date>_Fold.pml in the current directory
    #           ('node' lines for every fold seen in an interacting pair, 'arc'
    #           lines with the number of interacting domain pairs per fold
    #           pair) and echoes the arc lines to STDOUT.
    # Dies    : when either input file is missing/empty or a file cannot be
    #           opened.
    # Needs   : &get_date_text(), defined elsewhere; it is dereferenced here,
    #           so it is expected to return a SCALAR ref.
    # Fixes   : - %node_id and %fold_pair are lexical now; as package globals
    #             they kept the nodes and edge counts of earlier calls, so a
    #             second call wrote a merged network;
    #           - the extension test uses the alternation (?:pdbg|mpfa); the old
    #             [pdbg|mpfa] was a character class that matched any name with
    #             '.p', '.d', '.m', ... in it, including '.psdip';
    #           - all file handles are lexical, checked and closed (the PSDIP
    #             handle used to stay open), and $_ is no longer clobbered.
    my($PSDIP_file, $PDBG_file, $PDBG_type, $file1, $file2,
       %pdbg, %fold_seen, $total_fold_number, %node_id, %fold_pair,
       $date, $pml_file);
    $file1 = (ref $_[0] eq 'SCALAR') ? ${$_[0]} : $_[0];
    $file2 = (ref $_[1] eq 'SCALAR') ? ${$_[1]} : $_[1];
    if($file1=~/\.(?:pdbg|mpfa)/ and $file2=~/\.psdip/){ $PDBG_file=$file1; $PSDIP_file=$file2;
    }elsif($file1=~/\.psdip/ and $file2=~/\.(?:pdbg|mpfa)/){ $PDBG_file=$file2; $PSDIP_file=$file1; }
    unless(defined $PSDIP_file and defined $PDBG_file and -s $PSDIP_file and -s $PDBG_file){
        die "\n I need PSDIP and PDBG file <- make_interaction_network_PML_file\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check what PDBG file(%) was used to add in output file names
    #_________________________________________________________________
    if($PDBG_file=~/pd\w+?(\d+)\w*_/){    $PDBG_type=$1;    }

    # Domain name -> fold (first two fields of the classification).
    open(my $pdbg_fh, '<', $PDBG_file) || die "\n Can not open $PDBG_file: $!\n";
    while(my $line=<$pdbg_fh>){
        if($line=~/\>(\S+)\s(\d+\.\d+)\.\d+\S+\s+(.+)/){
            $pdbg{$1}=$2;
            $fold_seen{$2}++;
        }
    }
    close($pdbg_fh);
    $total_fold_number=keys %fold_seen;

    # Count interacting domain pairs per (unordered) fold pair.  Pairs with a
    # domain that has no fold in the PDBG file are skipped.
    open(my $psdip_fh, '<', $PSDIP_file) || die "\n Can not open $PSDIP_file: $!\n";
    while(my $line=<$psdip_fh>){
        next unless $line=~/PROTEIN_INTERACT\s+(\S+)\s+(\S+)/;
        my ($fold1, $fold2)=($pdbg{$1}, $pdbg{$2});
        next unless $fold1 and $fold2;
        s/\./_/g for ($fold1, $fold2);       # node ids must not contain dots
        $node_id{$fold1}=$fold1;
        $node_id{$fold2}=$fold2;
        $fold_pair{ join(' ', sort($fold1, $fold2)) }++;
    }
    close($psdip_fh);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Creating various files
    #__________________________________________________________________________
    $date=${&get_date_text()};
    $pml_file="PSIFA_$PDBG_type\_$date\_Fold\.pml";
    open(my $pml_fh, '>', $pml_file) || die "\n Can not write $pml_file: $!\n";
    print {$pml_fh} "# made by: make_interaction_network_PML_file to make piver file\n";
    print {$pml_fh} "# Total fold number: $total_fold_number in $PDBG_file\n";

    for my $node (sort keys %node_id){
        next unless $node=~/\S+/;
        (my $id=$node)=~s/[\. ]/_/g;
        print {$pml_fh} "\nnode $id\t0 0 DESC";
    }

    print {$pml_fh} "\n";
    for my $pair (sort keys %fold_pair){
        next unless $pair=~/\S+/;
        my ($edge_orig, $edge_dest)=split(' ', $pair);
        my $arc_line=sprintf("arc %-17s %-9s %-9s %-9s\n",
                             "$edge_orig\_$edge_dest", $edge_orig, $edge_dest, $fold_pair{$pair});
        print {$pml_fh} $arc_line;
        print $arc_line;
    }
    print "\n";
    print {$pml_fh} "\n";
    close($pml_fh) || die "\n Can not finish writing $pml_file: $!\n";
    print "\n# \$pml_file $pml_file is produced \n";
}


#______________________________________________________________________________
# Title     : make_superfam_superfam_interaction_pair_from_Y2H
# Usage     : $0 pdb100d_1_48.mpfa nonred_scid_interacts_JONG.y2hin psi_pdbisl_merge_assign.strass
# Function  :
# Example   : &make_superfam_superfam_interaction_pair_from_Y2H(@ARGV );
#   y2hin file: nonred_scid_interacts_JONG.y2hin
#
#     YMR229C YGL171W
#     YBR0832 YDR227W
#     YAL001C YDR362C
#
#   strass file: psi_pdbisl_merge_assign.strass
#     YAL003W-206 121-180 d1b64__-91 6-65 2e-24
#     YAL005C-642 388-600 d1dkza_-219 1-209 5e-95
#     YAL005C-642 1-364 d1dkgd_-383 1-369 1e-167
#     YAL011W-638 181-273 d1br0__-120 13-120 3e-07
# Keywords  : BIT
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub make_superfam_superfam_interaction_pair_from_Y2H{
    # Purpose : translate ORF-ORF yeast two-hybrid interactions into
    #           superfamily-superfamily interaction pairs.
    # Args    : up to four file names (plain strings or SCALAR refs), any order:
    #             *.pdbg / *.mpfa  domain classification ( >name  c.f.s... )
    #             *.y2hin          interacting ORF pairs, two names per line
    #             *.strass         ORF -> structural domain assignments with
    #                              an E-value in the fifth field
    # Output  : <base>.sfinp    one tab-separated superfamily pair per line
    #           <base>_Y2H.pml  'node' lines per superfamily and 'arc' lines
    #                           for chains before - middle - next
    #           where <base> comes from &get_base_names() applied to the y2hin
    #           file name.  Progress lines are printed to STDOUT.
    # Details : only assignments with an E-value below 0.001 and a domain known
    #           from the PDBG file are used; dots in superfamily ids are
    #           replaced by underscores.
    # Needs   : &get_base_names() and &remove_dup_in_array(), defined
    #           elsewhere; both results are dereferenced here, so they are
    #           expected to return references.
    # Fixes   : - %SFINP (and the other working variables) are lexical; as a
    #             package global %SFINP kept the pairs of earlier calls;
    #           - "Total superfam number" is now the number of distinct
    #             superfamilies read from the PDBG file; the variable was never
    #             assigned, so the header line used to show nothing;
    #           - the extension test is \.(?:pdbg|mpfa); /\.pdbg|mpfa/ matched
    #             'mpfa' anywhere in a name;
    #           - the three inputs are checked up front, file handles are
    #             lexical, checked and closed, and output files are only
    #             created once the inputs were read successfully.
    my($PDBG_file, $y2hin_file, $strass_file, $base, $sfinp_file, $PML_file,
       %PDBD_lookup, %superfam_seen, %ORF_inter, %SFINP, %superfam_next,
       %non_dup, @superfam_pair, $num_superfam_pair_inter, $total_superfam_number);

    for my $n (0..3){
        my $file = (ref $_[$n] eq 'SCALAR') ? ${$_[$n]} : $_[$n];
        next unless defined $file;
        if($file=~/\.(?:pdbg|mpfa)/){ $PDBG_file=$file;
        }elsif($file=~/\.y2hin/){     $y2hin_file=$file;
        }elsif($file=~/\.strass/){    $strass_file=$file; }
    }
    unless(defined $PDBG_file and defined $y2hin_file and defined $strass_file){
        die "\n I need PDBG (.pdbg or .mpfa), .y2hin and .strass files <- make_superfam_superfam_interaction_pair_from_Y2H\n";
    }
    $base=${&get_base_names(\$y2hin_file)};
    $sfinp_file="$base\.sfinp";
    $PML_file="$base\_Y2H\.pml";

    # Domain name -> superfamily (first three fields of the classification).
    open(my $pdbg_fh, '<', $PDBG_file) || die "\n Can not open $PDBG_file \n\n";
    while(my $line=<$pdbg_fh>){
        if($line=~/^\>([de](\d\w\w\w)\S+)\s+(((\d+)\.\d+)\.\d+)/){
            $PDBD_lookup{$1}=$3;
            $superfam_seen{$3}++;
        }
    }
    close($pdbg_fh);
    $total_superfam_number=keys %superfam_seen;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Reading in Physical domain interaction pairs to make the table
    #__________________________________________________________________
    open(my $strass_fh, '<', $strass_file) || die "\n Can not open $strass_file: $!\n";
    while(my $line=<$strass_fh>){
        next unless $line=~/(\S+)\-\d+\s+\S+\s+(\S+)\-\d+\s+\S+\s+(\S+)/;
        my ($orf, $pdbd, $eval)=($1, $2, $3);
        next unless $eval < 0.001 and $PDBD_lookup{$pdbd};
        push(@{$ORF_inter{$orf}}, $PDBD_lookup{$pdbd});
        print "\n $orf @{$ORF_inter{$orf}} ";
    }
    close($strass_fh);

    # Every superfamily of ORF 1 is paired with every superfamily of ORF 2.
    open(my $y2h_fh, '<', $y2hin_file) || die "\n Can not open $y2hin_file: $!\n";
    while(my $line=<$y2h_fh>){
        next unless $line=~/^(\S+)\s+(\S+)/;
        my ($orf1, $orf2)=($1, $2);
        my @ORF1=sort @{ $ORF_inter{$orf1} || [] };
        my @ORF2=sort @{ $ORF_inter{$orf2} || [] }; ## has 2.2.1 , 2.1.1, etc
        @ORF1=@{&remove_dup_in_array(\@ORF1)};
        @ORF2=@{&remove_dup_in_array(\@ORF2)};
        for my $sf1 (@ORF1){
            (my $superfam1=$sf1)=~s/\./_/g;
            for my $sf2 (@ORF2){
                (my $superfam2=$sf2)=~s/\./_/g;
                next unless $superfam1 and $superfam2;
                $SFINP{"$superfam1\t$superfam2"}="$superfam1\t$superfam2";
                push(@{$superfam_next{$superfam1}}, $superfam2);
                push(@{$superfam_next{$superfam2}}, $superfam1);
            }
        }
    }
    close($y2h_fh);

    open(my $sfinp_fh, '>', $sfinp_file) || die "\n Can not write $sfinp_file: $!\n";
    @superfam_pair=sort keys %SFINP;
    $num_superfam_pair_inter=@superfam_pair;
    for my $pair (@superfam_pair){
        print {$sfinp_fh} "\n$pair";
    }
    print "\n# \$num_superfam_pair_inter is $num_superfam_pair_inter\n";
    close($sfinp_fh) || die "\n Can not finish writing $sfinp_file: $!\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Creating various files
    #__________________________________________________________________________
    open(my $pml_fh, '>', $PML_file) || die "\n Can not write $PML_file: $!\n";
    print {$pml_fh} "# made by: make_interaction_network_PML_file to make piver file\n";
    print {$pml_fh} "# Total superfam number: $total_superfam_number in $PDBG_file\n";

    # One node per superfamily, and its de-duplicated, sorted neighbour list.
    for my $superfam (sort keys %superfam_next){
        next unless $superfam=~/\S+/;
        print {$pml_fh} "\nnode $superfam\t0 0 DESC";
        my @superfam_matched=@{$superfam_next{$superfam}};
        @superfam_matched=sort @{&remove_dup_in_array(\@superfam_matched)};
        $non_dup{$superfam}=[@superfam_matched];
    }

    print {$pml_fh} "\n";

    # One arc per chain before - middle - next; chains that stay on the same
    # superfamily for either step are left out.
    for my $superfam_before (sort keys %non_dup){
        next unless $superfam_before=~/\S+/;
        for my $superfam_middle (sort @{$non_dup{$superfam_before}}){
            for my $superfam_after (sort @{ $non_dup{$superfam_middle} || [] }){
                next if $superfam_before eq $superfam_middle or $superfam_middle eq $superfam_after;
                printf {$pml_fh} ("arc %-9s %-9s %-9s SF:%-9s\n",
                                  $superfam_middle, $superfam_before, $superfam_after, $superfam_middle);
            }
        }
    }
    print "\n";
    print {$pml_fh} "\n";
    close($pml_fh) || die "\n Can not finish writing $PML_file: $!\n";

    print "\n# $sfinp_file, $PML_file : created \n";
}




#______________________________________________________________________________
# Title     : make_scop_domain_interaction_pair_table
# Usage     : ($contact_list_ref, $out_file_name_ref)=
#                       &make_scop_domain_interaction_pair_table($SCOP_domain_def_file,
#                                                $PROT_STR_DOM_interact_pair,
#                                                $PDB_database_dir_path);
# Function  :
# Example   :
# Keywords  : make_protein_domain_interaction_pair_table,
#             make_SCOP_domain_interaction_pair_table
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.4
#------------------------------------------------------------------------------
sub make_scop_domain_interaction_pair_table{
    # Purpose : decide for the SCOP domains of every PDB entry whether
    #           neighbouring domains are in atomic contact, and record each
    #           pair as PROTEIN_INTERACT or PROTEIN_NONINTERACT.
    # Args    : $_[0]  SCOP domain definition file
    #           $_[1]  existing PSDIP file with already classified pairs; it is
    #                  copied to <file>.bak and its pairs are carried over
    #           $_[2]  directory holding <PDB id>.brk / <PDB id>.brk_mod files
    # Returns : (\%contact_list, \$out_file_name) where
    #           $contact_list{FLAG}{"<id><domain1> <id><domain2>"} describes a
    #           newly classified pair and $out_file_name is
    #           <base>_PROT_STR_DOM_interact.psdip.
    # Output  : the PSDIP file named above, a second file
    #           ProStrDomIntPair_<contacts>_<distance>_<interval>.psdip written
    #           by &write_protein_structural_domain_interact_pair_file(), and
    #           progress lines on STDOUT ($| is switched on).
    # Method  : the domains of an entry are sorted and only consecutive
    #           entries of that list are compared.  Coordinates are sampled
    #           every $atom_sample_interval positions; a pair interacts once
    #           $contact_num_thresh sampled distances are at or below
    #           $distance_thresh.  PDB entries already present in the previous
    #           PSDIP file and entries with a single domain are skipped.
    # Needs   : get_base_names, open_protein_structural_domain_interact_pair_file,
    #           cp, open_SCOP_domain_definition_file,
    #           open_PDB_atom_files_for_domains, ftp_get_files_given_from_a_list,
    #           send_mail_to_me, get_atom_coord_from_SCOP_domain_def_info,
    #           calculate_distance_for_2_points_in_3D and
    #           write_protein_structural_domain_interact_pair_file, all defined
    #           elsewhere.
    # Fixes   : - the %contact_list key is built from the real domain ids and is
    #             computed once per pair; it used never-assigned variables, so
    #             all interacting pairs of an entry shared one key and the
    #             non-interacting pairs used a stale or empty key;
    #           - $atom_sample_interval is lexical (the declaration had a typo);
    #           - %contact_list, %protein_str_dom_interact, the failed-entry
    #             list and all loop variables are lexical, so nothing is
    #             carried over from an earlier call;
    #           - the output handle is lexical and closed at the end.
    my($SCOP_domain_def_file, $PDB_database_dir_path, $previous_interact_file,
       $base, $out_file_name, $final_output, $single_domain_num,
       $contact_num_thresh, $distance_thresh, $atom_sample_interval,
       %protein_str_dom_interact, %contact_list, %PDB_entry_checked,
       %interacting_PDB_domains, %PROT_STR_DOM_lookup_hash, %PROT_STR_DOM_class_hash,
       @list_of_failed_PDB_entry);
    $|=1;
    $distance_thresh=5;       # Angstrom
    $contact_num_thresh=10 ;  # minimum number of atomic contacts to be considered to be interacting
    $atom_sample_interval=12; # this must be the same or larger than 3
    $SCOP_domain_def_file=$_[0];
    $previous_interact_file=$_[1];
    $PDB_database_dir_path=$_[2];
    $base=${&get_base_names($SCOP_domain_def_file)};
    $out_file_name="$base\_PROT_STR_DOM_interact.psdip"; ## protein structural domain interaction pair file
    print "\n# (0) Reading in $previous_interact_file file \n";
    %protein_str_dom_interact=%{&open_protein_structural_domain_interact_pair_file($previous_interact_file)}; # $protein_str_dom_interact{'PROTEIN_INTERACT'}{$1}=$2;
    &cp($previous_interact_file, "$previous_interact_file\.bak");

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # writing down fresh %protein_str_dom_interact to a PSDIP file (PROT_STR_DOM_interaction_pair.psdip)
    #___________________________________________________________________________________________________________
    open(my $contact_fh, '>', $out_file_name) || die "can not open \$out_file_name $out_file_name\n";
    for my $flag (keys %protein_str_dom_interact){ ## PROTEIN_INTERACT, PROTEIN_NONINTERACT
        my @pairs=sort keys %{$protein_str_dom_interact{$flag}};
        my $num_of_pairs=@pairs;
        print "# There were $num_of_pairs of already known $flag pairs from \%protein_str_dom_interact\n";
        print "     \%protein_str_dom_interact is from &open_protein_structural_domain_interact_pair_file()\n";
        for my $pair (@pairs){
            # remember the PDB entry so that it is not analysed again below
            if($pair=~/^\S(\d\w\w\w)\S+/){   $PDB_entry_checked{$1}=$1;  }
            print {$contact_fh} "$flag\t$pair\n";
        }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Openning the main input SCOP domain def. file
    #_________________________________________________________________
    my($domains_ref, $lookup_ref, $class_ref)
               =&open_SCOP_domain_definition_file($SCOP_domain_def_file);
    %interacting_PDB_domains =%{$domains_ref};
    %PROT_STR_DOM_lookup_hash=%{$lookup_ref}; ## $PROT_STR_DOM_lookup_hash{"$PDB_ID\U$1"}=$PROT_STR_DOM_name;
    %PROT_STR_DOM_class_hash =%{$class_ref};  ## $PROT_STR_DOM_class_hash{$PROT_STR_DOM_name}=$classification;

    for my $PDB_ID (sort keys %interacting_PDB_domains){
         my(@domains, $PDB_file, %PDB_atom_domains, @chains);
         if($PDB_entry_checked{$PDB_ID}){ next }
         @domains=sort @{$interacting_PDB_domains{$PDB_ID}};  # push(@{$interacting_PDB_domains{$PDB_ID}}, "$domain_num\_$region");
         if(@domains == 1){ $single_domain_num++ ; next }
         if(-s "$PDB_database_dir_path\/$PDB_ID\.brk"){   $PDB_file="$PDB_database_dir_path\/$PDB_ID\.brk";
         }elsif(-s "$PDB_database_dir_path\/$PDB_ID\.brk_mod"){ $PDB_file="$PDB_database_dir_path\/$PDB_ID\.brk_mod"; }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  $DB_atom_domains{$chain}{residue}{$atom_number}=[$x, $y,$z];, The default chain is 'NO_CHAIN'
         #_______________________________________________________________________________________________
         %PDB_atom_domains=%{&open_PDB_atom_files_for_domains($PDB_file)};
         @chains=sort keys %PDB_atom_domains;
         if(@chains < 1){
              # No usable local copy: fetch the entry by FTP and read it again.
              # NOTE(review): the server, account and password are hard-coded
              # here; they should come from configuration, not from source.
              # NOTE(review): $PDB_file is still undefined when neither local
              # file existed, so the second read may fail as well - confirm.
              my @files=("$PDB_ID\.brk");
              my $ftp_server_name='jura.ebi.ac.uk';
              my $server_directory='/data/research/pdb';
              my $username='jong';
              my $passwd='7890uiop';
              my $client_directory='/DB/PDB/Pdb';
              print "\n Opening $PDB_file failed. FTPing from $ftp_server_name \n\n";
              &ftp_get_files_given_from_a_list(\@files,         $ftp_server_name,
                                                $server_directory,
                                                $username,      $passwd,
                                                $client_directory
                                               );
              %PDB_atom_domains=%{&open_PDB_atom_files_for_domains($PDB_file)};# $DB_atom_domains{$chain}{$atom_number}=[$x, $y,$z];
              @chains=sort keys %PDB_atom_domains; ## @chains=(a, b, c, d,,,)
              if(@chains < 1){ print "\n FTP to get $PDB_file frm $ftp_server_name seems have failed \n\n";
                  push(@list_of_failed_PDB_entry, $PDB_file);
                  &send_mail_to_me(@list_of_failed_PDB_entry);
              }
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # Every pair of neighbouring domains is measured; an earlier shortcut
         # that assumed the two domains of a two-domain entry always interact
         # is no longer used.
         #________________________________________________________________________
         for(my $j=0; $j< (@domains-1); $j++){
            my($interact_or_not, @contacting_pair, @xyz_coord1, @xyz_coord2,
               $shortest, $contact_counter, $sorted_pair);
            $shortest=1000;
            my $domain1=$domains[$j];
            my $domain2=$domains[$j+1];
            my $pdbd_name1=$PROT_STR_DOM_lookup_hash{"$PDB_ID${domain1}"};
            my $pdbd_name2=$PROT_STR_DOM_lookup_hash{"$PDB_ID${domain2}"};
            @contacting_pair=sort($pdbd_name1, $pdbd_name2);
            if($protein_str_dom_interact{'PROTEIN_NONINTERACT'}{"@contacting_pair"} or
               $protein_str_dom_interact{'PROTEIN_INTERACT'}{"@contacting_pair"}){
               next
            }
            # key of this pair in %contact_list, same for both outcomes
            $sorted_pair=join(' ', sort("$PDB_ID$domain1", "$PDB_ID$domain2"));
            @xyz_coord1=@{&get_atom_coord_from_SCOP_domain_def_info(\$domain1, \%PDB_atom_domains)};
            @xyz_coord2=@{&get_atom_coord_from_SCOP_domain_def_info(\$domain2, \%PDB_atom_domains)};
            if(@xyz_coord1 < 3 or @xyz_coord2 < 3){ die "\n $pdbd_name1, $pdbd_name2, $domain1 $domain2 \@xyz_coord1 and \@xyz_coord2 are too small \n"; }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # check_contacts_between_two_molecules
            #_________________________________________________
            # NOTE(review): the slices below take FOUR consecutive elements
            # ($k..$k+3) and therefore always hold 4 entries, so the "< 3"
            # guards can never fire; whether 3 (x,y,z) was intended depends on
            # the coordinate layout returned by the helper - confirm.
            ATOM1: for(my $k=0; $k< @xyz_coord1; $k+= $atom_sample_interval){ ## default is 12
                my @xyz1=@xyz_coord1[$k..$k+3];
                if(@xyz1 < 3){ die "\n Can not get \@xyz1 "; }
                for(my $l=0; $l< @xyz_coord2; $l+= $atom_sample_interval){  ## default is 12
                    my @xyz2=@xyz_coord2[$l..($l+3)];
                    if(@xyz2 < 3){ die "\n Can not get \@xyz2 "; }
                    my $distance=${&calculate_distance_for_2_points_in_3D(\@xyz1, \@xyz2)};
                    if($distance < $shortest){ $shortest=sprintf("%4.1f", $distance); }
                    if($distance <= $distance_thresh){ $contact_counter++;  }
                    if($contact_counter >= $contact_num_thresh){
                       my $pdbd_class1=$PROT_STR_DOM_class_hash{$pdbd_name1};
                       my $pdbd_class2=$PROT_STR_DOM_class_hash{$pdbd_name2};
                       $contact_list{'PROTEIN_INTERACT'}{$sorted_pair} = join(' ', sort("$pdbd_name1 $pdbd_class1", "$pdbd_name2 $pdbd_class2"));
                       print {$contact_fh} "PROTEIN_INTERACT\t@contacting_pair\t$PDB_ID $shortest $contact_counter $domain1 $domain2\n";
                       print "PROTEIN_INTERACT\t@contacting_pair\t$PDB_ID $shortest $contact_counter $domain1 $domain2\n";
                       $interact_or_not=1;
                       last ATOM1;    # enough contacts, stop sampling this pair
                    }
                }
            }
            unless($interact_or_not){
                $contact_counter=0;   # non-interacting pairs are always reported with 0 contacts
                $contact_list{'PROTEIN_NONINTERACT'}{$sorted_pair}="@contacting_pair";
                print {$contact_fh} "PROTEIN_NONINTERACT @contacting_pair\t$PDB_ID $shortest $contact_counter $domain1 $domain2\n";
                print "PROTEIN_NONINTERACT\t@contacting_pair\t$PDB_ID $shortest $contact_counter $domain1 $domain2\n";
            }
         }
    }
    $final_output="ProStrDomIntPair_$contact_num_thresh\_$distance_thresh\_$atom_sample_interval\.psdip";
    &write_protein_structural_domain_interact_pair_file(\%contact_list, \$final_output);
    print {$contact_fh} "\n \$single_domain_num : $single_domain_num\n";
    print "\n \$single_domain_num : $single_domain_num\n";
    close($contact_fh) || die "can not finish writing \$out_file_name $out_file_name\n";
    return(\%contact_list, \$out_file_name);
}






#______________________________________________________________________________
# Title     : make_structure_interact_domain_super_family_links
# Usage     : &make_structure_interact_domain_super_family_links($file[0], $file[1], $file[2]);
#            => psi_pdbisl_merge_assign.txt pdb100d_1_48.pdbg strintergene_pairs.txt
# Function  : Links ORF pairs through structural domain superfamilies.
#             1) Reads the ORF list ($_[2]); only hits to these ORFs are kept.
#             2) Reads 'orf_descriptions.txt' from the current directory.
#             3) Reads the PDBG file ($_[1]) to map domain -> superfamily and
#                PDB entry -> superfamilies.
#             4) Reads the hit file ($_[0], two line formats accepted) and bins
#                the hits with E-value <= 1e-8 under the sequence identity
#                thresholds 0.10, 0.15, ... 0.95.
#             5) For every threshold that received hits and every PDB entry
#                with 1 to 3 superfamilies, pairs the homologs of neighbouring
#                superfamilies (index j and j+1) and writes
#                   Sinteraction_pair_seqID_<thresh>_<Eval>.txt
#                   PDB_vs_ORFs_<thresh>_<Eval>.txt
#                plus Seq_ID_vs_Sinteractio_pairs.txt and PDB_domain_stat.txt
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   : ref. of an array with the names of the Sinteraction_pair_* files
#             written by THIS call
# Warning   : A second sub with the same name (Version 1.3) is defined further
#             down in this file. Perl keeps the definition compiled last, so
#             this body only runs if that later definition is removed/renamed.
# Version   : 1.5
#------------------------------------------------------------------------------
sub make_structure_interact_domain_super_family_links{
    my($MSPA, %PDB, %interact_pairs, $i, $j, $k, %ORF_desc, $discarded_ORFs,$selected_ORFs,
       @interact_pairs, $total_domain, @PDB, $homolog1, $homolog2, $seq_id_start,
       $seq_id_stop, $Eval_thresh, $PDBG_file, $strintergene_list);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Working state. These used to be package globals, so a second
    # call inherited the counters, lookups and output list of the first.
    #__________________________________________________________
    my(%strintergene, %domain_counter, %PDBD_lookup, %superfam, %superfam1,
       %PDBD_interm, %PDBD, %PDB_domain_count, %PDBD_and_matched,
       %HOMOLOG_superfam_lookup, %missing_PDB_in_PDBG, @SeqID_thresh,
       @seq_id_thresholds, @out_files, @PDBD_group1, @PDBD_group2, @domains,
       @missin_pdb, $PDBD_name, $PDB_ID, $superfam, $fold_type, $class_type,
       $matched, $region, $evalue, $t, $m, $p, $out_file, $PDB_vs_ORFs,
       $num_pdb_interm, $pair_count_for_2_superfam, $homolog1_start,
       $homolog1_stop, $homolog2_start, $homolog2_stop, $interact_pair,
       $interact_pair_num, $num_PDB, $num_of_domain, $av_domain, $last_step);
    $seq_id_start=0.1;
    $seq_id_stop =0.85+$seq_id_start;
    $Eval_thresh=0.00000001;
    $MSPA=$_[0];
    $PDBG_file=$_[1];
    $strintergene_list=$_[2];
    $total_domain=0;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Thresholds are generated from an integer step count and rounded to
    # two decimals. Repeatedly adding 0.05 to a float drifts upwards: the
    # counter overshoots 0.95 (so that bin was never filled) and sits just
    # above 0.70 .. 0.90 (so a hit with exactly that identity was missed).
    #__________________________________________________________
    $last_step=int(($seq_id_stop-$seq_id_start)/0.05 + 0.5);
    @seq_id_thresholds=map { sprintf("%.2f", $seq_id_start + 0.05*$_) + 0 } 0..$last_step;

    print "\n# Param: \$seq_id_start $seq_id_start, \$Eval_thresh $Eval_thresh\n";
    # ORF whitelist: first whitespace delimited word of every line
    open(STRINTERGENE, "<$strintergene_list") || die "Can not open $strintergene_list \n";
    while(<STRINTERGENE>){    if(/^(\S+)/){    $strintergene{$1}=$1;     }    }
    close(STRINTERGENE);

    # ORF name -> free text description (file name is fixed, read from cwd)
    open(ORF_desc, "<orf_descriptions.txt") || die "\n Can not open orf_descriptions.txt \n";
    while(<ORF_desc>){   if(/^(\S+)\s+(.+)/){ $ORF_desc{$1}=$2 }    }
    close(ORF_desc);

    my $domain_number=100; ## 100 is absurdly high. 30 seems the maximum
    open(PDBG_FILE, "<$PDBG_file") || die "\n Can not open $PDBG_file \n\n";
    while(<PDBG_FILE>){
       # e.g. >d3btaa1 2.26.1.5.2 (872-1078) ...  -> d3btaa1, 3bta, 2.26.1, 2.26, 2
       if(/^\>([de](\d\w\w\w)\S+)\s+(((\d+)\.\d+)\.\d+)/){
           $PDBD_name=$1; $PDB_ID=$2;  $superfam=$3;    $fold_type=$4;     $class_type=$5;
           $domain_counter{$PDB_ID}++;
           $PDBD_lookup{$PDBD_name}=$superfam;
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # If I set $domain_number=2, I get only dimers
           #_______________________________________________
           if($domain_counter{$PDB_ID} > $domain_number){
               print "\n !!! Deleting $domain_counter{$PDB_ID} $PDB_ID $PDBD_name";
               # NOTE(review): both deletes are keyed by $PDB_ID, but the
               #  entries below are stored under $PDBD_name, so nothing
               #  stored by the else-branch is removed here -- confirm intent.
               delete ($superfam{$PDB_ID});
               delete ($PDBD_interm{$superfam}{$PDB_ID});
           }else{
               $superfam1{$PDB_ID}{$superfam}= $superfam;
               $superfam{$PDBD_name}{$superfam}= $superfam;
               $PDBD_interm{$superfam}{$PDBD_name}=$PDBD_name;
           }
       }
    }
    close(PDBG_FILE);


    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # openning input MSPA file (is MSPA file is given)
    #__________________________________________________________
    open(MSPA, "<$MSPA") || die "\n Can not open $MSPA \n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # 88.8    1e-17   0.32    4    61 d155c__ 7   60  YJR048W     1 GDAAKGEKEFN-KCKACHMIQAPDGTDIKGGKTGPNLYGVVGRKIASEEGFKYGEGILE<=>GSAKKGATLFKTRCLQCHTVEKGGP-----HKVGPNLHGIFGRHSGQAEGYSYTDANIK
    #__________________________________________________________________________________________
    while(<MSPA>){
       if(/^\S+\s+(\S+)\s+(\S+)\s+\d+\s+\d+\s+([de](\d\w\w\w)\S+)\s+\d+\s+\d+\s+(\S+)\s+\d+\s+(\S+)/){
           $PDBD_name=$3; $PDB_ID=$4;  $matched=$5;
           unless($strintergene{$matched}){ $discarded_ORFs++; next }else{ $selected_ORFs++; }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # removing single domain matches
           #_____________________________________________________
           #if($PDBD_name=~/[de]\d\w\w\w__/){ next }

           # $1 is the E-value column, $2 the sequence identity column
           for $i (@seq_id_thresholds){
              if($1 <= $Eval_thresh and $2 >= $i ){
                  push(@{$PDB{$i}{$PDB_ID}}, $matched);
                  # NOTE(review): $region is only assigned in the elsif branch
                  #  below; for this line format it is empty or left over from
                  #  an earlier line -- confirm which columns should be used.
                  push(@{$PDBD{$i}{$PDBD_name}}, "$matched\_$region");
                  $PDB_domain_count{$PDB_ID}{$PDBD_name}=$PDBD_name;
              }
           }
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Sarah's psi_pdbisl_merge_assign.txt file format
       #__________________________________________________________
       }elsif(/^(\S+)\-\d+\s+(\S+)\s+([de](\d\w\w\w)\S+)\-\d+\s+\S+\s+(\S+)/){
           $PDBD_name=$3;   $PDB_ID=$4;  $matched=$1; $evalue=$5;
           $region=$2;
           unless($strintergene{$matched}){ $discarded_ORFs++; next }else{ $selected_ORFs++; }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # removing single domain matches
           #_____________________________________________________
           #if($PDBD_name=~/[de]\d\w\w\w__/){ next }
           $HOMOLOG_superfam_lookup{$matched}{$PDBD_lookup{$PDBD_name}}=$PDBD_lookup{$PDBD_name};
           # This format has no identity column: a hit passing the E-value
           #  cut is stored under every threshold.
           for $i (@seq_id_thresholds){
              if($evalue <= $Eval_thresh){
                  push(@{$PDB{$i}{$PDB_ID}}, $matched);
                  push(@{$PDBD{$i}{$PDBD_name}}, "$matched\_$region");
                  $PDB_domain_count{$PDB_ID}{$PDBD_name}=$PDBD_name;
                  $PDBD_and_matched{"$matched\_$region"}="$PDBD_lookup{$PDBD_name} $evalue"; ## YALXXXX -> 1.3.1
              }
           }
       }
    }
    close(MSPA);

    # Only thresholds that actually received hits are processed
    @SeqID_thresh=sort {$a<=>$b} keys %PDB;
    open(ID_vs_SINTER_PAIRS, ">Seq_ID_vs_Sinteractio_pairs.txt") || die;

    for($t=0; $t < @SeqID_thresh; $t++){
       my(%interact_pairs, $SeqID_thresh, @PDB);
       $SeqID_thresh=$SeqID_thresh[$t];
       @PDB=sort keys %{$PDB{$SeqID_thresh}}; # @PDB has '1tco 1amu 1amw 1dea 1amy 1xva 1a33..'
       $out_file="Sinteraction_pair_seqID_$SeqID_thresh\_$Eval_thresh.txt";
       $PDB_vs_ORFs="PDB_vs_ORFs_$SeqID_thresh\_$Eval_thresh.txt";
       open(SINTERACION, ">$out_file") || die;
       open(PDB_vs_ORFs, ">$PDB_vs_ORFs") || die;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Structural intermediate connection
       #_________________________________________________________
       for($i=0; $i< @PDB; $i++){
            my(@PDBD_interm, @superfam, $h, @homolog_matched1, @homolog_matched2 );
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # retrieving all the possible superfam for ONE PDB entry AND all the PDB members for all the superfams
            #____________________________________________________________________________________________________
            @superfam=keys %{$superfam1{$PDB[$i]}}; ## all superfam members for 1abc
            if(@superfam > 3){ next }
            if(@superfam < 1){ $missing_PDB_in_PDBG{$PDB[$i]}=$PDB[$i]; next }
            print "#---- $PDB[$i]: @superfam\n";
            # Neighbouring superfamilies (j, j+1) are paired; an entry with a
            #  single superfamily therefore contributes no pairs.
            for($j=0; $j< (@superfam-1); $j++){

               #if($superfam[$j]=~/3.30.1/){ next }

               unless($superfam[$j]=~/\S/){ die "\n $j \$superfam[$j] is empty. "; next }
               @PDBD_group1=keys %{$PDBD_interm{$superfam[$j]}};
               @PDBD_group2=keys %{$PDBD_interm{$superfam[$j+1]}};
               #print "\n@PDBD_group1\n@PDBD_group2\n";
               $num_pdb_interm += @PDBD_group1 + @PDBD_group2;
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # Putting all the homologs of PDBD found(in MIPS) to @homolog_matched
               #_______________________________________________________________
               for($h=0; $h< @PDBD_group1; $h++){
                  if($PDBD{$SeqID_thresh}{$PDBD_group1[$h]}){
                      push(@homolog_matched1, @{$PDBD{$SeqID_thresh}{$PDBD_group1[$h]}});
                  }
               }
               for($h=0; $h < @PDBD_group2; $h++){
                  if($PDBD{$SeqID_thresh}{$PDBD_group2[$h]}){
                      push(@homolog_matched2, @{$PDBD{$SeqID_thresh}{$PDBD_group2[$h]}});
                  }
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # 3bta =>
               # >d3btaa1 2.26.1.5.2 (872-1078) Botulinum neurotoxin serotype A {(Clostridium botulinum)}
               # >d3btaa2 2.37.4.2.2 (1079-1295) Botulinum neurotoxin serotype A {(Clostridium botulinum)}
               # >d3btaa3 4.71.1.5.1 (1-546) Botulinum neurotoxin serotype A {(Clostridium botulinum)}
               #________________________________________________________________________________________
               print PDB_vs_ORFs "\n>$PDB[$i]: $superfam[$j] : \@PDBD_group1 : @homolog_matched1\n";
               print PDB_vs_ORFs ">$PDB[$i]: $superfam[$j+1] : \@PDBD_group2 : @homolog_matched2\n";
               $pair_count_for_2_superfam=0;
               # ($m replaces the former loop variable $b, which is the
               #  package variable reserved for sort comparisons)
               LOOP1: for($m=0; $m< @homolog_matched1; $m++){
                    if($homolog_matched1[$m]=~/(\S+)\_(\d+)\-(\d+)/){
                        $homolog1=$1; $homolog1_start=$2; $homolog1_stop=$3; }
                    LOOP2: for($k=0; $k< @homolog_matched2; $k++){
                        if($homolog_matched2[$k]=~/(\S+)\_(\d+)\-(\d+)/){
                           $homolog2=$1; $homolog2_start=$2; $homolog2_stop=$3;  }
                        # Same ORF on both sides: skip the pair when the two
                        #  matched regions overlap by more than 30 (as reported
                        #  by get_sequence_overlap_size, defined elsewhere).
                        if($homolog1 eq $homolog2){
                           if(${&get_sequence_overlap_size($homolog1_start, $homolog1_stop,
                                                           $homolog2_start, $homolog2_stop)} > 30){
                              print "$PDB[$i]: $superfam[$j] $homolog_matched1[$m] : $superfam[$j+1] $homolog_matched2[$k]\n";  next
                           }
                        }
                        print PDB_vs_ORFs " \> $homolog_matched1[$m] $PDBD_and_matched{$homolog_matched1[$m]} <-> $homolog_matched2[$k] $PDBD_and_matched{$homolog_matched2[$k]} $ORF_desc{$homolog1}\n";
                        $pair_count_for_2_superfam++;
                        # At most 20 pairs are recorded per inner loop run;
                        #  the 21st is printed above but not stored.
                        if($pair_count_for_2_superfam > 20){ last }
                        $interact_pair=join(' ', sort($homolog1, $homolog2));
                        $interact_pairs{$interact_pair}=$interact_pair;
                    }
               }
            }
            $num_pdb_interm=$num_pdb_interm/2;

       }
       $interact_pair_num=@interact_pairs=sort keys %interact_pairs;
       for($p=0; $p< @interact_pairs; $p++){
           print SINTERACION "\n$interact_pairs[$p]";
       }
       push(@out_files, $out_file);
       print  "\n# \$interact_pair_num is $interact_pair_num \n";
       print SINTERACION "\n# \$interact_pair_num is $interact_pair_num \n";
       close(SINTERACION);
       close(PDB_vs_ORFs);
       print ID_vs_SINTER_PAIRS "\n$SeqID_thresh $interact_pair_num";
       print "# $out_file is created. \n";
    }
    close(ID_vs_SINTER_PAIRS);
    open(PDB_STAT, ">PDB_domain_stat.txt") || die;
    $num_PDB=@PDB=keys %PDB_domain_count;
    for($i=0; $i< @PDB; $i++){
       $num_of_domain=@domains=keys %{$PDB_domain_count{$PDB[$i]}};
       print PDB_STAT "\n$PDB[$i] $num_of_domain @domains";
       $total_domain+=$num_of_domain;
    }
    # No surviving hit means no PDB entry: report 0 instead of dying with
    #  "Illegal division by zero".
    $av_domain= $num_PDB ? $total_domain/$num_PDB : 0;
    print "\n# \$num_PDB $num_PDB, \$total_domain $total_domain, \$av_domain $av_domain\n";
    print "# Discarded ORF(not found to be assigned from psi_pdbisl_merge_assign.txt: $discarded_ORFs \n";
    print "# Selected  ORF(    found to be assigned from psi_pdbisl_merge_assign.txt: $selected_ORFs \n\n";

    @missin_pdb=keys %missing_PDB_in_PDBG;
    print "\n# \"@missin_pdb\" are missing in PDBG file \n";
    close(PDB_STAT);
    return(\@out_files);
}





#______________________________________________________________________________
# Title     : make_structure_interaction_domain_family.pl
# Usage     : @out_files=@{&make_structure_interaction_domain_family($MSPA_file)};
# Function  : Reads an MSPA hit file and, for each sequence identity threshold
#             0.10, 0.15, ... 0.95, collects the ORFs matching the same
#             structure key (domain letter + 4 char PDB code) with
#             E-value <= 0.01. Every pair of different ORFs sharing a key is
#             written to  Sinteraction_pair_seqID_<thresh>_0.01.txt ; the
#             number of pairs per threshold goes to
#             Seq_ID_vs_Sinteractio_pairs.txt. Files go to the current dir.
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   : ref. of an array with the Sinteraction_pair_* file names written
#             by THIS call (one per threshold that received a hit)
# Version   : 1.2
#------------------------------------------------------------------------------
sub make_structure_interaction_domain_family{
    my($MSPA, %PDB, %interact_pairs, $i, $j, $k, $t,
       @interact_pairs, @PDB, $homolog1, $homolog2, $Eval_thresh,
       @seq_id_thresholds, @ID_thresh, @homolog_matched, @out_files,
       $interact_pair, $interact_pair_num, $out_file);
    $Eval_thresh=0.01;
    $MSPA=$_[0];
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # 0.10 .. 0.95 in steps of 0.05, built from integers. Adding 0.05
    # to a float 17 times overshoots 0.95 (that bin was never made)
    # and lands just above 0.70 .. 0.90 (exact hits were missed).
    #__________________________________________________________
    @seq_id_thresholds=map { (10 + 5*$_)/100 } 0..17;
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # openning input MSPA file
    #__________________________________________________________
    open(MSPA, "<$MSPA") || die "\n Can not open $MSPA \n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # 88.8    1e-17   0.32    4    61 d155c__ 7   60  YJR048W     1 GDAAKGEKEFN-KCKACHMIQAPDGTDIKGGKTGPNLYGVVGRKIASEEGFKYGEGILE<=>GSAKKGATLFKTRCLQCHTVEKGGP-----HKVGPNLHGIFGRHSGQAEGYSYTDANIK
    #__________________________________________________________________________________________
    while(<MSPA>){
       # $1 E-value, $2 seq. identity, $3 structure key (e.g. d155c), $4 ORF
       if(/\S+\s+(\S+)\s+(\S+)\s+\d+\s+\d+\s+([de]\w\w\w\w)\S+\s+\d+\s+\d+\s+(\S+)\s+\d+\s+(\S+)/){
           for $i (@seq_id_thresholds){
              if($1 <= $Eval_thresh and $2 >= $i ){  push(@{$PDB{$i}{$3}}, $4);    }
           }
       }
    }
    close(MSPA);
    # Only thresholds that received at least one hit exist as keys
    @ID_thresh=sort {$a<=>$b} keys %PDB;
    open(ID_vs_SINTER_PAIRS, ">Seq_ID_vs_Sinteractio_pairs.txt") || die;
    for($t=0; $t < @ID_thresh; $t++){
       my(%interact_pairs, $ID_thresh, @PDB);
       $ID_thresh=$ID_thresh[$t];
       @PDB=keys %{$PDB{$ID_thresh}};
       for($i=0; $i<@PDB; $i++){
          @homolog_matched=@{$PDB{$ID_thresh}{$PDB[$i]}};
          # All unordered pairs of ORFs matched to the same structure key;
          #  the hash keeps one copy of each (sorted) pair.
          for($j=0; $j< @homolog_matched; $j++){
              $homolog1=$homolog_matched[$j];
              for($k=$j; $k< @homolog_matched; $k++){
                  $homolog2=$homolog_matched[$k];
                  if($homolog1 eq $homolog2){ next }
                  $interact_pair=join(' ', sort($homolog1, $homolog2));
                  $interact_pairs{$interact_pair}=$interact_pair;
              }
          }
       }
       $interact_pair_num=@interact_pairs=sort keys %interact_pairs;
       $out_file="Sinteraction_pair_seqID_$ID_thresh\_$Eval_thresh.txt";
       push(@out_files, $out_file);
       open(SINTERACION, ">$out_file") || die;
       for($i=0; $i< @interact_pairs; $i++){
          print SINTERACION "\n$interact_pairs[$i]";
       }
       print  "\n# \$interact_pair_num is $interact_pair_num \n";
       print SINTERACION "\n# \$interact_pair_num is $interact_pair_num \n";
       close(SINTERACION);
       print ID_vs_SINTER_PAIRS "\n$ID_thresh $interact_pair_num";
       print "# $out_file is created \n";
    }
    close(ID_vs_SINTER_PAIRS);
    return(\@out_files);
}


#______________________________________________________________________________
# Title     : make_structure_interact_domain_super_family_links
# Usage     : @out=@{&make_structure_interact_domain_super_family_links(
#                              $hit_file, $PDBG_file, $ORF_list_file)};
# Function  : 1) Reads the ORF list ($_[2]); hits to other ORFs are ignored.
#             2) Reads the hit file ($_[0], two line formats accepted) and
#                stores hits with E-value <= 0.00001 per PDB entry under the
#                sequence identity thresholds 0.6, 0.65 and 0.7.
#             3) Reads the PDBG file ($_[1]) to map PDB entry <-> superfamily.
#             4) For each threshold and each PDB entry with hits, gathers the
#                ORFs matched to ANY PDB entry sharing one of its
#                superfamilies and records every pair of different ORFs.
#             Writes Sinteraction_pair_seqID_<thresh>_<Eval>.txt,
#             PDB_vs_ORFs_<thresh>_<Eval>.txt, Seq_ID_vs_Sinteractio_pairs.txt
#             and PDB_domain_stat.txt into the current directory.
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   : ref. of an array with the Sinteraction_pair_* file names written
#             by THIS call
# Warning   : This file also holds an earlier sub with the same name
#             (Version 1.5). Perl keeps the definition compiled last, i.e.
#             this one, unless yet another definition follows.
# Version   : 1.4
#------------------------------------------------------------------------------
sub make_structure_interact_domain_super_family_links{
    my($MSPA, %PDB, %interact_pairs, $i, $j, $k,
       @interact_pairs, $total_domain, @PDB, $homolog1, $homolog2, $seq_id_start,
       $seq_id_stop);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Working state, formerly package globals that kept the
    # lookups and the output list of earlier calls.
    #__________________________________________________________
    my(%strintergene, %PDB_domain_count, %superfam, %PDB_interm, @ID_thresh,
       @seq_id_thresholds, @out_files, @domains, $Eval_thresh, $PDBG_file,
       $strintergene_list, $PDBD_name, $PDB_ID, $matched, $evalue, $superfam,
       $t, $p, $out_file, $PDB_vs_ORFs, $interact_pair, $interact_pair_num,
       $num_PDB, $num_of_domain, $av_domain, $last_step);
    $seq_id_start=0.6;
    $seq_id_stop =0.7;
    $Eval_thresh=0.00001;
    $MSPA=$_[0];
    $PDBG_file=$_[1];
    $strintergene_list=$_[2];
    $total_domain=0;
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Thresholds from an integer step count, rounded to 2 decimals.
    # 0.6 + 0.05 + 0.05 is 0.7000000000000001 in floating point, so the
    # old "$i <= $seq_id_stop" loop never reached the 0.7 threshold.
    #__________________________________________________________
    $last_step=int(($seq_id_stop-$seq_id_start)/0.05 + 0.5);
    @seq_id_thresholds=map { sprintf("%.2f", $seq_id_start + 0.05*$_) + 0 } 0..$last_step;

    # ORF whitelist: first non-blank word of every line
    open(STRINTERGENE, "<$strintergene_list") || die "Can not open $strintergene_list \n";
    while(<STRINTERGENE>){
       if(/(\S+)/){
          $strintergene{$1}=$1;
       }
    }
    close(STRINTERGENE);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # openning input MSPA file
    #__________________________________________________________
    open(MSPA, "<$MSPA") || die "\n Can not open $MSPA \n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # 88.8    1e-17   0.32    4    61 d155c__ 7   60  YJR048W     1 GDAAKGEKEFN-KCKACHMIQAPDGTDIKGGKTGPNLYGVVGRKIASEEGFKYGEGILE<=>GSAKKGATLFKTRCLQCHTVEKGGP-----HKVGPNLHGIFGRHSGQAEGYSYTDANIK
    #__________________________________________________________________________________________
    while(<MSPA>){
       if(/^\S+\s+(\S+)\s+(\S+)\s+\d+\s+\d+\s+([de](\d\w\w\w)\S+)\s+\d+\s+\d+\s+(\S+)\s+\d+\s+(\S+)/){
           $PDBD_name=$3; $PDB_ID=$4;  $matched=$5;
           unless($strintergene{$matched}){ next }
           # $1 is the E-value column, $2 the sequence identity column
           for $i (@seq_id_thresholds){
              if($1 <= $Eval_thresh and $2 >= $i ){
                  push(@{$PDB{$i}{$PDB_ID}}, $matched);
                  $PDB_domain_count{$PDB_ID}{$PDBD_name}=$PDBD_name;
              }
           }
       }elsif(/^(\S+)\-\d+\s+\S+\s+([de](\d\w\w\w)\S+)\s+\S+\s+(\S+)/){
           $PDBD_name=$2; $PDB_ID=$3;  $matched=$1; $evalue=$4;
           unless($strintergene{$matched}){ next }
           if($matched=~/^TY/){ next }
           # No identity column in this format: a hit passing the E-value
           #  cut is stored under every threshold.
           for $i (@seq_id_thresholds){
              if($evalue <= $Eval_thresh){
                  push(@{$PDB{$i}{$PDB_ID}}, $matched);
                  $PDB_domain_count{$PDB_ID}{$PDBD_name}=$PDBD_name;
              }
           }
       }
    }
    close(MSPA);

    open(PDBG_FILE, "<$PDBG_file") || die "\n Can not open $PDBG_file \n\n";
    while(<PDBG_FILE>){
       # e.g. >d3btaa1 2.26.1.5.2 ...  ->  PDB entry 3bta, superfamily 2.26.1
       if(/>[de](\w\w\w\w)\S+\s+(\d+\.\d+\.\d+)/){
           $PDB_ID=$1;  $superfam=$2;
           $superfam{$PDB_ID}{$superfam}=$superfam;
           $PDB_interm{$superfam}{$PDB_ID}=$PDB_ID;
       }
    }
    close(PDBG_FILE);
    print "\n# finished reading in PDBG file \n";
    # Only thresholds that received at least one hit exist as keys
    @ID_thresh=sort {$a<=>$b} keys %PDB;
    open(ID_vs_SINTER_PAIRS, ">Seq_ID_vs_Sinteractio_pairs.txt") || die;
    for($t=0; $t < @ID_thresh; $t++){
       my(%interact_pairs, $ID_thresh, @PDB);
       $ID_thresh=$ID_thresh[$t];
       @PDB=keys %{$PDB{$ID_thresh}};

       $out_file="Sinteraction_pair_seqID_$ID_thresh\_$Eval_thresh.txt";
       $PDB_vs_ORFs="PDB_vs_ORFs_$ID_thresh\_$Eval_thresh.txt";
       open(SINTERACION, ">$out_file") || die;
       open(PDB_vs_ORFs, ">$PDB_vs_ORFs") || die;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Structural intermediate connection
       #_________________________________________________________
       for($i=0; $i< @PDB; $i++){
            my(@PDB_interm, @superfam, $h, @homolog_matched );
            # Every PDB entry that shares a superfamily with $PDB[$i]
            #  (an entry in several shared superfamilies appears repeatedly)
            @superfam=keys %{$superfam{$PDB[$i]}};
            for($j=0; $j< @superfam; $j++){
               unless($superfam[$j]=~/\S/){ next }
               push(@PDB_interm, keys %{$PDB_interm{$superfam[$j]}} );
            }
            #@PDB_interm=@{&remove_dup_in_array(\@PDB_interm)};
            print "\n>> @superfam : @PDB_interm\n";

            for($h=0; $h< @PDB_interm; $h++){
               if($PDB{$ID_thresh}{$PDB_interm[$h]}){
                  push(@homolog_matched, @{$PDB{$ID_thresh}{$PDB_interm[$h]}});
               }
            }
            #@homolog_matched=sort @{&remove_dup_in_array(\@homolog_matched)};
            print PDB_vs_ORFs "\n>$PDB[$i]: @homolog_matched";
            # All pairs of different ORFs; the hash removes repeated pairs
            for($j=0; $j< @homolog_matched; $j++){
                 $homolog1=$homolog_matched[$j];
                 for($k=($j+1); $k< @homolog_matched; $k++){
                     $homolog2=$homolog_matched[$k];
                     if($homolog1 eq $homolog2){ next }
                     $interact_pair=join(' ', sort($homolog1, $homolog2));
                     $interact_pairs{$interact_pair}=$interact_pair;
                 }
            }
       }
       $interact_pair_num=@interact_pairs=sort keys %interact_pairs;
       for($p=0; $p< @interact_pairs; $p++){
           print SINTERACION "\n$interact_pairs[$p]";
       }
       push(@out_files, $out_file);
       print  "\n# \$interact_pair_num is $interact_pair_num \n";
       print SINTERACION "\n# \$interact_pair_num is $interact_pair_num \n";
       close(SINTERACION);
       close(PDB_vs_ORFs);
       print ID_vs_SINTER_PAIRS "\n$ID_thresh $interact_pair_num";
       print "# $out_file is created \n";
    }
    close(ID_vs_SINTER_PAIRS);
    open(PDB_STAT, ">PDB_domain_stat.txt") || die;
    $num_PDB=@PDB=keys %PDB_domain_count;
    for($i=0; $i< @PDB; $i++){
       $num_of_domain=@domains=keys %{$PDB_domain_count{$PDB[$i]}};
       print PDB_STAT "\n$PDB[$i] $num_of_domain @domains";
       $total_domain+=$num_of_domain;
    }
    # No surviving hit means no PDB entry: report 0 instead of dying with
    #  "Illegal division by zero".
    $av_domain= $num_PDB ? $total_domain/$num_PDB : 0;
    print "\n# \$num_PDB $num_PDB, \$total_domain $total_domain, \$av_domain $av_domain";
    close(PDB_STAT);
    return(\@out_files);
}



#______________________________________________________________________________
# Title     : make_revcomp_sequences
# Usage     : @out=@{&make_revcomp_sequences(\@input_string_or_seq)};
#              or %out=%{&make_revcomp_sequences(\%input_string_or_seq)};
# Function  : Makes the reverse complement of nucleotide sequences or simple
#             patterns. IUPAC codes (acgtrymkswhbvdn, both cases) are
#             complemented, '[' and ']' are swapped so that a character class
#             is still well formed after the string is reversed
#             (A[CG]TT -> AA[CG]T). Other characters are kept as they are.
# Example   : @{&make_revcomp_sequences(['AAC', 'ATTC'])} is ('GTT', 'GAAT')
# Keywords  : add_revcomp_string, add_revcomp_string_array, insert_revcomp_string_array
#              make_revcomp_NA_sequences make_revcomp_DNA_sequences
#              make_reverse_complementary_sequences
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Returns   : ARRAY ref. input -> ref. of an array, one result per input
#              element, in the SAME ORDER as the input
#             HASH ref. input  -> ref. of a hash, keys are "<input key>_rc"
#             Anything else prints an error and exits the program.
# Version   : 1.1
#------------------------------------------------------------------------------
sub make_revcomp_sequences{
    my($name, %pattern_revcomp);
    my $revcomp=sub {
        my $pattern=$_[0];
        $pattern =~ tr/\[\]acgtrymkswhbvdnACGTRYMKSWHBVDN/\]\[tgcayrkmswdvbhnTGCAYRKMSWDVBHN/;
        return scalar reverse($pattern);
    };
    if(ref($_[0]) eq 'ARRAY'){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # The results used to be pulled out of a hash with values(),
        # which returns them in no particular order, so output [n]
        # did not correspond to input [n]. Map keeps the order.
        #_________________________________________________
        return([ map { $revcomp->($_) } @{$_[0]} ]);
    }elsif(ref($_[0]) eq 'HASH'){
        for $name (keys %{$_[0]}){
            $pattern_revcomp{"$name\_rc"}=$revcomp->($_[0]{$name});
        }
        return(\%pattern_revcomp);
    }else{  print "\n# (Error) make_revcomp_sequences get either HASH or ARRAY only \n"; exit }
}



#______________________________________________________________________________
# Title     : make_complementary_sequences
# Usage     : @out=@{&make_complementary_sequences(\@input_string_or_seq)};
#              or %out=%{&make_complementary_sequences(\%input_string_or_seq)};
# Function  : Makes the complement (NOT reversed) of nucleotide sequences or
#             simple patterns. IUPAC codes (acgtrymkswhbvdn, both cases) are
#             complemented, every other character, including '[' and ']',
#             is kept in place (A[CG]T -> T[GC]A).
# Example   : @{&make_complementary_sequences(['AAC', 'A[CG]T'])}
#              is ('TTG', 'T[GC]A')
# Keywords  : add_complementary_string, add_complementary_string_array, insert_complementary_string_array
#              make_complementary_NA_sequences make_complementary_DNA_sequences
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Returns   : ARRAY ref. input -> ref. of an array, one result per input
#              element, in the SAME ORDER as the input
#             HASH ref. input  -> ref. of a hash. The keys are
#              "<input key>_rc"; the suffix is historical (this sub was
#              derived from make_revcomp_sequences) and is kept so that
#              existing callers find their keys.
#             Anything else prints an error and exits the program.
# Version   : 1.1
#------------------------------------------------------------------------------
sub make_complementary_sequences{
    my($name, %pattern_complementary);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Brackets are NOT swapped here. The swap is only right when the
    # string is reversed afterwards; without the reversal it turned
    # A[CG]T into the malformed T]GC[A.
    #_________________________________________________
    my $complement=sub {
        my $pattern=$_[0];
        $pattern =~ tr/acgtrymkswhbvdnACGTRYMKSWHBVDN/tgcayrkmswdvbhnTGCAYRKMSWDVBHN/;
        return $pattern;
    };
    if(ref($_[0]) eq 'ARRAY'){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Map keeps the input order; the former values(%hash)
        # returned the results in no particular order.
        #_________________________________________________
        return([ map { $complement->($_) } @{$_[0]} ]);
    }elsif(ref($_[0]) eq 'HASH'){
        for $name (keys %{$_[0]}){
            $pattern_complementary{"$name\_rc"}=$complement->($_[0]{$name});
        }
        return(\%pattern_complementary);
    }else{  print "\n# (Error) make_complementary_sequences get either HASH or ARRAY only \n"; exit }
}



#______________________________________________________________________________
# Title     : make_clean_fasta_file
# Usage     : %seq_count=%{&make_clean_fasta_file(\$fasta_file)};
#              or %seq_count=%{&make_clean_fasta_file($fasta_file)};
# Function  : Rewrites a FASTA file with simplified header lines. Each input
#             line is tried against a fixed series of header formats (first
#             match wins); the name picked out of the header is written as
#             ">name" to  <base>_clean.mpfa , where <base> is whatever
#             get_base_names() (defined elsewhere) returns for the input
#             name. Lines that are not recognised as headers are copied as
#             sequence. A name seen twice stops the program with die.
# Example   :
# Keywords  : cleanup_fasta_db cleanup_fasta_files
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   : ref. of a hash: name written to the output -> times seen
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_clean_fasta_file{
    # NOTE: these are 'local' (dynamically scoped package variables), not
    #  'my', so they are also visible inside subs called from here.
    local($num_seq, $i, @matches, %SEQ, $input_file, $functional_name,
          $PDB_name, $base, $output_file);
    # Accepts a ref. of a scalar or a plain file name
    $input_file=${$_[0]} || $_[0];
    $base=${&get_base_names($input_file)};
    $output_file="$base\_clean.mpfa";
    open(FASTA, "$input_file") || die "\n Cannot open $input_file \n\n";
    open(OUT_FASTA, ">$output_file") || die "\n Cannot open $output_file \n\n";
    print "\n  $input_file FASTA file is cleaned up. Please wait....\n";
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The order of the branches below matters: a line is handled by the FIRST
    # pattern it matches. The number in each die message, (1)..(12), tells
    # which branch found the duplicate.
    #_____________________________________________________________________________
    while(<FASTA>){
       #  0004.PRO=T:m52(3734>5020);thrC
       #  -> the word after ';' becomes the name
       if(/\>(\S+)\.(\S+)\:(\S+)\;(\S+)\s*$/){
            $functional_name=$4;
            print OUT_FASTA "\>$functional_name\n";
            $SEQ{$functional_name}++;
            if($SEQ{$functional_name} > 1){ die "\n(1) Duplicate SEQ $functional_name \n$_\n";}
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # SGD protein seq format:  >ORFP:YAL011W YAL011W, Chr I from 132159-134075
       #  -> name and the rest of the header line are both kept
       #_____________________________________________________________________________
       }elsif(/^\>ORFP\:(\S+)\s+(.*)/){
            $SEQ{$1}++;
            print OUT_FASTA "\>$1 $2\n";
            if($SEQ{$1} > 1){ die "\n(2) Duplicate SEQ $1 \n$_\n"; }
       }elsif(/^\>P1\;(\S+) ?([\S]?)/){  ## Burkhard Rost's test set
            # Name is lowercased; an optional single character after the
            #  name is appended as "_X"
            if($2){
                $PDB_name="\L$1"."\_$2";
            }else{
                $PDB_name="\L$1";
            }
            print OUT_FASTA "\>$PDB_name\n";
            $SEQ{$PDB_name}++;
            if($SEQ{$PDB_name} > 1){ die "\n(3) Duplicate SEQ $PDB_name \n$_\n"; }
       }elsif(/^\>([^\|]+)\|([^\|]+)\|pdb\|(\S+)\|(\w)* */){ ## parsing PDB seq from NCBI blast/db dir
            # Lowercased 3rd field plus (if present) the last word
            #  character matched by the trailing (\w)* group
            $PDB_name="\L$3"."$4";
            print OUT_FASTA "\>$PDB_name\n";
            $SEQ{$PDB_name}++;
            if($SEQ{$PDB_name} > 1){ die "\n(4) Duplicate SEQ $PDB_name \n$_\n"; }
       }elsif(/^\>\/\/\:\/\:\S+\|\S+\|(\S+) +(.+)/){ ## to clean up Lisa Holm's nrdb90
            $functional_name=$1;
            print OUT_FASTA "\>$functional_name $2\n";
            $SEQ{$functional_name}++;
            if($SEQ{$functional_name} > 1){ die "\n(5) Duplicate SEQ $functional_name \n$_\n";}
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Liisa's prompt run of : ~dali/RDB/column name seq < /homes/holm/work/test/trivial.rdb.11-Mar-99 | /homes/holm/rdb2fasta.pl
       #_______________________________________________________________________________________________________________________________________________________
       }elsif(/^\>[^\|]+\|([^\|]+)\|([^\|]+)\s*/){ ##  >pironly|I38344|I38344 titin, cardiac muscle - human
            # The 2nd field is the name. If it was already used, the 3rd
            #  field is tried instead; only when that was used too is the
            #  record treated as a duplicate.
            $SEQ{$1}++;
            if($SEQ{$1} > 1){
                if($SEQ{$2} > 0){ die "\n(6) Duplicate SEQ $1 and $2 \n$_\n\n";
                }else{ $SEQ{$2}++; print OUT_FASTA "\>$2\n"; }
            }else{
                print OUT_FASTA "\>$1\n";
            }
       }elsif(/^\>\S+\/\:\S+\|([^\|]+)\|\S+/){ ## to clean up Lisa Holm's nrdb90
            $functional_name=$1;
            print OUT_FASTA "\>$functional_name\n";
            $SEQ{$functional_name}++;
            if($SEQ{$functional_name} > 1){ die "\n(7) Duplicate SEQ $functional_name \n$_\n";}
       # A sequence line ending in a literal ".*": written without the ".*"
       }elsif(/^\s*(\w+)\.\*$/){
            $last_line_seq=$1;
            print OUT_FASTA "$last_line_seq\n";
       # NOTE(review): [^\|+] matches exactly ONE character that is neither
       #  '|' nor '+'. It looks like a typo for [^\|]+ -- confirm against
       #  real nr headers before changing, since it decides which branch wins.
       }elsif(/^\>[^\|+]\|\S*\|\S*\|\S*\|([^\|]+) +/){ ## This is nr format
            print OUT_FASTA "\>$1\n";
            $SEQ{$1}++;
            if($SEQ{$1} > 1){ die "\n(8) Duplicate SEQ $1 \n$_\n";   }
       }elsif(/^\>[^\|]+\|([^\|]+) +/){ ## for >gi|45803 (X04609) gamma subunit (3'terminu
            print OUT_FASTA "\>$1\n";
            $SEQ{$1}++;
            if($SEQ{$1} > 1){ die "\n(9) Duplicate SEQ $1 \n$_\n";    }
       }elsif(/^\>\S+\|\S*\|\S*\|(\S+) +/){ # for gi|386065|bbs|133195 cytochrome c3
            print OUT_FASTA "\>$1\n";
            $SEQ{$1}++;
            if($SEQ{$1} > 1){ die "\n(10) Duplicate SEQ $1 \n$_\n";   }
       # Any other header starting with a word: the line is copied unchanged
       #  (note: this pattern is not anchored to the start of the line)
       }elsif(/\>(\w+)\s*/){
            print OUT_FASTA;
            $SEQ{$1}++;
            if($SEQ{$1} > 1){ die "\n(11) Duplicate SEQ $1 \n$_\n";  }
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Actual SEQUENCE
       #  Every word that is followed by at least one non-word character
       #  (blank, newline, ..) is written without the separators. A newline
       #  is added after word number 0, 7, 14, .. of the line only.
       #______________________________________________
       }elsif(@matches=$_=~/(\w+)\W+/gi){  #  CTAATTTTTG TCAGAATGAT GAGAGTGGCA GCCTCCTGAA GAGCAGCTGT CTCCGTGTGA
            for($i=0; $i< @matches; $i++){  # to handle CPnseq3 genome seq
               print OUT_FASTA "$matches[$i]";
               if($i%7 == 0){ print OUT_FASTA "\n"; }
            }
       # Reached only by lines the branch above did not take, e.g. a last
       #  line with no newline after the word
       }elsif(/^[A-Y]+$/i){  ## prints only CHARs
            print OUT_FASTA;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Burkhard's PHD test set
       #  (a word after exactly three blanks is written AND counted as a name)
       #_____________________________________________________________________________
       }elsif(/^   (\w+)$/){
            print OUT_FASTA "$1\n";
            $SEQ{$1}++;
            if($SEQ{$1} > 1){ die "\n(12) Duplicate SEQ $1 \n$_\n"; }
       }
    }
    close(FASTA);
    close(OUT_FASTA);
    # keys in scalar context: number of different names recorded
    $num_seq=keys %SEQ;
    print "\n\n\n $num_seq has been processed producing \"$output_file\"\n\n\n";
    return(\%SEQ);
}





#______________________________________________________________________________
# Title     : merge_superfam_fasta_files_for_ISL
# Usage     : $merged_file=${&merge_superfam_fasta_files_for_ISL};
# Function  : Concatenates the superfamily FASTA files of the current
#             directory into 'compiled_interm_lib.mpfa'. A file takes part
#             when its name starts like 1.2.3.fa, 1.2.3.mpfa, 1.2.3.sfa ...
#             (three dot separated numbers, then 'fa' with up to two of the
#             letters m, p, s in front). A sequence name without an x.x.x
#             number gets "_<base>" appended, <base> being what
#             get_base_names() returns for the file. Only header lines and
#             lines made of word characters alone are copied.
# Example   :
# Keywords  : compile_superfam_fasta_files_for_ISL
# Options   :
# Author    : jong@salt2.med.harvard.edu
# Category  :
# Returns   : ref. of a scalar holding the name of the file that was written
# Version   : 1.5
#------------------------------------------------------------------------------
sub merge_superfam_fasta_files_for_ISL{
    my($j, $base, $seq_name, $name, @files, $compiled_file);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # One variable for the output name: the file used to be written as
    # compiled_interm_lib.mpfa while the messages and the RETURNED name
    # said compiled_interm_lib.fa, a file that was never created.
    #_____________________________________________________________
    $compiled_file="compiled_interm_lib.mpfa";
    @files=@{&read_file_names_only('.', 'mpfa')};
    print "\n# (i) Making $compiled_file, this takes several minutes\n";
    open(COMPILED, ">", $compiled_file) || die "\n Can not open $compiled_file for writing: $! \n";

    for($j=0; $j< @files; $j++){
       unless($files[$j]=~/^\d+\.\d+\.\d+\.[mps]{0,2}fa/){
           next;
       }
       $base=${&get_base_names($files[$j])};
       # An unreadable input is reported and skipped, it used to be
       #  skipped silently.
       unless(open(SUPER, "<", $files[$j])){
           print "\n# (W) merge_superfam_fasta_files_for_ISL: can not open $files[$j]: $! \n";
           next;
       }
       while(<SUPER>){
           if(/\>(\S+)/){
               $name=$1;
               $seq_name="\>$name";
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # Checks if 1.1.1 is attached or not. If not , attach it now
               #_____________________________________________________________
               unless($seq_name=~/\d+\.\d+\.\d+/){
                  $seq_name="\>$name\_$base";
               }
               print COMPILED $seq_name, "\n";
           }elsif(/^(\w+)$/){
               print COMPILED $1, "\n";
           }
       }
       close SUPER;
    }
    # Closing flushes the buffered output; a failure here means the merged
    #  file is incomplete.
    close(COMPILED) || die "\n Can not finish writing $compiled_file: $! \n";
    print "\n# (i) merge_superfam_fasta_files_for_ISL: $compiled_file has been created. \n";
    return( \$compiled_file);
}




#______________________________________________________________________________
# Title     : make_thumbnails_of_pictures
# Usage     :
# Function  :
# Example   : <a href="jo.jpg"><img SRC="jo.jpg" BORDER=0 height=111 width=76></a>
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1
#------------------------------------------------------------------------------
sub make_thumbnails_of_pictures{
    # Purpose : Writes index_thumbnail_pictures.html into the current
    #           directory. Every picture file larger than 19000 bytes gets
    #           one linked <img> entry displayed at 61x74; the entry points
    #           at the original file, no smaller image file is created.
    # Argument: $_[0] - reference to an array of picture file names.
    # Returns : 1 after the index has been written.
    # Dies    : when the index can not be opened, written, or ends up empty.
    # NOTE(review): file names are put into the HTML unescaped.
    my(@pic_files, $thumbnail_index_file, $i);
    @pic_files=@{$_[0]};
    $thumbnail_index_file="index_thumbnail_pictures.html";
    open(my $html_fh, '>', $thumbnail_index_file) || die "\n Can not open $thumbnail_index_file \n";
    print {$html_fh} "<HTML>";
    for($i=0; $i<@pic_files; $i++){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # List only pictures which are over 19000 bytes in size
        # (a missing file counts as size 0)
        #_____________________________________________________________
        if( ((-s $pic_files[$i]) || 0) > 19000 ){
           print {$html_fh} "<a href=\"$pic_files[$i]\"><img SRC=\"$pic_files[$i]\" height=74 width=61></a>\n";
        }
    }
    print {$html_fh} "<\/html>";
    # The handle must be closed BEFORE the size test below: a short index is
    # still in the output buffer until then and the file would look empty.
    close($html_fh) || die "\n Can not finish writing $thumbnail_index_file: $!\n";
    if(-s $thumbnail_index_file){
       print "\n $thumbnail_index_file has been made \n\n";
    }else{
       die "\n $thumbnail_index_file has not been made\n";
    }
    return 1;
}


#______________________________________________________________________________
# Title     : make_clickable_URL_dir_path
# Usage     :
# Function  :
# Example   :IN : /Bio/Proj/Bio/SAT   (the first path component is not linked)
#            OUT: <A HREF="/Proj">Proj</A>/<A HREF="/Proj/Bio">Bio</A>/<A HREF="/Proj/Bio/SAT">SAT</A>/
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub make_clickable_URL_dir_path{
    # Purpose : Turns a directory path into a row of HTML anchors, one per
    #           path component, each linking to the path accumulated so far.
    #           The path is split on "/"; the first two fields (the empty
    #           string before a leading slash and the first directory) are
    #           skipped and do not appear in the links.
    # Argument: $_[0] - reference to the path string.
    # Returns : reference to the anchor string, or to "$ENV{DOCUMENT_ROOT}/"
    #           when no anchor could be built.
    my ($path_ref) = @_;
    my ($anchor_open, $anchor_mid, $anchor_close) = ('<A HREF="', '">', '</A>');
    my @components = split(/\//, ${$path_ref});
    my $parent_path = '';
    my $clickable   = '';
    foreach my $name (@components[2 .. $#components]){
        $clickable   .= $anchor_open . '/' . $parent_path . $name
                      . $anchor_mid . $name . $anchor_close . '/';
        $parent_path .= $name . '/';
    }
    $clickable = "$ENV{DOCUMENT_ROOT}\/" unless $clickable;
    return(\$clickable);
}


#______________________________________________________________________________
# Title     : make_stragments_from_HSSP_files
# Usage     :
# Function  :
# Example   :
# Keywords  : secondary_structure, sec_str, structural_fragment, HSSP
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_stragments_from_HSSP_files{
    # Purpose : Cuts the sequence and the secondary structure string of every
    #           entry of the given HSSP files into overlapping windows of 5
    #           residues ("seqlet" and its "stragment"), prints one line per
    #           window to STDOUT and collects the windows.
    # Argument: file names, sorted into @file by handle_arguments.
    # Returns : reference to a hash built for this call only:
    #           $stragment{PDB_ID}{seqlet} = [ stragment, stragment, ... ]
    # Note    : each entry returned by open_HSSP_file_for_secture is read
    #           through its 'SEQUENCE' and 'SECTURE' keys.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    # Everything is lexical: with a package-global %stragment the fragments of
    # earlier calls stayed in the hash and were returned again by later calls.
    my(%HSSP_secture, @PDB_IDs, $stragment_size, $stragment_end, $stragment_start,
       %stragment, $stragment, $seqlet, $PDB_ID, $sequence, $secture,
       $leng_seq, $leng_seq_stragment_size);
    $stragment_size=5;

    for($f=0; $f< @file; $f++){
       %HSSP_secture=%{&open_HSSP_file_for_secture($file[$f])};
       @PDB_IDs=keys %HSSP_secture;
       for($i=0; $i< @PDB_IDs; $i++){
          $PDB_ID=$PDB_IDs[$i];
          $sequence=$HSSP_secture{$PDB_ID}{'SEQUENCE'};
          $secture =$HSSP_secture{$PDB_ID}{'SECTURE'};
          $leng_seq=length($sequence);
          # Last window start; negative (no window at all) for short sequences
          $leng_seq_stragment_size= $leng_seq - $stragment_size;
          for($j=0; $j<= $leng_seq_stragment_size ; $j++){
             $seqlet  = substr($sequence, $j, $stragment_size);
             $stragment=substr($secture , $j, $stragment_size);
             $stragment_start=$j + 1;                 # 1-based, inclusive
             $stragment_end  =$j+$stragment_size;
             print "\n>$PDB_ID\_$stragment_start\-$stragment_end\_$seqlet:$stragment";
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # This can cause a lot of memory consumption, so be careful over whole HSSP dir
             #_________________________________________________________________________________
             push(@{$stragment{$PDB_ID}{$seqlet}}, $stragment);
          }
       }
    }
    return(\%stragment);
}




#_______________________________________________________________________________
# Title     : make_seq_segments_fasta_files_from_MSP_files
# Usage     : @processed_files=&make_seq_segments_fasta_files_from_MSP_files("d=$input_dir", 'o');
#                     $input_dir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files in the dir if you wish,
#             and then go in any other sub
#             if any file(dir) is linked, it skips that file.
# Example   : as in my 'indexing.pl' for perl file indexer.
# Warning   : Seems to work fine., !! Change the name of this sub to shorter one
#                                  !! for your own purpose.
# Keywords  : open_dir_and_go_in_and_do_something,
#             go in there do something, get into subdir and do something.
#             go_in_subdir_and_do_something, recursive execution
#
#             ** Check out some subs derived from this:
#              concatenate_seq_files_in_subdir_to_db
#              opendir_and_go_in_and_make_pdb_db
# Options   :
#    $input_dir= by d=
#    $over_write=o by o
# Returns   :
# Argument  : takes the string "d=<dir name>" and, optionally, 'o' (overwrite); returns the list @included_files.
# Category  :
# Version   : 1.4
#---------------------------------------------------------------------------
sub make_seq_segments_fasta_files_from_MSP_files{
		# Purpose : Walks the directory given as "d=<dir>" (first argument),
		#           recursing into sub directories. For every regular file whose
		#           path matches /\/d\S+\.mspa/ each matching MSP line is split
		#           into a query, an intermediate and a match sequence range.
		#           For each range a .spfa file is written (unless a non-empty
		#           one exists) and three pairwise "ssearch" runs are started
		#           whose output goes to .msso files.
		# Options : a second argument containing 'o' sets $over_write, which
		#           makes ssearch run again even when the .msso file exists.
		# Returns : the list @included_files. Nothing is added to it here
		#           except the return values of the recursive calls.
		# NOTE(review): the sequences are read from the package hash %seq, which
		#           this sub neither declares nor fills - presumably the caller
		#           must load it first; confirm.
		# NOTE(review): $over_write, $dir, @dir, $num, $pwd and the $query...
		#           variables are package globals; $over_write is never reset.
	  my ($each_file, $file, $new_fasta_name, %pdb_seq, @included_files,
			 $min_seq_leng_accepted, @read_files, $original_dir);

		if($_[0]=~/d=(\S+)/){ $original_dir=$1; }
		if($_[1]=~/o/){ $over_write='o'; }

		@read_files=@{&read_any_dir_simple(\$original_dir)};

		# A named sub inside a sub is still an ordinary package-level sub.
		# It returns a ref. of the directory listing minus its first two entries.
		# NOTE(review): the splice assumes the first two entries are '.' and '..';
		# the opendir result is not checked.
		sub read_any_dir_simple { my($in_dir); $in_dir=${$_[0]} || $_[0]; opendir(DIR1,"$in_dir");
				my(@read_files) = readdir(DIR1);closedir(DIR1);	splice(@read_files, 0, 2);  return(\@read_files);
		}

		# $pwd is not used anywhere else in this sub.
		chomp($pwd=`pwd`);
		foreach $file(@read_files){
				$each_file="$original_dir/$file";

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# If the File read is DIR => Recurse
				#________________________________________________________
				if( -d "$each_file" ){  ## If it is a directory.
						$num=@included_files=(@included_files,
						    &make_seq_segments_fasta_files_from_MSP_files("d=$each_file", $over_write) );   # RECURSION occurs here!!
				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# If the File read is FILE => PROCESS!
				#________________________________________________________
				}elsif (-f $each_file){     #<<------ This is where things match
						@dir=split(/\//, $each_file);
						$dir=$dir[($#dir-1)];  # $dir is the last directory component of $each_file.
						###  put any program which does something here #################################

						if($each_file=~/\/d\S+\.mspa/){
						   #chdir($dir);
							 open(MSP, "$each_file") || die "\n Can not open $each_file";
							 while(<MSP>){
									# Captures: $1-$3 query name/start/stop, $4-$6 intermediate
									# name/start/stop, $7 match name, $8 a "1.2.3" id (not used),
									# $9-$10 match start/stop.
									if(/\d+\s+(d\S+)_(\d+)\-(\d+)\s+\d+\s+\d+\s+nr_(\S+)_(\d+)\-(\d+)_(\S+)_(\d+\.\d+\.\d+)_(\d+)\-(\d+)/){
										 $query=$1;
										 $query_start=$2;
										 $query_stop=$3;
										 $inter=$4;
										 $inter_start=$5;
										 $inter_stop=$6;
										 $match=$7;
										 $match_start=$9;
									   $match_stop=$10;
										 $query_with_range="$query\_$query_start\-$query_stop";
										 $inter_with_range="$inter\_$inter_start\-$inter_stop";
										 $match_with_range="$match\_$match_start\-$match_stop";

										 #print "\n Processing $query $inter $match with $each_file\n";

										 # The output paths are relative: only the last directory
										 # name is used. NOTE(review): this seems to require that
										 # the current directory is the parent of $dir - confirm.
										 $query_spfa_file="$dir\/$query_with_range\.spfa";
										 $inter_spfa_file="$dir\/$inter_with_range\.spfa";
										 $match_spfa_file="$dir\/$match_with_range\.spfa";

										 # A file is (re)created only when it is missing or empty.
										 open(QUERY_WITH_RANGE, ">$query_spfa_file") unless -s "$query_spfa_file";
										 open(INTER_WITH_RANGE, ">$inter_spfa_file") unless -s "$inter_spfa_file";
										 open(MATCH_WITH_RANGE, ">$match_spfa_file") unless -s "$match_spfa_file";

										 # start/stop are 1-based and inclusive, substr is 0-based.
										 $query_seq_segment=substr($seq{$query}, $query_start-1, ($query_stop-$query_start+1));
										 $inter_seq_segment=substr($seq{$inter}, $inter_start-1, ($inter_stop-$inter_start+1));
										 $match_seq_segment=substr($seq{$match}, $match_start-1, ($match_stop-$match_start+1));
										 #print "\n\$match_seq_segment is  $match_seq_segment\n";
										 # A file opened just above is still empty here, so the same
										 # -s test lets the print through; an existing file is left alone.
										 print QUERY_WITH_RANGE "\>$query_with_range\n$query_seq_segment\n" unless -s "$query_spfa_file";
										 print INTER_WITH_RANGE "\>$inter_with_range\n$inter_seq_segment\n" unless -s "$inter_spfa_file";
										 print MATCH_WITH_RANGE "\>$match_with_range\n$match_seq_segment\n" unless -s "$match_spfa_file";

										 close(QUERY_WITH_RANGE);
										 close(MATCH_WITH_RANGE);
										 close(INTER_WITH_RANGE);

										 $query_msso_file="$dir\/$query_with_range\_$match_with_range\.msso";
										 $inter_msso_file1="$dir\/$query_with_range\_$inter_with_range\.msso";
										 $inter_msso_file2="$dir\/$match_with_range\_$inter_with_range\.msso";

										 # The commands go through the shell with the file names
										 # interpolated; the exit status of ssearch is not checked.
										 if( !(-s "$query_msso_file") or $over_write){
										    system("ssearch $query_spfa_file $match_spfa_file -m 10 > $query_msso_file");
										 }
										 if(!(-s "$inter_msso_file1") or $over_write){
										    system("ssearch $query_spfa_file $inter_spfa_file -m 10 > $inter_msso_file1");
										 }
										 if( !(-s "$inter_msso_file2") or $over_write){
										    system("ssearch $match_spfa_file $inter_spfa_file -m 10 > $inter_msso_file2");
										 }

									 }
							 }
							 close(MSP);
							 #chdir("..");
						}
				# NOTE(review): -d and -f follow symbolic links, so this branch is
				# reached only for links that point to neither a directory nor a
				# plain file; links to directories are recursed into above.
				}elsif (-l $each_file){
						print "\n\n$each_file is linked, skipping. \n";
						next;
				}else{  print "\n# (i) ODD?? "; next;  }
		}
		return(@included_files);
}


#______________________________________________________________________________
# Title     : make_6_frame_dna_sequences
# Usage     : %six_dna_frame_seqs=%{&make_6_frame_dna_sequences(\%input_seq)};
# Function  : It makes 3 different types of frames for input seq
#               Then it reverses the input seq and makes another 3 frames!!
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_6_frame_dna_sequences{
    # Purpose : Makes the six reading frames of every sequence in the input
    #           hash: the sequence itself and the sequence with 1 and 2
    #           leading residues removed (_fr1, _fr2, _fr3), then the same
    #           three for the reversed sequence (_rf1, _rf2, _rf3).
    #           The reversed strand is a plain string reversal; bases are
    #           NOT complemented.
    # Argument: $_[0] - reference to a hash of (name => sequence).
    # Returns : reference to a hash of ("<name>_fr1" => seq, ...), six entries
    #           for each input sequence. A frame that would start beyond the
    #           end of a short sequence is the empty string.
    my %input_seq = (ref($_[0]) eq 'HASH') ? %{ $_[0] } : ();
    my %six_frame_seqs;

    foreach my $orig_name (keys %input_seq){
        my $forward  = defined($input_seq{$orig_name}) ? $input_seq{$orig_name} : '';
        my $backward = scalar reverse($forward);
        my %strand_of = ('fr' => $forward, 'rf' => $backward);

        foreach my $direction (keys %strand_of){
            foreach my $offset (0 .. 2){
                my $frame_number = $offset + 1;
                # substr() beyond the end of the string would give undef
                # and a warning, so short sequences get '' explicitly.
                $six_frame_seqs{"$orig_name\_$direction$frame_number"} =
                    ($offset < length($strand_of{$direction}))
                        ? substr($strand_of{$direction}, $offset)
                        : '';
            }
        }
    }
    return(\%six_frame_seqs);
}

#______________________________________________________________________________
# Title     : make_proportionally_random_sequence
# Usage     :
# Function  :
# Example   :
# Keywords  : make_random_protein_sequence
# Options   :
#   $return_FASTA_format_hash=f by f
#   $make_proportional_random_seq=p by p
#   $number_of_seq_to_be_generated= by n=
#   $length_of_target_rand_seq= by l=
#   $use_this_char_set= by c=
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_proportionally_random_sequence{
		# Purpose : Generates random sequences whose residues are drawn with
		#           given relative frequencies.
		# Options : (as sorted out by handle_arguments)
		#   n=<num>   number of sequences to make (default 1)
		#   l=<num>   length of each sequence (default 100)
		#   c=<chars> draw uniformly from these characters; wins over t=
		#   t=<hash>  table of (char => relative frequency); the values need not
		#             add up to 1
		#   p         with neither c= nor t=: use the built-in amino acid
		#             frequency table
		#   w         put a newline after every 60 residues
		#   f         return a FASTA style hash (rand0 => seq, rand1 => seq ..),
		#             wrapped after every 60 residues
		#   With none of c=, t=, p the 20 amino acid letters are drawn uniformly.
		# Returns : with f a reference to the hash; otherwise a reference to the
		#           sequence string when one sequence was made, or a list of
		#           such references when several were made.
		# Note    : the random generator is not reseeded here. Perl seeds it on
		#           the first rand(); a time based srand on every call repeats
		#           the same sequences for calls within the same second.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($number_of_seq_to_be_generated, $length_of_target_rand_seq,
			 $use_this_char_set, $proportional_requested, $wrap_seq_lines,
			 $return_FASTA_format_hash, $regular_frequency_calculated,
			 $number_of_element_chars, $total_frequency,
			 %frequency_table, %fasta_format_hash,
			 @element_chars, @frequency, @out_seq_ref);
		$number_of_seq_to_be_generated=1; # default
		$length_of_target_rand_seq=100;   # default

		if($vars{'t'}=~/HASH/){  %frequency_table=%{ $vars{'t'} } }
		if($vars{'n'}=~/(\d+)/){ $number_of_seq_to_be_generated=$1 }
		if($vars{'l'}=~/(\d+)/){ $length_of_target_rand_seq=$1 }
		if($vars{'c'}=~/(\S+)/){ $use_this_char_set=$1 }
		if($char_opt=~/p/i){ $proportional_requested='p' } ####### PROPORTIONAL random #####
		if($char_opt=~/w/i){ $wrap_seq_lines='w' }
		if($char_opt=~/f/i){ $return_FASTA_format_hash='f' }

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Choose the characters and their frequencies.
		# @element_chars and @frequency are always built in
		# the same order so that index x of both belong together.
		#______________________________________________________
		if(defined($use_this_char_set) and length($use_this_char_set)){
				@element_chars=split(//, $use_this_char_set);
				$regular_frequency_calculated=1/@element_chars;
				print "\n# (i) \$use_this_char_set is given, making regular freq. table of $regular_frequency_calculated\n";
				@frequency=($regular_frequency_calculated) x @element_chars;
		}elsif(%frequency_table){
				@element_chars=sort keys %frequency_table;
				@frequency=@frequency_table{@element_chars};
		}elsif($proportional_requested){
				@element_chars=("A",   "C",     "D",    "E",    "F",
								 "G",     "H",     "I",    "K",    "L",
								 "M",     "N",     "P",    "Q",    "R",
								 "S",     "T",     "V",    "W",    "Y");
				@frequency = (.08713, .03347, .04687, .04953, .03977,
								 .08861, .03362, .03689, .08048, .08536,
								 .01475, .04043, .05068, .03826, .04090,
								 .06958, .05854, .06472, .01049, .02992);
		}else{
				# No option at all: the 20 amino acid letters, equally likely
				@element_chars=split(//, 'ACDEFGHIKLMNPQRSTVWY');
				@frequency=(1/@element_chars) x @element_chars;
		}
		$number_of_element_chars=@element_chars;

		$total_frequency=0;
		foreach my $one_frequency (@frequency){ $total_frequency+=$one_frequency }
		unless($total_frequency > 0){
				print "\n# (E) make_proportionally_random_sequence: the frequencies do not add up to a positive number.\n";
				return;
		}

		if($debug==1){  &__YELLOW__, printf("> rand%d random, from $0, len %d\n", $i+1, $num_opt[0]); &__RESET__; }

		for($j=0; $j< $number_of_seq_to_be_generated; $j++){
				 my $rand_proportional_seq='';
				 for($i=1; $i <= $length_of_target_rand_seq; $i++){
						 # Scaling by the total lets tables that do not sum to 1 work
						 my $a_random_number=rand($total_frequency);
						 print "\n# (i) \$a_random_number is $a_random_number" if $debug ==1;
						 my $sum=0;
						 # The last character is the fall back when rounding keeps the
						 # running sum just below the random number.
						 my $chosen_char=$element_chars[$#element_chars];
						 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						 # To get a proportionally chosen character add up $sum. If A occurs often,
						 # it makes $sum pass the random number more often.
						 #_______________________________________________________________________________
						 for($x=0; $x< $number_of_element_chars; $x++){
									$sum+=$frequency[$x];
									if( $sum > $a_random_number){
											 $chosen_char=$element_chars[$x];
											 last;
									}
						 }
						 $rand_proportional_seq .= $chosen_char;
						 if( !($i%60) and ($return_FASTA_format_hash or $wrap_seq_lines) ){
									$rand_proportional_seq .= "\n";
						 }
				 }
				 print "\nProportionasl Rand SEQ\n : $rand_proportional_seq \n" if $debug ==1;
				 if($return_FASTA_format_hash){ $fasta_format_hash{"rand${j}"}=$rand_proportional_seq }
				 else{ push(@out_seq_ref, \$rand_proportional_seq) }
		}
		if($return_FASTA_format_hash){
				return(\%fasta_format_hash);
		}elsif(@out_seq_ref==1){
				return($out_seq_ref[0]);
		}elsif(@out_seq_ref > 1){
				return(@out_seq_ref);
		}
		return;
}


#_____________________________________________________________________
# Title     : make_scrambled_seq_database
# Usage     : &make_scrambled_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  : scramble_seq_database, create_scrambled_seq_database
# Options   :
# Category  :
# Version   : 1.1
#-------------------------------------------------------------------
sub make_scrambled_seq_database{
		# Purpose : For every given FASTA file, reads the sequences with
		#           open_fasta_files, passes them through scramble_sequences and
		#           writes the result with write_fasta to "<base>_sc.<ext>"
		#           ("<base>_sc.fasta" when the input has no extension).
		#           Any trailing run of "_sc" is removed from <base> first, so
		#           scrambling "x_sc.fa" again writes "x_sc.fa", not "x_sc_sc.fa".
		#           A non-empty output file that already exists is moved to
		#           "<base>_sc_bak.<ext>" before the new one is written.
		# Argument: file names, sorted into @file by handle_arguments.
		# Returns : nothing meaningful (the result of the final print).
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 my (%seqs, %scrambled_seqs, $fasta_file_for_DB, $base, $ext,
				 $out_file_name, $out_bak_file);

		 for($i=0; $i< @file; $i++){
					$fasta_file_for_DB =$file[$i];
					$base=${&get_base_names($fasta_file_for_DB)};
					#~~~~~~~~~~~~~~~~~~~ To prevent growing of _sc_sc_sc... ~~~~~~~~~~~~`
					# Non greedy name plus a real (?:_sc) group: strips every trailing
					# "_sc" and nothing else (a [_sc] class would also eat "s" or "c").
					if($base=~/^(\S+?)(?:_sc)+$/){ $base=$1 }

					$ext =${&get_file_extensions($file[$i])};
					unless($ext=~/\S/){
								 $ext='fasta';
								 print "\n# There was no file ext for $base, attaching \"fasta\" as default\n";
					}
					$out_file_name="$base\_sc\.$ext";
					$out_bak_file ="$base\_sc_bak\.$ext";

					# Read the input BEFORE touching the output: when an already
					# scrambled file is given, input and output are the same file.
					%seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
					%scrambled_seqs=%{&scramble_sequences(\%seqs)};

					if(-s $out_file_name){
								 print "\n# $out_file_name already exists, moving it to $out_bak_file\n";
								 rename($out_file_name, $out_bak_file)
										 || warn "\n# make_scrambled_seq_database: Could not move $out_file_name to $out_bak_file: $!\n";
					}
					&write_fasta(\%scrambled_seqs, $out_file_name );
					if(-s $out_file_name){
								 print "\n# make_scrambled_seq_database: Supposedly wrote new file: $out_file_name\n";
					}else{
								 print "\n# make_scrambled_seq_database: Error in writing: $out_file_name\n";
					}
		 }
		 print "\n# make_scrambled_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix_array
# Usage     : @matrix=@{&make_2D_identity_matrix_array(\@seq1, \@seq2)};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
#    $skip_gap_char = g  for skipping gap char (any special char)
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub make_2D_identity_matrix_array{
    # Purpose : Compares every element of the first array with every element
    #           of the second one and sets $matrix[$k][$l]=1 where they are
    #           equal (string comparison). With one array it is compared
    #           with itself. Each hit is also printed to STDOUT.
    # Arguments: one or two array references, in that order of use, and
    #           optionally a plain string containing 'g': then elements of
    #           the second array holding a non-word character are skipped.
    # Returns : reference to the (sparse) 2D array.
    #
    # The collected array refs are lexical: kept in a package-global @seqs
    # they piled up, and every later call compared the arrays of the very
    # first call again.
    my (@matrix, @seqs, $skip_gap_char, $k, $l);
    foreach my $argument (@_){
        if(ref($argument) eq 'ARRAY'){
            push(@seqs, $argument);
        }elsif(!ref($argument) and defined($argument) and $argument=~/g/){
            $skip_gap_char='g';
        }
    }
    my @seq_0 = (@seqs > 0) ? @{$seqs[0]} : ();
    my @seq_1 = (@seqs > 1) ? @{$seqs[1]} : ();
    unless(@seq_1){ @seq_1=@seq_0; }

    for($k=0; $k< @seq_0; $k++){
        for($l=0; $l< @seq_1; $l++){
            if($seq_1[$l] =~/\W/ and $skip_gap_char){ next }
            if($seq_0[$k] eq $seq_1[$l]){
                $matrix[$k][$l]=1;
                print "# X\[$k\] Y\[$l\] = 1 \n";
            }
        }
    }
    return(\@matrix);
}


#__________________________________________________________________________
# Title     : make_2D_aa_residue_matrix_array
# Usage     : @matrix=@{&make_2D_aa_residue_matrix_array(\@seq)};
# Function  : @matrix is like  $matrix[1][2]='A'; when aa residue is identical
#             This assigns identical residue to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_2D_aa_residue_matrix_array{
    # Purpose : Builds a 2D array which holds, at [$row][$column], the residue
    #           itself wherever element $row of the first array equals
    #           element $column of the second one. Called with exactly one
    #           argument, the array is compared with itself.
    #           Each hit is also printed to STDOUT.
    # Arguments: $_[0], $_[1] - references to arrays of residues.
    # Returns : reference to the (sparse) 2D array.
    my @row_residues    = @{$_[0]};
    my @column_residues = @{$_[1]};
    @column_residues = @row_residues if @_ == 1;

    my @residue_matrix;
    foreach my $row (0 .. $#row_residues){
        foreach my $column (0 .. $#column_residues){
            next unless $row_residues[$row] eq $column_residues[$column];
            $residue_matrix[$row][$column] = "$row_residues[$row]";
            print "# $row_residues[$row] = $column \n";
        }
    }
    return(\@residue_matrix);
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix
# Usage     : @matrix=@{&make_2D_identity_matrix(\$seq, [\$seq2] )};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
# Example   :
# Keywords  : make_matrix, make_identity_matrix
# Options   :
#        s  for show axis
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#----------------------------------------------------------------------------
sub make_2D_identity_matrix{
		 # Purpose : Makes and prints a text dot matrix. There is one row
		 #           (a string ending in a newline) per character of the second
		 #           string and one column per character of the first string;
		 #           a cell is "1" where the two characters are equal and a
		 #           blank otherwise. When called with a single argument the
		 #           first string is compared with itself.
		 # Options : s  starts each row with its own character and two blanks.
		 # Returns : reference to the array of row strings.

		 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		 if($debug==1){print "\n\t\@hash=\"@hash\"
		 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 my (@matrix, @column_chars, @row_chars);
		 @column_chars=split(//, $string[0]);
		 if(@_ > 1){ @row_chars=split(//, $string[1]); }
		 else{       @row_chars=@column_chars; }

		 my $show_axis=($char_opt=~/s/);
		 foreach my $row_char (@row_chars){
				my $row_text=$show_axis ? "$row_char  " : '';
				foreach my $column_char (@column_chars){
					 $row_text .= ($column_char eq $row_char) ? 1 : ' ';
				}
				push(@matrix, "$row_text\n");
		 }
		 print $_ foreach @matrix;
		 return(\@matrix);
}

#________________________________________________________________________________
# Title     : amino_acid_homology_matrix
# Usage     : $yes_no=${&amino_acid_homology_matrix('E', 'D')};
# Function  :
# Example   :
# Keywords  : are_they_homologous, amino_acid_homology_table, compare_amino_acid_homology
#             single_residue_homology_matrix
# Options   :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub amino_acid_homology_matrix{
    # Purpose : Tells whether two residues belong to the same similarity group.
    # Arguments: two residues, each a plain string or a reference to one.
    # Returns : \1 when one of the groups below contains both residues,
    #           \0 otherwise, also when a residue is undefined, empty, or
    #           ends in a non-word character (a gap for example).
    # Note    : matching is case sensitive. Residues that are in no group
    #           (A, M, P, G, W, Y ...) give \0 even when both are the same.
    #
    # Only real references are dereferenced. "${$_[0]} || $_[0]" treated a
    # plain 'E' as the NAME of a variable and used the value of a global $E
    # when one existed.
    my $amino_acid1 = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my $amino_acid2 = (ref($_[1]) eq 'SCALAR') ? ${$_[1]} : $_[1];

    foreach my $residue ($amino_acid1, $amino_acid2){
        return(\0) unless defined($residue) and length($residue);
        return(\0) if $residue=~/\W$/;
    }

    my @groups=(
        'LIFV',   # hydrophobic,   A excluded by me
        'STCNQ',  # neutral polar, M excluded by me
        'ED',     # acidic
        'KRH',    # basic
    );
    foreach my $group (@groups){
        # index() compares literally; as a regex an empty residue matched
        # every group and a '.' matched any letter.
        if(index($group, $amino_acid1) >= 0 and index($group, $amino_acid2) >= 0){
            return(\1);
        }
    }
    return(\0);
}

#______________________________________________________________________________
# Title     : write_go_back_html_link
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_go_back_html_link{
    # Purpose : Prints the closing part of an HTML page to the currently
    #           selected output handle: a rule, a "Go back to BSS" link and
    #           the final </html> tag.
    # Returns : the result of the last print.
    my @page_end = (
        q{<hr><FONT COLOR="#DC143C"><FONT SIZE=+2><< Go back to },
        q{<A href="http://cyrah.med.harvard.edu/Biosub/bioperl_server.html">BSS </A></font></font>},
        "</html>\n",
    );
    my $printed;
    foreach my $fragment (@page_end){
        $printed = print $fragment;
    }
    return $printed;
}

#______________________________________________________________________________
# Title     : write_protein_structural_domain_interact_pair_file
# Usage     :
# Function  :
# Example   :
# Keywords  : write_PDIPF_file
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub write_protein_structural_domain_interact_pair_file{
    # Purpose : Writes a two level contact list, $list{FLAG}{"name1 name2"},
    #           to four files. Each entry becomes one line "FLAG<TAB>value".
    #           Every line goes to the main file; it is also copied to one
    #           of three class files chosen from the shape of the pair key:
    #             <base>_chain_chain.psdip    first name ends in "_" and the
    #                                         second contains "_"
    #             <base>_chain_domain.psdip   only one of the two does
    #             <base>_domain_domain.psdip  any other "name1 name2" key
    #           <base> is what get_base_names returns for the main file.
    #           NOTE(review): presumably a trailing "_" marks a whole chain;
    #           the second name is not required to END in "_" - confirm.
    # Arguments: $_[0] - reference to the contact hash
    #           $_[1] - main output file name or a reference to it
    #                   (default PROT_STR_DOM_interaction_pair_TEMP.psdip)
    # Returns : reference to the main output file name.
    # Dies    : when one of the files can not be opened or fully written.
    my(%contact_list, $out_file_name, $base, %file_of, %handle_of);
    %contact_list=%{$_[0]};
    # Dereference real references only, never treat a file name as a
    # variable name.
    $out_file_name=(ref($_[1]) eq 'SCALAR') ? ${$_[1]} : $_[1];
    unless($out_file_name){ $out_file_name='PROT_STR_DOM_interaction_pair_TEMP.psdip'; }
    $base=${&get_base_names($out_file_name)};
    %file_of=(
        'ALL'           => $out_file_name,
        'CHAIN_CHAIN'   => "$base\_chain_chain\.psdip",
        'CHAIN_DOMAIN'  => "$base\_chain_domain\.psdip",
        'DOMAIN_DOMAIN' => "$base\_domain_domain\.psdip",
    );
    foreach my $class (sort keys %file_of){
        open($handle_of{$class}, '>', $file_of{$class})
            || die "\n# (E) write_protein_structural_domain_interact_pair_file: Can not open $file_of{$class}: $!\n";
    }

    # The flags are PROTEIN_INTERACT and PROTEIN_NONINTERACT; sorting them
    # makes the order of the output lines reproducible.
    foreach my $interact_flag (sort keys %contact_list){
       foreach my $pair (sort keys %{$contact_list{$interact_flag}}){
          my $line="$interact_flag\t$contact_list{$interact_flag}{$pair}\n";
          my $class;
          if($pair=~/\S+_\s+\S+_/){
             $class='CHAIN_CHAIN';
          }elsif($pair=~/\S+_\s+\S+/ or $pair=~/\S+\s+\S+_/){
             $class='CHAIN_DOMAIN';
          }elsif($pair=~/\S+\s+\S+/){
             $class='DOMAIN_DOMAIN';
          }
          # A key without two names is written to the main file only
          print {$handle_of{$class}} $line if $class;
          print {$handle_of{'ALL'}} $line;
       }
    }
    foreach my $class (sort keys %handle_of){
        close($handle_of{$class})
            || die "\n# (E) write_protein_structural_domain_interact_pair_file: Can not finish writing $file_of{$class}: $!\n";
    }
    print "\n#(i) $out_file_name are made <-- write_protein_structural_domain_interact_pair_file\n";
    return(\$out_file_name);
}




#______________________________________________________________________________
# Title     : write_AT_GC_ratio_and_contents_ratio_files
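# Usage     : $na_prv=&write_AT_GC_ratio_and_contents_ratio_files($base,
#                                             $na_prv, $sample_interval,
#                                                 $win_siz, $orig_seq, \%stra);
#             (\%stra: ref. of a hash whose keys name the output file sets,
#              without it no file is written)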
# Function  : Calculates AT, GC ratios, AG, TC ratios and AT, GC content
#              ratios with scanning window over a long seq.
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.5
#------------------------------------------------------------------------------
sub write_AT_GC_ratio_and_contents_ratio_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Slides a window of $win_siz bases over $orig_seq in steps of $smpl_intv
    #  and, for each key of the hash given as the 6th argument, writes
    #    - base ratio tracks  (A/T, G/C, A/C, G/T, A/G, T/C; *.atgcr)
    #    - base content tracks (AT, GC, AG, TC, AC, GT over A+T+G+C)
    #    - ratio deviation tracks (*.ratdv)
    #    - a summary file (*.atgcs), the summary is also printed to STDOUT
    #
    # Arguments: $_[0] base of the output file names
    #            $_[1] sequence name (used in the output file names)
    #            $_[2] sampling interval (must be larger than 0)
    #            $_[3] window size
    #            $_[4] the sequence
    #            $_[5] ref. of a hash, its keys are put in the file names
    # Returns  : the value of the package variable $name
    #
    # A ratio with a zero denominator is written as an empty value. The
    #  per window base ratios keep the value of the previous window in
    #  that case, as they always did when T or C was absent.
    #
    # NOTE(review): AC_cont and GT_cont files carry the .agcnt and .tccnt
    #  extensions of the AG and TC files. It looks like a copy and paste
    #  slip, but the names are kept as they are part of the output.
    # NOTE(review): many work variables (%AT_R, %tot_AT_R, $half_win ...)
    #  are package variables and survive between calls.
    #___________________________________________________________________________
    my($i, $s, $ratio, %tot_AT_ratio, %tot_GC_ratio, %tot_G_occu, %tot_C_occu,
       %tot_A_occu, %tot_T_occu, %AT_cont, %GC_cont,
       %tot_BASE_occu, $seq_count, %AT_cont_R, %GC_cont_R,
       %tot_AT_cont, %tot_GC_cont, $base, $na_prv, $smpl_intv,
       $win_siz, $orig_seq, $AT_R_atgcr_file, $GC_R_atgcr_F,
       $Ratio_deviation_F, $Final_R_F, $AT_cont_F, $AT_TC_R_dif_sum,
       $GC_cont_F, $seq_length, $seq_length_adjusted, $seq, $stra,
       %A_occu, %T_occu, %G_occu, %C_occu, %stra, @stra,
       $Accu_AT_GC_R_dif, $AC_R_atgcr_F, $GT_R_atgcr_F,
       %AC_cont, %GT_cont, $tot_AG_cont_R, $tot_TC_cont_R,
       %tot_AC_cont, %tot_GT_cont, %tot_AG_cont, %tot_TC_cont,
       %true_tot_AT_cont, %true_tot_GC_cont, %true_tot_GT_cont, %true_tot_AC_cont,
       %true_tot_AT_R, %true_tot_GC_R, %true_tot_GT_R, %true_tot_AC_R,
       %true_Tot_A_occu, %true_Tot_G_occu, %true_Tot_T_occu, %true_Tot_C_occu,
       $true_tot_AG_TC_R);
    $base        =$_[0];
    $na_prv      =$_[1];
    $smpl_intv   =$_[2];
    $win_siz     =$_[3];
    $orig_seq    =$_[4];
    %stra        =%{$_[5]};
    @stra        =keys %stra;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Formatted quotient, '' instead of a fatal division by zero
    #___________________________________________________________________
    $ratio=sub{ my($num, $den)=@_; return($den ? sprintf("%-.3f", $num/$den) : ''); };

    for($s=0; $s< @stra; $s++){
       $stra=$stra[$s];
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # The window loop below would never end with a step of 0 or less
       #________________________________________________________________
       unless($smpl_intv > 0){
           die "\n write_AT_GC_ratio_and_contents_ratio_files: sampling interval must be larger than 0\n\n";
       }
       $AT_R_atgcr_F ="$base\_$na_prv\_$stra\_AT_R_int_$smpl_intv\_$win_siz.atgcr";
       $GC_R_atgcr_F ="$base\_$na_prv\_$stra\_GC_R_int_$smpl_intv\_$win_siz.atgcr";
       $AC_R_atgcr_F ="$base\_$na_prv\_$stra\_AC_R_int_$smpl_intv\_$win_siz.atgcr";
       $GT_R_atgcr_F ="$base\_$na_prv\_$stra\_GT_R_int_$smpl_intv\_$win_siz.atgcr";
       $AG_R_atgcr_F ="$base\_$na_prv\_$stra\_AG_R_int_$smpl_intv\_$win_siz.atgcr";
       $TC_R_atgcr_F ="$base\_$na_prv\_$stra\_TC_R_int_$smpl_intv\_$win_siz.atgcr";
       $Final_R_F     ="$base\_$na_prv\_$stra\_SUMMARY_int_$smpl_intv\_$win_siz.atgcs";
       $AT_cont_F     ="$base\_$na_prv\_$stra\_AT_cont_int_$smpl_intv\_$win_siz.atcnt";
       $GC_cont_F     ="$base\_$na_prv\_$stra\_GC_cont_int_$smpl_intv\_$win_siz.gccnt";
       $AG_cont_F     ="$base\_$na_prv\_$stra\_AG_cont_int_$smpl_intv\_$win_siz.agcnt";
       $TC_cont_F     ="$base\_$na_prv\_$stra\_TC_cont_int_$smpl_intv\_$win_siz.tccnt";
       $AC_cont_F     ="$base\_$na_prv\_$stra\_AC_cont_int_$smpl_intv\_$win_siz.agcnt";
       $GT_cont_F     ="$base\_$na_prv\_$stra\_GT_cont_int_$smpl_intv\_$win_siz.tccnt";
       $Ratio_deviation_F="$base\_$na_prv\_$stra\_ATGC_R_devi_int_$smpl_intv\_$win_siz.ratdv";
       $AT_GC_R_dif_sum_F ="$base\_$na_prv\_$stra\_AT_GC_R_sum_int_$smpl_intv\_$win_siz.ratdv";
       open(AT_R,       ">$AT_R_atgcr_F") || die "\n can not open  $AT_R_atgcr_F\n\n";
       open(GC_R,       ">$GC_R_atgcr_F") || die "\n can not open  $GC_R_atgcr_F\n\n";
       open(AC_R,       ">$AC_R_atgcr_F") || die "\n can not open  $AC_R_atgcr_F\n\n";
       open(GT_R,       ">$GT_R_atgcr_F") || die "\n can not open  $GT_R_atgcr_F\n\n";
       open(AG_R,       ">$AG_R_atgcr_F") || die "\n can not open  $AG_R_atgcr_F\n\n";
       open(TC_R,       ">$TC_R_atgcr_F") || die "\n can not open  $TC_R_atgcr_F\n\n";
       open(AG_cont,    ">$AG_cont_F") || die "\n can not open  $AG_cont_F\n\n";
       open(TC_cont,    ">$TC_cont_F") || die "\n can not open  $TC_cont_F\n\n";
       open(AT_cont,    ">$AT_cont_F") || die "\n can not open  $AT_cont_F\n\n";
       open(GC_cont,    ">$GC_cont_F") || die "\n can not open  $GC_cont_F\n\n";
       open(AC_cont,    ">$AC_cont_F") || die "\n can not open  $AC_cont_F\n\n";
       open(GT_cont,    ">$GT_cont_F") || die "\n can not open  $GT_cont_F\n\n";
       open(RATIO_DEVI, ">$Ratio_deviation_F") || die "\n can not open $Ratio_deviation_F\n\n";
       open(AT_TC_R_dif_sum, ">$AT_GC_R_dif_sum_F") || die "\n can not open $AT_GC_R_dif_sum_F\n\n";
       open(FINAL_R,    ">$Final_R_F") || die "\n can not open $Final_R_F\n\n";

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # 'True' composition of the whole sequence (skipped for very long
       #  ones). tr counts both cases without changing the sequence.
       #  These values are kept until the summary below is written.
       #________________________________________________________________
       $seq_length=length($orig_seq);
       if($seq_length < 20000000){
           $true_Tot_A_occu{$stra} =($orig_seq=~tr/aA//);
           $true_Tot_G_occu{$stra} =($orig_seq=~tr/gG//);
           $true_Tot_T_occu{$stra} =($orig_seq=~tr/tT//);
           $true_Tot_C_occu{$stra} =($orig_seq=~tr/cC//);
           $true_tot_AT_cont{$stra}  =$true_Tot_A_occu{$stra} + $true_Tot_T_occu{$stra};
           $true_tot_GC_cont{$stra}  =$true_Tot_G_occu{$stra} + $true_Tot_C_occu{$stra};
           $true_tot_AG_cont{$stra}  =$true_Tot_A_occu{$stra} + $true_Tot_G_occu{$stra};
           $true_tot_TC_cont{$stra}  =$true_Tot_T_occu{$stra} + $true_Tot_C_occu{$stra};
           $true_tot_AC_cont{$stra}  =$true_Tot_A_occu{$stra} + $true_Tot_C_occu{$stra};
           $true_tot_GT_cont{$stra}  =$true_Tot_G_occu{$stra} + $true_Tot_T_occu{$stra};
           $true_tot_AT_R{$stra}  =&$ratio($true_Tot_A_occu{$stra}, $true_Tot_T_occu{$stra});
           $true_tot_GC_R{$stra}  =&$ratio($true_Tot_G_occu{$stra}, $true_Tot_C_occu{$stra});
           $true_tot_AG_R{$stra}  =&$ratio($true_Tot_A_occu{$stra}, $true_Tot_G_occu{$stra});
           $true_tot_TC_R{$stra}  =&$ratio($true_Tot_T_occu{$stra}, $true_Tot_C_occu{$stra});
           $true_tot_AC_R{$stra}  =&$ratio($true_Tot_A_occu{$stra}, $true_Tot_C_occu{$stra});
           $true_tot_GT_R{$stra}  =&$ratio($true_Tot_G_occu{$stra}, $true_Tot_T_occu{$stra});
           $true_tot_AT_cont_R=&$ratio($true_tot_AT_cont{$stra}, $seq_length);
           $true_tot_GC_cont_R=&$ratio($true_tot_GC_cont{$stra}, $seq_length);
           $true_tot_AC_cont_R=&$ratio($true_tot_AC_cont{$stra}, $seq_length);
           $true_tot_GT_cont_R=&$ratio($true_tot_GT_cont{$stra}, $seq_length);
           $true_tot_AG_cont_R=&$ratio($true_tot_AG_cont{$stra}, $seq_length);
           $true_tot_TC_cont_R=&$ratio($true_tot_TC_cont{$stra}, $seq_length);
           $true_tot_AG_TC_R  =&$ratio($true_tot_AG_cont{$stra}, $true_tot_TC_cont{$stra});
           print "# True contnt: A: $true_Tot_A_occu{$stra} T: $true_Tot_T_occu{$stra} G: $true_Tot_G_occu{$stra} C: $true_Tot_C_occu{$stra}  Total: $seq_length AG/TC:$true_tot_AG_TC_R\n";
       }

       $half_win =$win_siz/2;
       $seq_length_adjusted=$seq_length - $half_win;
       for($i=0; $i <= $seq_length_adjusted; $i+=$smpl_intv){ ## $i+=2 is a 2 base stepping to save time
           $seq=substr($orig_seq, $i, $win_siz);
           $seq_count=($i+$half_win); # the position written is the centre of the window

           $A_occu{$stra} =($seq=~tr/aA//);
           $G_occu{$stra} =($seq=~tr/gG//);
           $T_occu{$stra} =($seq=~tr/tT//);
           $C_occu{$stra} =($seq=~tr/cC//);

           if($T_occu{$stra} and $C_occu{$stra}){
               $AT_R{$stra}=sprintf("%-.3f", $A_occu{$stra}/$T_occu{$stra});
               $GC_R{$stra}=sprintf("%-.3f", $G_occu{$stra}/$C_occu{$stra});
               $AC_R{$stra}=sprintf("%-.3f", $A_occu{$stra}/$C_occu{$stra});
               $GT_R{$stra}=sprintf("%-.3f", $G_occu{$stra}/$T_occu{$stra});
               if($G_occu{$stra}){ # A/G needs a G in the window as well
                   $AG_R{$stra}=sprintf("%-.3f", $A_occu{$stra}/$G_occu{$stra});
               }
               $TC_R{$stra}=sprintf("%-.3f", $T_occu{$stra}/$C_occu{$stra});
           }
           $AT_cont{$stra}  =$A_occu{$stra} + $T_occu{$stra};
           $GC_cont{$stra}  =$G_occu{$stra} + $C_occu{$stra};
           $AG_cont{$stra}  =$A_occu{$stra} + $G_occu{$stra};
           $TC_cont{$stra}  =$T_occu{$stra} + $C_occu{$stra};
           $AC_cont{$stra}  =$A_occu{$stra} + $C_occu{$stra};
           $GT_cont{$stra}  =$G_occu{$stra} + $T_occu{$stra};
           $sum_ATGC{$stra} =$AT_cont{$stra}+ $GC_cont{$stra};

           # $sum_ATGC is 0 when the window has no A, T, G or C at all
           $AT_cont_R{$stra}=&$ratio($AT_cont{$stra}, $sum_ATGC{$stra});
           $GC_cont_R{$stra}=&$ratio($GC_cont{$stra}, $sum_ATGC{$stra});
           $AG_cont_R{$stra}=&$ratio($AG_cont{$stra}, $sum_ATGC{$stra});
           $TC_cont_R{$stra}=&$ratio($TC_cont{$stra}, $sum_ATGC{$stra});
           $AC_cont_R{$stra}=&$ratio($AC_cont{$stra}, $sum_ATGC{$stra});
           $GT_cont_R{$stra}=&$ratio($GT_cont{$stra}, $sum_ATGC{$stra});

           $tot_AT_cont{$stra}+=$AT_cont{$stra};
           $tot_GC_cont{$stra}+=$GC_cont{$stra};
           $tot_AG_cont{$stra}+=$AG_cont{$stra};
           $tot_TC_cont{$stra}+=$TC_cont{$stra};
           $tot_AC_cont{$stra}+=$AC_cont{$stra};
           $tot_GT_cont{$stra}+=$GT_cont{$stra};

           $tot_A_occu{$stra}    += $A_occu{$stra};
           $tot_G_occu{$stra}    += $G_occu{$stra};
           $tot_T_occu{$stra}    += $T_occu{$stra};
           $tot_C_occu{$stra}    += $C_occu{$stra};
           $tot_BASE_occu{$stra}  =$tot_A_occu{$stra}+$tot_G_occu{$stra}+
                                   $tot_T_occu{$stra}+$tot_C_occu{$stra};

           if($tot_T_occu{$stra}  and $tot_C_occu{$stra} ){
               $tot_AT_R{$stra} =sprintf("%-.3f",$tot_A_occu{$stra} /$tot_T_occu{$stra});
               $tot_GC_R{$stra} =sprintf("%-.3f",$tot_G_occu{$stra} /$tot_C_occu{$stra});
               $tot_AC_R{$stra} =sprintf("%-.3f",$tot_A_occu{$stra} /$tot_C_occu{$stra});
               $tot_GT_R{$stra} =sprintf("%-.3f",$tot_G_occu{$stra} /$tot_T_occu{$stra});
               if($tot_G_occu{$stra}){
                   $tot_AG_R{$stra} =sprintf("%-.3f",$tot_A_occu{$stra} /$tot_G_occu{$stra});
               }
               $tot_TC_R{$stra} =sprintf("%-.3f",$tot_T_occu{$stra} /$tot_C_occu{$stra});
           }

           $R_deviation   =(abs(1-$AT_R{$stra}) + abs(1-$GC_R{$stra}))/2;
           $Accu_AT_GC_R_dif= $AT_R{$stra}-1 + $GC_R{$stra}-1;
           $AT_TC_R_dif_sum   =sprintf("%-.3f", $Accu_AT_GC_R_dif);
           $A_occu{$stra} =$G_occu{$stra} =$T_occu{$stra} =$C_occu{$stra} =0;

           print AT_R "\n$seq_count $AT_R{$stra}";
           print GC_R "\n$seq_count $GC_R{$stra}";
           print AC_R "\n$seq_count $AC_R{$stra}";
           print GT_R "\n$seq_count $GT_R{$stra}";
           print AG_R "\n$seq_count $AG_R{$stra}";
           print TC_R "\n$seq_count $TC_R{$stra}";
           print AT_cont "\n$seq_count $AT_cont_R{$stra}";
           print GC_cont "\n$seq_count $GC_cont_R{$stra}";
           print AG_cont "\n$seq_count $AG_cont_R{$stra}";
           print TC_cont "\n$seq_count $TC_cont_R{$stra}";
           print AC_cont "\n$seq_count $AC_cont_R{$stra}";
           print GT_cont "\n$seq_count $GT_cont_R{$stra}";
           print RATIO_DEVI "$seq_count $R_deviation\n";
           print AT_TC_R_dif_sum "$seq_count $AT_TC_R_dif_sum\n";
        }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # NOTE(review): the sequence is emptied inside the loop, so when the
        #  hash has more than one key only the first one sees the sequence.
        #  Left as it was, confirm if this is wanted.
        #____________________________________________________________________
        $orig_seq=''; ## <--------------- !!!
        print "\n>$na_prv ";
        $tot_AT_cont_R=&$ratio($tot_AT_cont{$stra}, $tot_BASE_occu{$stra});
        $tot_GC_cont_R=&$ratio($tot_GC_cont{$stra}, $tot_BASE_occu{$stra});
        $tot_AC_cont_R=&$ratio($tot_AC_cont{$stra}, $tot_BASE_occu{$stra});
        $tot_GT_cont_R=&$ratio($tot_GT_cont{$stra}, $tot_BASE_occu{$stra});
        $tot_AG_cont_R=&$ratio($tot_AG_cont{$stra}, $tot_BASE_occu{$stra});
        $tot_TC_cont_R=&$ratio($tot_TC_cont{$stra}, $tot_BASE_occu{$stra});
        print "\n  AT rate: $tot_AT_R{$stra} $tot_A_occu{$stra}/$tot_T_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AT_R{$stra}";
        print "\n  GC rate: $tot_GC_R{$stra} $tot_G_occu{$stra}/$tot_C_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_GC_R{$stra}";
        print "\n  AC rate: $tot_AC_R{$stra} $tot_A_occu{$stra}/$tot_C_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AC_R{$stra}";
        print "\n  GT rate: $tot_GT_R{$stra} $tot_G_occu{$stra}/$tot_T_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_GT_R{$stra}";
        print "\n  AG rate: $tot_AG_R{$stra} $tot_A_occu{$stra}/$tot_G_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AG_R{$stra}";
        print "\n  TC rate: $tot_TC_R{$stra} $tot_T_occu{$stra}/$tot_C_occu{$stra} tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_TC_R{$stra}";
        print "\n  AT cont: $tot_AT_cont_R tot $seq_count $tot_BASE_occu{$stra}";
        print "\n  GC cont: $tot_GC_cont_R tot $seq_count $tot_BASE_occu{$stra}";
        print "\n  AC cont: $tot_AC_cont_R tot $seq_count $tot_BASE_occu{$stra}";
        print "\n  GT cont: $tot_GT_cont_R tot $seq_count $tot_BASE_occu{$stra}";
        print "\n  AG(purine) cont: $tot_AG_cont_R tot $tot_BASE_occu{$stra}, $true_tot_AT_cont{$stra}/$true_tot_GC_cont{$stra}";
        print "\n  TC(pyrim)  cont: $tot_TC_cont_R tot $seq_count $tot_BASE_occu{$stra}";
        print "\n  Accu. AT GC ratio diff sum: $Accu_AT_GC_R_dif , True: $true_tot_AT_cont{$stra}:$true_tot_GC_cont{$stra}";
        print FINAL_R "# True cont: A: $true_Tot_A_occu{$stra} T: $true_Tot_T_occu{$stra} G: $true_Tot_G_occu{$stra} C: $true_Tot_C_occu{$stra}  Total: $seq_length AG/TC:$true_tot_AG_TC_R\n";
        print FINAL_R "\n Tot AT R: $tot_AT_R{$stra} $tot_A_occu{$stra}/$tot_T_occu{$stra} tot $seq_count true: $true_tot_AT_R{$stra}";
        print FINAL_R "\n Tot GC R: $tot_GC_R{$stra} $tot_G_occu{$stra}/$tot_C_occu{$stra} tot $seq_count true: $true_tot_GC_R{$stra}";
        print FINAL_R "\n Tot AC R: $tot_AC_R{$stra} $tot_A_occu{$stra}/$tot_C_occu{$stra} tot $seq_count true: $true_tot_AC_R{$stra}";
        print FINAL_R "\n Tot GT R: $tot_GT_R{$stra} $tot_G_occu{$stra}/$tot_T_occu{$stra} tot $seq_count true: $true_tot_GT_R{$stra}";
        print FINAL_R "\n Tot AG R: $tot_AG_R{$stra} $tot_A_occu{$stra}/$tot_G_occu{$stra} tot $seq_count true: $true_tot_AG_R{$stra}";
        print FINAL_R "\n Tot TC R: $tot_TC_R{$stra} $tot_T_occu{$stra}/$tot_C_occu{$stra} tot $seq_count true: $true_tot_TC_R{$stra}";
        # the true content ratios are plain scalars, not hashes
        print FINAL_R "\n Tot AT cont: $tot_AT_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AT_cont_R";
        print FINAL_R "\n Tot GC cont: $tot_GC_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_GC_cont_R";
        print FINAL_R "\n Tot AC cont: $tot_AC_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AC_cont_R";
        print FINAL_R "\n Tot GT cont: $tot_GT_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_GT_cont_R";
        print FINAL_R "\n Tot AG cont: $tot_AG_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_AG_cont_R";
        print FINAL_R "\n Tot TC cont: $tot_TC_cont_R tot $seq_count $tot_BASE_occu{$stra} true: $true_tot_TC_cont_R";
        print FINAL_R "\n  AG(purine) cont: $tot_AG_cont_R tot $tot_BASE_occu{$stra}, $true_tot_AT_cont{$stra}/$true_tot_GC_cont{$stra}";
        print FINAL_R "\n  TC(pyrim)  cont: $tot_TC_cont_R tot $seq_count $tot_BASE_occu{$stra}, AG/TC:$true_tot_AG_TC_R";
        print FINAL_R "\n  Accu. AT GC ratio diff sum: $Accu_AT_GC_R_dif , True: $true_tot_AT_cont{$stra}:$true_tot_GC_cont{$stra}";
        close(AT_R);
        close(GC_R);
        close(AC_R);
        close(GT_R);
        close(AG_R);
        close(TC_R);
        close(AG_cont);
        close(TC_cont);
        close(AT_cont);
        close(GC_cont);
        close(AC_cont);
        close(GT_cont);
        close(FINAL_R);
        close(AT_TC_R_dif_sum);
        close(RATIO_DEVI);
     }
     print "\n\n# Files produced: \n$AT_R_atgcr_F\n$GC_R_atgcr_F\n$Ratio_deviation_F\n$Final_R_F\n$AT_cont_F\n$GC_cont_F
           $AT_GC_R_dif_sum_F\n$AC_R_atgcr_F\n$GT_R_atgcr_F\n\n";
     # $name is not set in this sub, it is a package variable of the caller
     $na_prv=$name;
     return($na_prv);
}




#______________________________________________________________________________
# Title     : write_SDDIF_file
# Usage     :
# Function  :
# Example   :
# Keywords  : write_sddif_file
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub write_SDDIF_file{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Writes a Structural Domain DIfference File "<base1>_<base2>.sddif"
    #  listing the domains which are classified differently in, or are
    #  missing from, one of two fasta files. Three fasta files are written
    #  with write_fasta as well: the changed, the missing and the unchanged
    #  entries seen from the first file.
    #
    # Arguments: $_[0] ref. of a hash of hashes, see the key list below
    #            $_[1] 1st fasta file name (or ref. of it)
    #            $_[2] 2nd fasta file name (or ref. of it)
    # Returns  : ref. of the name of the SDDIF file
    #
    # NOTE(review): the three difference types are taken by their position
    #  after sorting the keys of the input hash, so all three keys must be
    #  there for the right type to be picked.
    #___________________________________________________________________________
    my($i, %DIFF_classification_domains, $SDDIF_file, @diff_types,
       $pdbd1, $pdbd2, $t1, $t2, $t3, %pdbd1_fasta, %pdbd2_fasta, %pdbd1_fas_swapped,
       %pdbd2_fas_swapped, $changed_pdbd1_fasta, $changed_pdbd2_fasta,
       $missing_in_pdbd1_fasta, $missing_in_pdbd2_fasta,@pdbd1_unchanged_seqs,
       %missing_in_pdbd1_fas, %missing_in_pdbd2_fas, @pdbd1_fasta_seqs,
       @pdbd2_fasta_seqs, %pdbd1_unchanged, %pdbd2_unchanged, $unchaged_pdbd1_fasta,
       $dom);
    %DIFF_classification_domains=%{$_[0]};
    $pdbd1=${$_[1]} || $_[1];
    $pdbd2=${$_[2]} || $_[2];
    $file_base1=${&get_base_names($pdbd1)};
    $file_base2=${&get_base_names($pdbd2)};
    $SDDIF_file="$file_base1\_$file_base2\.sddif"; ## SDDIF : Structural Domain DIfference File (like SCOP domain compa.)
    $changed_pdbd1_fasta="$file_base1\_changed_to_$file_base2\.mpfa";
    $changed_pdbd2_fasta="$file_base2\_changed_to_$file_base1\.mpfa";
    $missing_in_pdbd1_fasta="$file_base1\_missing_in_$file_base2\.mpfa";
    $missing_in_pdbd2_fasta="$file_base2\_missing_in_$file_base1\.mpfa";
    $unchaged_pdbd1_fasta="$file_base1\_unchanged_with_$file_base2\.mpfa";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # %pdbd1_fasta has %pdbd1_fasta{$pdbdomain}{'SEQUENCE'}
    # %pdbd1_fasta has %pdbd1_fasta{$pdbdomain}{'DESCRIPTION'}
    #__________________________________________________________
    %pdbd1_fasta=%{&open_fasta_files($pdbd1, 'd')}; ## getting descriptive seq entry by 'd' option
    %pdbd2_fasta=%{&open_fasta_files($pdbd2, 'd')};
    @pdbd1_fasta_seqs=keys %pdbd1_fasta;
    @pdbd2_fasta_seqs=keys %pdbd2_fasta;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # %DIFF_classification_domains{'DIF_CLASSIFICATION'}{$pdbd1_domain}
    # $DIFF_classification_domains{'MISSING_IN_PDBD1'}{$pdbd2_domain}
    # $DIFF_classification_domains{'MISSING_IN_PDBD2'}{$pdbd1_domain}
    #______________________________________________________________________
    open(SDDIF_file, ">$SDDIF_file") || die "Cannt open $SDDIF_file\n\n";
    @diff_types=sort keys %DIFF_classification_domains;
    $t1=@domains_with_diff_classification=keys %{$DIFF_classification_domains{$diff_types[0]}};
    $t2=@domains_missing_in_PDBD1=keys %{$DIFF_classification_domains{$diff_types[1]}};
    $t3=@domains_missing_in_PDBD2=keys %{$DIFF_classification_domains{$diff_types[2]}};

    $num_of_pdbd1_unchanged=@pdbd1_unchanged_seqs=@{&subtract_array(\@pdbd1_fasta_seqs, \@domains_with_diff_classification)};
    $num_of_pdbd2_unchanged=@pdbd2_unchanged_seqs=@{&subtract_array(\@pdbd2_fasta_seqs, \@domains_with_diff_classification)};

    print SDDIF_file "# Diff classification domains: $t1\n";
    print SDDIF_file "# Missing domains in 1st file: $t2\n";
    print SDDIF_file "# Missing domains in 2st file: $t3\n";
    print SDDIF_file "# Unchanged in $pdbd1 : $num_of_pdbd1_unchanged\n";

    for($i=0; $i<@pdbd1_unchanged_seqs; $i++){
       $dom=$pdbd1_unchanged_seqs[$i];
       $pdbd1_unchanged{$dom}{'DESCRIPTION'}=$pdbd1_fasta{$dom}{'DESCRIPTION'};
       $pdbd1_unchanged{$dom}{'SEQUENCE'}=$pdbd1_fasta{$dom}{'SEQUENCE'};
    }
    for($i=0; $i<@pdbd2_unchanged_seqs; $i++){
       $dom=$pdbd2_unchanged_seqs[$i]; # the 2nd file is looked up with its own keys
       $pdbd2_unchanged{$dom}{'DESCRIPTION'}=$pdbd2_fasta{$dom}{'DESCRIPTION'};
       $pdbd2_unchanged{$dom}{'SEQUENCE'}=$pdbd2_fasta{$dom}{'SEQUENCE'};
    }
    for($i=0;  $i<@domains_with_diff_classification; $i++){
       $dom=$domains_with_diff_classification[$i];
       print SDDIF_file ">${dom}: $DIFF_classification_domains{$diff_types[0]}{$dom}\n";
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Swapping description of the seq entries
       #__________________________________________
       $pdbd1_fas_swapped{$dom}{'DESCRIPTION'}=$pdbd2_fasta{$dom}{'DESCRIPTION'};
       $pdbd1_fas_swapped{$dom}{'SEQUENCE'}=$pdbd2_fasta{$dom}{'SEQUENCE'};
       $pdbd2_fas_swapped{$dom}{'DESCRIPTION'}=$pdbd1_fasta{$dom}{'DESCRIPTION'};
       $pdbd2_fas_swapped{$dom}{'SEQUENCE'}=$pdbd1_fasta{$dom}{'SEQUENCE'};
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The entries of the missing domains are collected with the names of
    #  the missing domains themselves (the names of the differently
    #  classified domains were used here before).
    #  Domains missing in the 1st file are taken from the 2nd and v.v.
    #_____________________________________________________________________
    for($i=0;  $i<@domains_missing_in_PDBD1 ; $i++){
       $dom=$domains_missing_in_PDBD1[$i];
       print SDDIF_file ">${dom}: $DIFF_classification_domains{$diff_types[1]}{$dom} Missing_in_1st_file\n";
       $missing_in_pdbd1_fas{$dom}{'SEQUENCE'}=$pdbd2_fasta{$dom}{'SEQUENCE'};
       $missing_in_pdbd1_fas{$dom}{'DESCRIPTION'}=$pdbd2_fasta{$dom}{'DESCRIPTION'};
    }
    for($i=0;  $i<@domains_missing_in_PDBD2 ; $i++){
       $dom=$domains_missing_in_PDBD2[$i];
       $missing_in_pdbd2_fas{$dom}{'SEQUENCE'}=$pdbd1_fasta{$dom}{'SEQUENCE'};
       $missing_in_pdbd2_fas{$dom}{'DESCRIPTION'}=$pdbd1_fasta{$dom}{'DESCRIPTION'};
       print SDDIF_file ">${dom}: $DIFF_classification_domains{$diff_types[2]}{$dom} Missing_in_2nd_file\n";
    }
    unless(-f $SDDIF_file){ die "\n $SDDIF_file does not exist, write_SDDIF_file failed \n"; }
    print SDDIF_file "# Diff classification domains: $t1\n";
    print SDDIF_file "# Missing domains in 1st file: $t2\n";
    print SDDIF_file "# Missing domains in 2st file: $t3\n";
    print SDDIF_file "# Unchanged $pdbd1 : $num_of_pdbd1_unchanged\n";
    close(SDDIF_file); # the file has to be complete when its name is returned

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # NOTE!! I do not remove old entries in the first PDBD file
    #___________________________________________________________
    &write_fasta(\%pdbd1_fas_swapped, \$changed_pdbd1_fasta);
    &write_fasta(\%missing_in_pdbd1_fas, \$missing_in_pdbd1_fasta);
    &write_fasta(\%pdbd1_unchanged, \$unchaged_pdbd1_fasta);

    return(\$SDDIF_file);
}



#______________________________________________________________________________
# Title     : write_D3FA_file
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub write_D3FA_file{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Reads domain lines from a DDID file, cuts the listed regions out of the
    #  matching sequence of an MPFA (fasta) file with
    #  fetch_seqlet_from_sequence and writes the result with write_fasta to
    #  "<base of DDID file>_domain.d3fa".
    #  The first 5 characters of the domain id are used as the sequence name
    #  in the MPFA file. Ids not found there are counted and skipped.
    #
    # Arguments: $_[0] DDID file name, $_[1] MPFA file name
    # Returns  : ref. of a hash, $hash{domain id}=regional sequence
    # Dies when a fetched region has no non-space character.
    #
    # The result hash is now private to each call. It was a package variable
    #  which was never emptied, so entries of an earlier call were written
    #  again by a later one.
    #___________________________________________________________________________
    my(@regions, $DDID_file, $MPFA_file, %MPFA_hash, $base_DDID, $missing_PDB_id,
       %D3FA_hash);
    $DDID_file=$_[0]; # DDD_ids.ddid
    $MPFA_file=$_[1]; # pdb100d_1_41.mpfa
    $missing_PDB_id=0; # so that the last message shows 0 when nothing is missing

    %MPFA_hash=%{&open_fasta_files($MPFA_file)};

    open(DDID_FILE, "$DDID_file") || die "\n Can not open DDID file $DDID_file\n";
    while(<DDID_FILE>){
       if(/^((\S\S\S\S\S)\d+)\s+\S+\s+\S+\s+\S+\s+\d+\s+([\d\- ]+)\s+D.C./){
          my($region);
          $DDD_id=$1;
          $PDB_id=$2;
          $DDD_region=$3;
          @regions=split(/\s+/, $DDD_region);
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # split gives nothing back when the region text is white space
          #  only, the text is passed on as it is then
          #____________________________________________________________________
          unless(@regions){
              @regions=($DDD_region);
          }
          if($MPFA_hash{$PDB_id}){
              #print "\n Fetching regional sequence for $PDB_id";
              $region .=${&fetch_seqlet_from_sequence($MPFA_hash{$PDB_id}, \@regions)};
              if($region=~/\S/){
                 $D3FA_hash{$DDD_id}=$region;
              }else{
                 print "\n Region is empty, error? $region @regions $DDD_id \"$MPFA_hash{$PDB_id}\" $PDB_id\n";
                 die;
              }
          }else{
              $missing_PDB_id ++;
              print "\n <$missing_PDB_id> $PDB_id does not exist in $MPFA_file, skipping";
          }
       }
    }
    close(DDID_FILE);
    $base_DDID=${&get_base_names($DDID_file)};
    $out_D3FA_file="$base_DDID\_domain.d3fa";
    &write_fasta(\%D3FA_hash, \$out_D3FA_file);
    print "\n There were $missing_PDB_id \$missing_PDB_id in $MPFA_file\n";
    return(\%D3FA_hash);
}


#______________________________________________________________________________
# Title     : write_MRC_search_result_file
# Usage     :
# Function  :
# Example   :
# Keywords  : show_seqlet_alignment, write_mrc_search_file, write_MRC
# Options   :
#   $NO_screen_print=Q         by Q
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.3
#------------------------------------------------------------------------------
sub write_MRC_search_result_file{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opens a herta file and its 'rv' counterpart (the name is made with
    #  attach_suffix_in_file_name), aligns both with the two sequences by
    #  align_herta_inputs_with_adjustments and prints the two alignment
    #  blocks between a #<<START>> and a #<<STOP>> line to the MRCS handle.
    #  The 2nd sequence is reversed for the 'rv' alignment.
    #
    # Arguments (each can be a scalar or a ref. of it):
    #   $_[0]  herta input file       $_[1]  fasta file of the 1st sequence
    #   $_[2]  fasta file of 2nd seq. $_[3]  query sequence name
    #   $_[4]  DB sequence name       $_[5]  open gap penalty
    #   $_[6]  extend gap penalty     $_[7]  algorithm type
    #   $_[8]  no screen print option, handed over to the subs called
    #   $_[9]  name of the output file handle
    #   $_[10] 'E' to write to an own .mrcs file for this pair
    # Returns  : ref. of the forward alignment hash followed by the
    #            flattened 'rv' alignment hash.
    #
    # NOTE(review): the 'rv' hash is returned as a list, not as a ref. It
    #  looks like a missing '\', but it is kept for the callers.
    # NOTE(review): identity and alignment length of the #<<STOP>> line come
    #  from the 'rv' alignment and are dereferenced as scalar refs., while
    #  the scores are taken as array refs. Confirm against
    #  align_herta_inputs_with_adjustments.
    #___________________________________________________________________________
    my($herta_input_file, $sequence_1_file, $sequence_2_file,
       %align_position_seq1_seq2_rv,
       %align_position_seq1_seq2, $herta_input_file_rv, %sequence_1, %sequence_2,
       %sequence_rv, %sequence, @positions1_rv, @positions2_rv,
       @positions1, @positions2, @residues1, @residues2,
       $query_seq, $DB_seq, $seq1_len, $seq2_len,
       %MRC_search_alignment, %MRC_search_alignment_rv, $each_MRCS_file,
       $HERTA_open_gap_penalty, $HERTA_extend_gap_penalty, $HERTA_algorithm_type,
       $NO_screen_print, $MRCS_FILE_HANDLE, $write_EACH_MRCS_pair_file,
       $Mutual_Seq_Id_aligned, $ALIGNMENT_LENG);

    $herta_input_file=${$_[0]} || $_[0];
    $herta_input_file_rv=${&attach_suffix_in_file_name($herta_input_file, 'rv', 'e')};

    $sequence_1_file          =${$_[1]} || $_[1];
    $sequence_2_file          =${$_[2]} || $_[2];
    $query_seq                =${$_[3]} || $_[3];
    $DB_seq                   =${$_[4]} || $_[4];
    $HERTA_open_gap_penalty   =${$_[5]} || $_[5];
    $HERTA_extend_gap_penalty =${$_[6]} || $_[6];
    $HERTA_algorithm_type     =${$_[7]} || $_[7];
    $NO_screen_print          =${$_[8]} || $_[8];
    $MRCS_FILE_HANDLE         =${$_[9]} || $_[9];
    $write_EACH_MRCS_pair_file=${$_[10]} || $_[10];

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # This will not be used normally
    #___________________________________________________
    $each_MRCS_file ="$query_seq\_$DB_seq\_OG$HERTA_open_gap_penalty\_EG$HERTA_extend_gap_penalty\_$HERTA_algorithm_type\.mrcs";

    %sequence_1=%{&open_fasta_files(\$sequence_1_file)};
    %sequence_2=%{&open_fasta_files(\$sequence_2_file)};
    @keys1=keys %sequence_1;
    @keys2=keys %sequence_2;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # In herta, there is no seq name, so I note them as 1 and 2
    #______________________________________________________________
    $sequence{1}=$sequence_1{$keys1[0]}; # the first key is for seq (only 1 key in fact)
    $sequence{2}=$sequence_2{$keys2[0]};
    $sequence_rv{1}=$sequence_1{$keys1[0]}; #!!!!!!!!!!!!!!!! <<<<<<<<<<<<
    $sequence_rv{2}=reverse $sequence_2{$keys2[0]};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  $herta_align_hash{$seq_name}{$seq_pos}=$align_score;
    #_______________________________________________________________
    %align_position_seq1_seq2   =%{&open_herta_files(\$herta_input_file)};
    %align_position_seq1_seq2_rv=%{&open_herta_files(\$herta_input_file_rv)};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # temporarily assign 1, 2 as herta can not distinguish them
    #______________________________________________________________
    ($seq_name1, $seq_name2)=(1,2);

    @residues1   =split(//, $sequence{$seq_name1});
    @residues2   =split(//, $sequence{$seq_name2});
    @residues1_rv=split(//, $sequence_rv{$seq_name1});
    @residues2_rv=split(//, $sequence_rv{$seq_name2});
    @positions1   =sort {$a <=> $b} keys %{$align_position_seq1_seq2{$seq_name1}};
    @positions2   =sort {$a <=> $b} keys %{$align_position_seq1_seq2{$seq_name2}};
    @positions1_rv=sort {$a <=> $b} keys %{$align_position_seq1_seq2_rv{$seq_name1}};
    @positions2_rv=sort {$a <=> $b} keys %{$align_position_seq1_seq2_rv{$seq_name2}};

    $seq1_len=@residues1;
    $seq2_len=@residues2;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # MRC hash has the following structure
    # %MRC_search_alignment=("$query_seq", \@seq1_aligned,
    #                           "$DB_seq", \@seq2_aligned,
    #                   'RAW_ALIGN_SCORE', \@scores,
    #                   'SUM_HERTA_SCORE', \@herta_align_score);
    #____________________________________________________________________

    %MRC_search_alignment=%{&align_herta_inputs_with_adjustments(\@residues1, \@residues2,
                                                              \@positions1, \@positions2,
                                                              \%align_position_seq1_seq2,
                                                              $query_seq,
                                                              $DB_seq,
                                                              $NO_screen_print)};

    %MRC_search_alignment_rv=%{&align_herta_inputs_with_adjustments(\@residues1_rv, \@residues2_rv,
                                                              \@positions1_rv, \@positions2_rv,
                                                              \%align_position_seq1_seq2_rv,
                                                              $query_seq,
                                                              "$DB_seq\_rv",
                                                              $NO_screen_print)};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # openning and making FILE_HANDLE to pass to print_seq_in_block sub
    #  With 'E' the handle is opened here, so the default handle name is
    #  used when the caller gave none (the default was always lost before)
    #____________________________________________________________________
    if($write_EACH_MRCS_pair_file eq 'E'){
        $MRCS_FILE_HANDLE ||= "MRCS_FILE_HANDLE";
        open($MRCS_FILE_HANDLE, ">$each_MRCS_file") || die "\n Cannt open $each_MRCS_file in write_MRC_search_result_file\n\n";
    }else{
        unless($MRCS_FILE_HANDLE){ die "\n $MRCS_FILE_HANDLE is not passed from do_MRC_search \n"; }
    }

    print $MRCS_FILE_HANDLE "\n#<<START>>$query_seq $DB_seq OpenGap:$HERTA_open_gap_penalty, ExtGap:$HERTA_extend_gap_penalty, Algo:$HERTA_algorithm_type\n";
    &print_seq_in_block(\%MRC_search_alignment,     100, "F=$MRCS_FILE_HANDLE",
                        "E=SUM_HERTA_SCORE SEQ_IDENTITY ALIGNMENT_LENG",
                        $NO_screen_print);
    &print_seq_in_block(\%MRC_search_alignment_rv,  100, "F=$MRCS_FILE_HANDLE",
                        "E=SUM_HERTA_SCORE SEQ_IDENTITY ALIGNMENT_LENG",
                        $NO_screen_print);

    $herta_align_score    =   ${$MRC_search_alignment{'SUM_HERTA_SCORE'}}[0];
    $herta_align_score_rv =${$MRC_search_alignment_rv{'SUM_HERTA_SCORE'}}[0];
    $Mutual_Seq_Id_aligned=${$MRC_search_alignment_rv{'SEQ_IDENTITY'}};
    $ALIGNMENT_LENG       =${$MRC_search_alignment_rv{'ALIGNMENT_LENG'}};

    $forward_minus_backward_score = $herta_align_score - $herta_align_score_rv;

    print $MRCS_FILE_HANDLE "#<<STOP>> $query_seq($seq1_len)<->$DB_seq($seq2_len), ForBackScore: $forward_minus_backward_score, ForScore: $herta_align_score, BackScore: $herta_align_score_rv ID: $Mutual_Seq_Id_aligned L:$ALIGNMENT_LENG\n\n";

    # Only a handle opened in this sub is closed, the handle of the caller is left open
    close($MRCS_FILE_HANDLE) if $write_EACH_MRCS_pair_file eq 'E';
    return(\%MRC_search_alignment, %MRC_search_alignment_rv);
}





#______________________________________________________________________________
# Title     : write_amino_acid_propensity_matrix_files
# Usage     :
# Function  : Calculates the propensity with all permutations of sequences aligned
#
# Example   :
# Keywords  : write_AAPM_files, write_aapm_files, write_dipeptide_AA_propensity_matrix_files
# Options   :
#   $overwrite_opt=o by o
#   $NO_write_each_AAPM_file=n by n
#   $NO_sub_dir_creation=d by d   # this writes the AAPM files in the PWD (messy!)
#
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub write_amino_acid_propensity_matrix_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose   : Counts aligned dipeptide pairs between every pair of 'concentrated'
    #              sequences of each alignment file, writes one AAPM file per
    #              alignment (unless disabled) and finally one global AAPM file.
    # Arguments : $_[0] ref. of an array of alignment file names (only files whose
    #                   extension matches 'hssp' are parsed here)
    #             $_[1] upper concentration threshold (scalar ref or plain number, default 70)
    #             $_[2] lower concentration threshold (scalar ref or plain number, default 20)
    #             $_[3] 'o=o' to overwrite existing AAPM files
    #             $_[4] 'n'   not to write the AAPM file of each alignment
    #             $_[5] 'd'   not to create the result sub directory (writes in PWD)
    # Returns   : whatever the final &write_aapm_file call returns, or nothing when the
    #              global AAPM file already exists and overwriting was not asked.
    # Note      : all accumulators are lexical, so repeated calls do not add up.
    #_____________________________________________________________________________________
    my(@alignment_files, $h, %total_seq_used_hash, %total_PROPENSITY_MATRIX,
       %TOTAL_aa_composition, %TOTAL_dipeptide_occurrance, $TOTAL_aligned_dipeptide_count,
       $overwrite_opt, $NO_write_each_AAPM_file, $NO_sub_dir_creation, $AAPM_result_subdir,
       $AAPM_filename_for_all_alignment_file, $AAPM_filename_for_all_alignment_file_gz);
    my $upper_concentration_threshold=70;
    my $lower_concentration_threshold=20;
    my $standard_residue=qr/[ACDEFGHIKLMNPQRSTVWY]/; # uppercase standard amino acids only

    @alignment_files=@{$_[0] || []};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Thresholds are taken from a scalar ref. or from a plain number
    #____________________________________________________________________
    if(ref $_[1]){ $upper_concentration_threshold=${$_[1]} if ${$_[1]}; }
    elsif(defined $_[1] and $_[1]=~/^\d+(?:\.\d+)?$/ and $_[1] > 0){ $upper_concentration_threshold=$_[1]; }
    if(ref $_[2]){ $lower_concentration_threshold=${$_[2]} if ${$_[2]}; }
    elsif(defined $_[2] and $_[2]=~/^\d+(?:\.\d+)?$/ and $_[2] > 0){ $lower_concentration_threshold=$_[2]; }

    if(defined $_[3] and $_[3]=~/o=o/i){ $overwrite_opt='o'; }
    if(defined $_[4] and $_[4]=~/n/i){ $NO_write_each_AAPM_file='n'; }
    if(defined $_[5] and $_[5]=~/d/i){ $NO_sub_dir_creation='d'; }

    if($NO_sub_dir_creation){
       $AAPM_result_subdir='';
    }else{
       $AAPM_result_subdir="..\/AAPM_$lower_concentration_threshold\-$upper_concentration_threshold\/";
       unless(-d $AAPM_result_subdir){
           mkdir($AAPM_result_subdir, 0777)
               || warn "\n write_amino_acid_propensity_matrix_files: can not mkdir $AAPM_result_subdir: $!\n";
       }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Parsing Each Alignment file (like HSSP file)
    #_____________________________________________________________________________________
    for($h=0; $h<@alignment_files; $h++){
        my(%seq_used_hash, @seqs, $k, $i, $j, $last_k, $count_in_global, %ALIGNMENT,
           %concentrated_seqs, $num, $ali_file_type, $file_base_name, @residues_1, @residues_2,
           $dipeptide_1, $dipeptide_2, %PROPENSITY_MATRIX, $aligned_dipeptide_count,
           %dipeptide_occurrance, %aa_composition,
           $AAPM_filename_for_each_alignment_file, $AAPM_filename_for_each_alignment_file_gz);

        $file_base_name=${&get_base_names($alignment_files[$h])};

        $AAPM_filename_for_each_alignment_file="$AAPM_result_subdir$file_base_name\.aapm";
        $AAPM_filename_for_each_alignment_file_gz="$AAPM_result_subdir$file_base_name\.aapm\.gz";
        if(!$overwrite_opt and
            ( (-s $AAPM_filename_for_each_alignment_file    || 0) > 100000
            or (-s $AAPM_filename_for_each_alignment_file_gz || 0) > 100000) ){
            print "\n $AAPM_filename_for_each_alignment_file exists \n";
            next
        }

        print "\n opening $alignment_files[$h]";
        $ali_file_type=${&get_extension_names(\$alignment_files[$h])};

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # check what alignment file the input is and parse
        #____________________________________________________
        if($ali_file_type=~/hssp/i){
            %ALIGNMENT=%{&open_HSSP_file($alignment_files[$h])};
        }

        unless(%ALIGNMENT){ warn "\nwrite_amino_acid_propensity_matrix_files: \%ALIGNMENT is empty\n\n"; next; }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Concentrated means non-redundant representative
        # This is to make 'RSDB matrix'
        #______________________________________________________________
        %concentrated_seqs=%{&concentrate_seq_alignments(\%ALIGNMENT,
                                                         \$upper_concentration_threshold,
                                                         \$lower_concentration_threshold)};

        $num=keys %concentrated_seqs;
        print "\n \$num is $num\n";

        if($num < 2){
           # No AAPM file is open at this point, so the notice goes to the
           # screen like the other status messages of this sub.
           print "\n# $AAPM_filename_for_each_alignment_file had $num alignment, which is too small\n";
           next
        }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # %concentrated_seqs is the meaningful alignment data to process
        #________________________________________________________________
        @seqs=keys %concentrated_seqs;

        for($i=0; $i<@seqs; $i++){
           $seq_used_hash{$seqs[$i]}++;
           $total_seq_used_hash{$seqs[$i]}++;
           @residues_1=split(//, $concentrated_seqs{$seqs[$i]});

           for($j=$i+1; $j<@seqs; $j++){   # every unordered pair once, never a seq with itself

               $seq_used_hash{$seqs[$j]}++;
               $total_seq_used_hash{$seqs[$j]}++;  # the partner ($j) is counted, as in %seq_used_hash

               @residues_2=split(//, $concentrated_seqs{$seqs[$j]});

               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # This skips any sequence which has appeared once in order to make RSDB matrix extremely non-redundant
               #  (for Global matrix only). The usage counts do not change inside the $k loop, so it is decided here.
               #____________________________________________________________________________________________________________
               $count_in_global=( $total_seq_used_hash{$seqs[$i]} == 1
                                  or $total_seq_used_hash{$seqs[$j]} == 1 );

               # A dipeptide needs positions $k and $k+1 in BOTH sequences
               $last_k=($#residues_1 < $#residues_2) ? $#residues_1 : $#residues_2;

               for($k=0; $k < $last_k; $k++){
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Ignores any gap regions or insertions (lowercase in HSSP)
                  #___________________________________________________________________
                  next unless $residues_1[$k  ] =~ $standard_residue
                          and $residues_1[$k+1] =~ $standard_residue
                          and $residues_2[$k  ] =~ $standard_residue
                          and $residues_2[$k+1] =~ $standard_residue;

                  $dipeptide_1=join('', $residues_1[$k], $residues_1[$k+1]);
                  $dipeptide_2=join('', $residues_2[$k], $residues_2[$k+1]);

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Actual counting of the occurrences. Note that I add 2 instead of 1 in each pair of dipeptide
                  #     | cc | cd      <-- In the alignment of  .CCE.
                  # ---------------                             .CCD.
                  #  cc |  2 |  0                  (2/4)
                  # ---------------   =    -log -------------  =  -log(4)  <--- The final propensity in the matrix
                  #  ce |  0 |  2               (1/4) x (2/4)
                  #_________________________________________________________________________________________________
                  $PROPENSITY_MATRIX{$dipeptide_1}{$dipeptide_2} += 2;
                  $PROPENSITY_MATRIX{$dipeptide_2}{$dipeptide_1} += 2;

                  $aligned_dipeptide_count   +=2;
                  $dipeptide_occurrance{$dipeptide_1}++;
                  $dipeptide_occurrance{$dipeptide_2}++;
                  $aa_composition{$residues_1[$k]}++;
                  $aa_composition{$residues_1[$k+1]}++;
                  $aa_composition{$residues_2[$k]}++;
                  $aa_composition{$residues_2[$k+1]}++;

                  if($count_in_global){
                       $total_PROPENSITY_MATRIX{$dipeptide_1}{$dipeptide_2} +=2;
                       $total_PROPENSITY_MATRIX{$dipeptide_2}{$dipeptide_1} +=2;
                       $TOTAL_aligned_dipeptide_count +=2;
                       $TOTAL_dipeptide_occurrance{$dipeptide_1}++;
                       $TOTAL_dipeptide_occurrance{$dipeptide_2}++;
                       $TOTAL_aa_composition{$residues_1[$k]}++;
                       $TOTAL_aa_composition{$residues_1[$k+1]}++;
                       $TOTAL_aa_composition{$residues_2[$k]}++;
                       $TOTAL_aa_composition{$residues_2[$k+1]}++;
                  }
               }

           }

        }
        unless($NO_write_each_AAPM_file){
            &write_aapm_file(\$AAPM_filename_for_each_alignment_file,
                             \%PROPENSITY_MATRIX, \%aa_composition, \%dipeptide_occurrance, \%seq_used_hash,
                             \$aligned_dipeptide_count, \$overwrite_opt);
        }

    }## FOR loop for each alignment file

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Global AAPM file
    #___________________________________________________________________________________________
    $AAPM_filename_for_all_alignment_file    ="${AAPM_result_subdir}AA_propten_matrix_${lower_concentration_threshold}_${upper_concentration_threshold}.aapm";
    $AAPM_filename_for_all_alignment_file_gz = "$AAPM_filename_for_all_alignment_file\.gz";
    if(!$overwrite_opt and
       ( (-s $AAPM_filename_for_all_alignment_file    || 0) > 100000
        or (-s $AAPM_filename_for_all_alignment_file_gz || 0) > 100000) ){
        print "\n $AAPM_filename_for_all_alignment_file exists \n";
        return;
    }
    return(&write_aapm_file(\$AAPM_filename_for_all_alignment_file,
                            \%total_PROPENSITY_MATRIX, \%TOTAL_aa_composition, \%TOTAL_dipeptide_occurrance, \%total_seq_used_hash,
                            \$TOTAL_aligned_dipeptide_count, \$overwrite_opt));

}





#______________________________________________________________________________
# Title     : write_aapm_file
# Usage     :
# Function  :
# Example   :
# Keywords  : write_each_aapm_file
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub write_aapm_file{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose   : Writes one AAPM (amino acid propensity matrix) text file and gzips it.
    # Arguments : $_[0] output file name                     (scalar ref or plain string)
    #             $_[1] ref. of hash of hash : dipeptide -> dipeptide -> pair count
    #             $_[2] ref. of hash : residue   -> count
    #             $_[3] ref. of hash : dipeptide -> count
    #             $_[4] ref. of hash : sequence name -> times used
    #             $_[5] total number of aligned dipeptides   (scalar ref or plain number)
    #             $_[6] overwrite option, any true value     (scalar ref or plain value)
    # Returns   : ref. of the output file name; nothing when a file (or its .gz) bigger
    #              than 100000 bytes already exists and overwriting was not asked.
    # Note      : A reference argument is always dereferenced, even when it points to
    #              an empty value, so a ref. of an unset overwrite option means
    #              'do not overwrite'.
    #_____________________________________________________________________________________
    my(%PROPENSITY_MATRIX, %aa_composition, %dipeptide_occurrance, %seq_used_hash,
       %expected_propen_for_a_dipep, @AA, @total_dipep, @all_dipep, @SEQ_names,
       $all_peplet_count, $total_AA_occur, $num, $i, $j);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The default for 0 occurrence of pairs of AA to AA
    #__________________________________________________
    my $the_lowest_propen_observed = -5.5; ## this is from a empirical data. the lowest I saw was -4.46 (1 occurance)

    my $AAPM_filename_for_each_alignment_file   =ref($_[0]) ? ${$_[0]} : $_[0];
    my $AAPM_filename_for_each_alignment_file_gz="$AAPM_filename_for_each_alignment_file\.gz";

    %PROPENSITY_MATRIX   =%{$_[1] || {}};
    %aa_composition      =%{$_[2] || {}};
    %dipeptide_occurrance=%{$_[3] || {}};
    %seq_used_hash       =%{$_[4] || {}};
    my $aligned_dipeptide_count=ref($_[5]) ? ${$_[5]} : $_[5];
    my $overwrite_opt          =ref($_[6]) ? ${$_[6]} : $_[6];

    if(!$overwrite_opt
       and ( (-s $AAPM_filename_for_each_alignment_file    || 0) > 100000
            or (-s $AAPM_filename_for_each_alignment_file_gz || 0) > 100000) ){
        print "\n $AAPM_filename_for_each_alignment_file exists\n";
        return
    }

    open(my $AAPM_fh, '>', $AAPM_filename_for_each_alignment_file)
        || die "\n Can not open $AAPM_filename_for_each_alignment_file: $!\n";

    @all_dipep=sort keys %PROPENSITY_MATRIX;
    $num=@all_dipep;
    print $AAPM_fh "# Generated by write_aapm_file in $0\n# There were $num dipeptide kinds.\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Writing used sequence names
    #______________________________________________________________________
    @SEQ_names=sort keys %seq_used_hash;
    for($i=0; $i< @SEQ_names; $i++){
        print $AAPM_fh "$SEQ_names[$i] $seq_used_hash{$SEQ_names[$i]}\n";
    }

    @AA=sort keys  %aa_composition;
    for($i=0; $i<@AA; $i++){
       print $AAPM_fh "# Residue $AA[$i] $aa_composition{$AA[$i]}\n";
       $total_AA_occur += $aa_composition{$AA[$i]};
    }
    print $AAPM_fh "# Total Single residue : $total_AA_occur\n";

    @total_dipep=sort keys %dipeptide_occurrance;
    for($i=0; $i< @total_dipep; $i++){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Peplet : small Peptide fragment
        #_____________________________________________
        print $AAPM_fh "# Peplet: $total_dipep[$i] : $dipeptide_occurrance{$total_dipep[$i]}\n";
        if($aligned_dipeptide_count){  # no expectation can be computed from a zero total
            $expected_propen_for_a_dipep{$total_dipep[$i]}=$dipeptide_occurrance{$total_dipep[$i]}/$aligned_dipeptide_count;
        }
        $all_peplet_count += $dipeptide_occurrance{$total_dipep[$i]};
    }

    print $AAPM_fh "\n# The count of all the AA peplet: $all_peplet_count , $aligned_dipeptide_count\n\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # One line per ordered dipeptide pair: 'dipep1 dipep2 count <TAB> log propensity'
    #  log propensity = log( P(pair) / ( P(dipep1) x P(dipep2) ) )
    # NOTE(review): the value written for unobserved pairs starts at -5.5 and is lowered
    #  whenever a smaller propensity has been seen, so it depends on the writing order.
    #_____________________________________________________________________________________
    for($i=0; $i< @all_dipep; $i++){
        for($j=0; $j < @all_dipep; $j++){
           my $pair_count   =$PROPENSITY_MATRIX{$all_dipep[$i]}{$all_dipep[$j]};
           my $Exp_1st_DIPEP=$expected_propen_for_a_dipep{$all_dipep[$i]};
           my $Exp_2nd_DIPEP=$expected_propen_for_a_dipep{$all_dipep[$j]};

           if($pair_count and $aligned_dipeptide_count and $Exp_1st_DIPEP and $Exp_2nd_DIPEP){
              my $Prob_DIPEP_pair=$pair_count/$aligned_dipeptide_count;
              my $log_propen_for_each_dipep = log($Prob_DIPEP_pair/($Exp_1st_DIPEP*$Exp_2nd_DIPEP));
              if($log_propen_for_each_dipep < $the_lowest_propen_observed){
                  $the_lowest_propen_observed=$log_propen_for_each_dipep;
              }
              # plain print: the line is data, not a printf format
              print $AAPM_fh "$all_dipep[$i] $all_dipep[$j] $pair_count\t$log_propen_for_each_dipep\n";
           }else{
              print $AAPM_fh "$all_dipep[$i] $all_dipep[$j] 0\t$the_lowest_propen_observed\n";
           }
        }
    }
    close($AAPM_fh) || warn "\n write_aapm_file: error while closing $AAPM_filename_for_each_alignment_file: $!\n";

    if(-s $AAPM_filename_for_each_alignment_file){
       print "\n  Wrote $0:write_aapm_file ->$AAPM_filename_for_each_alignment_file\n";
       # List form: the file name never goes through a shell.
       # -f : we have decided to (re)write, so an older .gz must not block gzip.
       if(system('gzip', '-f', $AAPM_filename_for_each_alignment_file) == 0){
           print "\n  $0:write_aapm_file GZIPPED $AAPM_filename_for_each_alignment_file\n";
       }else{
           warn "\n  $0:write_aapm_file could not gzip $AAPM_filename_for_each_alignment_file (status $?)\n";
       }
    }
    return(\$AAPM_filename_for_each_alignment_file);
}






#______________________________________________________________________________
# Title     : write_ercf_files
# Usage     : $0 standard_ranked_expression_file.srex expression_ABX_file.abx
# Function  :
# Example   :
# Keywords  :
# Options   :
#   $sort_numerically_RV=r by r  # for sorting the standard Ranked Expression file(SREX)
# Author    : jong@biosophy.org,
# Category  : ercf : Expression Rank Comparison File
#              RXU : Relative eXpression Unit
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_ercf_files{
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Purpose : Reads a ranked expression file (SREX, first file arg) and
		#            an ABX expression file (second file arg), then writes
		#            BASE.ercf ('ORF level' lines in SREX order) and BASE.1col
		#            (levels only), BASE being the base name of the ABX file.
		#            ORFs missing from the ABX file, or with a level not above
		#            0, are written with level 0.
		# Note    : the ranking table is lexical, so repeated calls do not mix
		#            the ORFs of different SREX files.
		#______________________________________________________________________
		my($sort_numerically_RV, $SREX, $ABX, $ORF_name, $RXA, $Ranking, $base,
		   %SREX_rank, %gene_exp_level, @SREX_keys);
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# The first matching pattern decides which leading part of the ABX
		#  name is the ORF name. The order matters: (\S+) is greedy.
		#______________________________________________________________________
		my @ORF_name_patterns=( qr/^(\S+)\/_ex\d/,
		                        qr/^(\S+)\/\w+_ex\d/,
		                        qr/^(\S+)\/\w+/,
		                        qr/^(\S+)\// );
		$SREX=$file[0];
		$ABX =$file[1];
		$base=${&get_base_names(\$ABX)};

		open(my $SREX_fh, '<', $SREX) || die "\n write_ercf_files: Can not open SREX file $SREX: $!\n";
		while(my $SREX_line=<$SREX_fh>){
			 # Two columns are required (ORF name, RXU); the rank is the line order
			 if($SREX_line=~/(\S+)\s+(\S+)/){
					$ORF_name=$1;
					$Ranking++;
					$SREX_rank{$ORF_name}=$Ranking;
			 }
		}
		close($SREX_fh);

		open(my $ABX_fh, '<', $ABX) || die "\n write_ercf_files: Can not open ABX file $ABX: $!\n";
		while(my $ABX_line=<$ABX_fh>){
			 if($ABX_line=~/^(\S+)\s+(\S+)\s+\S+/){
					$ORF_name=$1;
					$RXA=$2;  ## Relative eXpression Abundance Unit

					foreach my $ORF_name_pattern (@ORF_name_patterns){
							if($ORF_name=~$ORF_name_pattern){
									$ORF_name="\U$1";
									last;
							}
					}
					$gene_exp_level{$ORF_name}=$RXA;
			 }
		}
		close($ABX_fh);

		open(my $ERCF_fh, '>', "$base\.ercf")      || die "\n write_ercf_files: Can not open $base\.ercf for writing: $!\n";
		open(my $ERCF_1COL_fh, '>', "$base\.1col") || die "\n write_ercf_files: Can not open $base\.1col for writing: $!\n";

		# NOTE(review): $sort_numerically_RV is never set in this sub, so the
		#  'r' option described in the header has no effect here -- confirm.
		@SREX_keys=@{&sort_hash_by_value(\%SREX_rank, $sort_numerically_RV)};
		for($i=0; $i< @SREX_keys; $i++){
				my $level=$gene_exp_level{$SREX_keys[$i]};
				$level=0 unless defined $level and $level > 0;
				printf $ERCF_fh ("%-14s %-10s\n", $SREX_keys[$i], $level);
				printf $ERCF_1COL_fh ("%-10s\n", $level);
		}
		close($ERCF_fh)      || warn "\n write_ercf_files: error while closing $base\.ercf: $!\n";
		close($ERCF_1COL_fh) || warn "\n write_ercf_files: error while closing $base\.1col: $!\n";
}

#______________________________________________________________________________
# Title     : write_to_selected_www_socket_filehandle
# Usage     :
# Function  : The socket should have been selected like: select(SOCKET_FOR_POST);
# Example   : &write_to_selected_www_socket_filehandle($relativeURL, $length, $content);
# Keywords  : write_to_socket
# Options   :
# Author    : jong@biosophy.org,
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_to_selected_www_socket_filehandle{
    # Sends an HTTP/1.0 POST request to the currently selected filehandle.
    #  $URL     : relative URL, like /cgi-bin/BLAST/nph-newblast
    #  $length  : value written in the Content-Length header
    #  $content : the url-encoded form body
    my($URL, $length, $content)=@_;

    # Header lines, ONE empty line, then the body followed by a newline.
    my $http_request=join('',
                          "POST $URL HTTP\/1.0",
                          "\nContent-Type:application\/x-www-form-urlencoded",
                          "\nContent-Length:$length",
                          "\n\n$content\n"); ## <<---- The '\n' before the content should be exactly TWO !!!
    print $http_request;
}

#______________________________________________________________________________
# Title     : write_to_www_socket_filehandle
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub write_to_www_socket_filehandle{
    # Sends an HTTP/1.0 POST request to the given socket filehandle.
    #  $SOCKET_FOR_POST : filehandle to write to
    #  $relativeURL     : like /cgi-bin/BLAST/nph-newblast
    #  $length          : value written in the Content-Length header
    #  $content         : the url-encoded form body
    # The filehandle which was selected before the call is selected again at the end.
    my($SOCKET_FOR_POST, $relativeURL, $length, $content)=@_;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # select returns the previously selected handle, which is kept
    #  to be restored later. Both STDERR and the socket are set
    #  to be command buffered by $|=1
    #_____________________________________________________________
    my $old_file_handle=select(STDERR);
    $|=1;
    select($SOCKET_FOR_POST);
    $|=1;

    my @request_pieces=( "POST $relativeURL HTTP\/1.0",
                         "\nContent-Type:application\/x-www-form-urlencoded",
                         "\nContent-Length:$length",
                         "\n\n$content\n" ); ## <<---- The '\n' before the content should be exactly TWO !!!
    foreach my $piece (@request_pieces){
        print $SOCKET_FOR_POST $piece;
    }
    select($old_file_handle); # changing back to the handle selected before the call
}



#______________________________________________________________________________
# Title     : write_reverse_seq_files
# Usage     :
# Function  :
# Example   :
# Keywords  : write_rev_seq_files, write_reverse_msf_files
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_reverse_seq_files{
    # Reads the MSF file named by the first argument with &open_msf_files and writes
    #  the sequences, in the order returned, to BASE_rv.EXT with &write_msf.
    # Returns the ref. of the array of file names that &write_msf reported.
    # NOTE(review): no reversing step is visible in this sub; presumably one of the
    #  helpers is expected to do it -- confirm.
    my($inputfile)=@_;

    my $base_name=${&get_base_names(\$inputfile)};
    my $extension=${&get_extension_names(\$inputfile)};
    my $reversed_file_name="$base_name\_rv\.$extension";

    my($seq_hash_ref, $seq_order_ref)=&open_msf_files(\$inputfile, 'o');
    my %sequences=%{$seq_hash_ref};
    my @seq_order=@{$seq_order_ref};

    my $written_ref=&write_msf(\%sequences, \$reversed_file_name, \@seq_order);
    my @file_written=@{$written_ref};

    print "\n# (INFO)  \@file_written: @file_written\n";
    return(\@file_written);
}



#_____________________________________________________________________
# Title     : make_reverse_seq_database
# Usage     : &make_reverse_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub make_reverse_seq_database{
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# For each input file: reads the sequences with &open_fasta_files,
		#  passes them through &reverse_sequences and writes the result with
		#  &write_fasta to BASE_rv.EXT (BASE_rv.mpfa when there is no
		#  extension). A message tells if the output file is non-empty.
		#______________________________________________________________________
		$| = 1;
		$i=0;
		while($i < @file){
				my $fasta_file_for_DB=$file[$i];
				my $base_name=${&get_base_names($fasta_file_for_DB)};
				my $extension=${&get_file_extensions($file[$i])};

				# keep the original extension when there is one
				my $out_file_name=($extension=~/\S/) ? "$base_name\_rv\.$extension"
				                                     : "$base_name\_rv\.mpfa";

				my %seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
				my %reversed_seqs=%{&reverse_sequences(\%seqs)};
				&write_fasta(\%reversed_seqs, $out_file_name );

				my $result_message=(-s $out_file_name)
				    ? "\n# make_reverse_seq_database: Supposedly wrote: $out_file_name\n"
				    : "\n# make_reverse_seq_database: Error in writing: $out_file_name\n";
				print $result_message;
				$i++;
		}
		print "\n# make_reverse_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_hmm_from_alignment
# Usage     : @out_hmm_file_names=@{&make_hmm_from_alignment(\@file, "$over_write")};
# Function  :
# Example   :
# Keywords  : HMM, hidden markov model, make_HMM_from_alignment,
#             make_hmm_from_msf_file, create_hmm_from_alignment,
#             create_hmm_from_msf_file,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_hmm_from_alignment{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Purpose : Runs 'hmmb -P BLOSUM62 -B 200 -w BASE.hmm FILE' for every
	#            input file whose name contains '.msf'. An existing, non-empty
	#            BASE.hmm is kept unless the 'o' char option is given.
	# Returns : ref. of the array of the hmm file names when MORE THAN ONE
	#            hmmb run was started, otherwise a ref. of a scalar holding
	#            the single name (undef when no run was started).
	#            NOTE(review): with one or no file the result can not be
	#            dereferenced as an array, unlike the Usage line in the header.
	#________________________________________________________________________
	my(@out_hmm_files, $base, $out_hmm_file);  # lexical: nothing leaks into the subs called below
	for($i=0; $i< @file; $i++){
	   next unless $file[$i]=~/\.msf/;

	   $base=${&get_base_names($file[$i])};
	   $out_hmm_file= "$base\.hmm";
	   if($char_opt=~/o/ or !(-s $out_hmm_file) ){
	       # List form: file names are handed to hmmb as they are, no shell in between
	       my $hmmb_status=system('hmmb', '-P', 'BLOSUM62', '-B', '200', '-w', $out_hmm_file, $file[$i]);
	       if($hmmb_status != 0){
	           warn "\n# make_hmm_from_alignment: hmmb failed for $file[$i] (status $hmmb_status)\n";
	       }
	       push(@out_hmm_files, $out_hmm_file);
	   }else{
	       print "\n# The $out_hmm_file file already exists. To overwrite use -o opt\n";
	   }
	}
	if(@out_hmm_files > 1){
	   return(\@out_hmm_files);
	}else{
	   return(\$out_hmm_files[0]);
	}
}

#________________________________________________________________________
# Title     : get_non_overlapping_elements_in_array
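# Usage     : @subs = @{&get_non_overlapping_elements_in_array(\@array1, \@array2)};
# Function  : Returns the elements which occur in only ONE of the two input
#             arrays: first the elements of the first array which are not in
#             the second, then the elements of the second which are not in
#             the first. Order and repeated elements are kept.
# Example   : Following will produce (A K C E D X);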
#		  @array1= qw( A B K B B C);
#  		@array2= qw( B E D X);
#  		@subs = @{&get_non_overlapping_elements_in_array(\@array1, \@array2)};
# Keywords  : array_subtract, substract_array, ary1_minus_ary2
# Options   :
# Returns   :
# Argument  :
# Authors   : jong@biosophy.org
# Version   : 1.0
#--------------------------------------------------------------------
sub get_non_overlapping_elements_in_array{
    # Takes two array references and returns the reference of an array with the
    #  elements of the first array absent from the second, followed by the
    #  elements of the second array absent from the first.
    my @first =@{$_[0]};
    my @second=@{$_[1]};

    # membership tables (only the keys matter)
    my(%in_first, %in_second);
    @in_first{@first}  =();
    @in_second{@second}=();

    my @only_in_first =grep { !exists $in_second{$_} } @first;
    my @only_in_second=grep { !exists $in_first{$_}  } @second;

    return([ @only_in_first, @only_in_second ]);
}

#______________________________________________________________________________
# Title     : get_atom_coord_from_SCOP_domain_def_info
# Usage     : @xyz_coord1=@{&get_atom_coord_from_SCOP_domain_def_info(\$domain1, \%PDB_atom_domains)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.5
#------------------------------------------------------------------------------
sub get_atom_coord_from_SCOP_domain_def_info{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose   : Collects the coordinates stored in %PDB_atom_domains for the
    #              residues named by a SCOP style domain definition.
    # Arguments : $_[0] domain definition(s), comma separated (scalar ref or string).
    #                   Each one is 'chain:start-stop', 'start-stop' or 'chain:'
    #             $_[1] ref. of hash : chain -> residue number -> ref. of array of
    #                   coordinates. Ranges without chain are looked up under 'NO_CHAIN'
    # Returns   : ref. of one flat array of all the coordinates found
    # Note      : For ranges, residues from start+1 up to stop-2 are taken.
    #              NOTE(review): the upper bound looks like it may have been meant
    #              to be stop-1 -- confirm before changing.
    #              Definitions which can not be used are reported by &send_mail_to_me.
    #________________________________________________________________________________
    my($dom_def_long, %PDB_atom_domains, @xyz_coord, @dom_def);
    $dom_def_long=ref($_[0]) ? ${$_[0]} : $_[0];
    $dom_def_long='' unless defined $dom_def_long;
    %PDB_atom_domains=%{$_[1]};

    # Returns the coordinate list of one residue, or nothing when the chain or the
    #  residue is not there (and never creates missing entries in the hash).
    my $coords_of=sub{
        my($chain_id, $residue_number)=@_;
        my $residues=$PDB_atom_domains{$chain_id};
        return () unless ref($residues) eq 'HASH' and ref($residues->{$residue_number}) eq 'ARRAY';
        return @{$residues->{$residue_number}};
    };

    @dom_def=split(/\,/, $dom_def_long);
    foreach my $dom_def (@dom_def){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #  d1vdea1 1vde    a:416-454   1.002.076.001.002.001,  \w? is necessary as some has a form h:114-223b
        #________________________________________________________________
        if($dom_def=~/^(\w+):(\d+)\w?\-(\d+)\w?$/){
            my($domain, $dom_start, $dom_stop)=($1, $2+1, $3-1);
            for(my $d=$dom_start; $d< $dom_stop; $d++){
                push(@xyz_coord, $coords_of->($domain, $d));
            }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # d1a0h.2 1a0h    271-320    1.002.041.001.002.012, \w? is necessary as some has a form h:114-223b
        #________________________________________________________________
        }elsif($dom_def=~/^(\d+)\w?\-(\d+)\w?$/){
            my($dom_start, $dom_stop)=($1+1, $2-1);
            # usable only when at least one of the two boundary residues has coordinates
            if($coords_of->('NO_CHAIN', $dom_start) or $coords_of->('NO_CHAIN', $dom_stop)){
                for(my $d=$dom_start; $d< $dom_stop; $d++){
                     push(@xyz_coord, $coords_of->('NO_CHAIN', $d));
                }
            }else{ &send_mail_to_me("\n Can not get \$PDB_atom_domains{ NO_CHAIN } for $dom_def_long \n"); }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # d1a0h.2 1a0h    d:    1.002.041.001.002.012
        #________________________________________________________________
        }elsif($dom_def=~/^(\w+):$/){
            my $chain=$1;
            if($PDB_atom_domains{$chain}){
                # the whole chain, in numerical residue order
                foreach my $residue_number (sort {$a<=>$b} keys %{$PDB_atom_domains{$chain}}){
                     push(@xyz_coord, $coords_of->($chain, $residue_number));
                }
            }else{ &send_mail_to_me("\n Can not get \$PDB_atom_domains{ \$chain } for $dom_def_long \n"); }
        }else{
            &send_mail_to_me(" $dom_def  did not match anything right. $dom_def_long\n");
        }
    }
    return(\@xyz_coord);
}



#________________________________________________________________________
# Title     : get_array_average
# Usage     : $output = ${&get_array_average(\@any_array)};
# Function  : (the same as average_array)
# Example   :
# Warning   : If divided by 0, it will automatically replace it with 1
# Keywords  : get_array_average, av_array, average_array, get_average_array
#             average_of_array, average_array, get_average_array,
#             get_average_of_array, calculate_array_average
# Options   :
# Returns   : reference of a single scalar number (the average).
# Argument  : takes one array reference.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------
sub get_array_average{
    # Averages the non-blank elements of the array given by reference and returns
    #  the reference of the result. A second argument (scalar ref or string)
    #  containing 'i' (like 'int' or '-i') truncates the average with int().
    # With no usable element the divisor is 1, so the result is 0.

    # Blank elements are left out so that they are not counted in the divisor.
    # ('$item == 0' can not be used for this test, as 0 is a valid value.)
    my @filled_items=grep { $_ !~ /^$/ } @{$_[0]};
    my $int_option  =${$_[1]} || $_[1];

    my $total=0;
    foreach my $value (@filled_items){
        $total += $value;
    }

    my $divisor=@filled_items || 1;   ## To prevent 'Division by 0' error
    my $average=$total/$divisor;
    $average=int($average) if $int_option =~ /[\-]*i[nt]*/;

    return(\$average);
}


#______________________________________________________________________________
# Title     : get_prefix_arrays_for_seqlet_alignment
# Usage     : ($seq1_start_final_ref, $seq2_start_final_ref, $scores_gap_ref)
#                 =&get_prefix_arrays_for_seqlet_alignment(\@residues1, \@residues2,
#                                                     \@positions1, \@positions2);
#
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub get_prefix_arrays_for_seqlet_alignment{
    # Builds the unaligned leading part of two sequences as two equally long,
    #  gap padded arrays plus a matching score line of dots.
    #  $_[0], $_[1] : refs of the residue arrays of sequence 1 and 2
    #  $_[2], $_[3] : refs of the position arrays; only the first position of
    #                 each is read (1 means there is nothing before the start)
    # Returns a LIST of three array refs: prefix of seq 1, prefix of seq 2, scores.
    my @residues1 =@{$_[0]};
    my @residues2 =@{$_[1]};
    my @positions1=@{$_[2]};
    my @positions2=@{$_[3]};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The residues before the first position are cut off
    #  from the (copied) residue arrays
    #___________________________________________________
    my(@seq1_start, @seq2_start);
    @seq1_start=splice(@residues1, 0, $positions1[0]-1) if $positions1[0] != 1;
    @seq2_start=splice(@residues2, 0, $positions2[0]-1) if $positions2[0] != 1;

    # seq 1 prefix comes first and faces dots in seq 2, then the other way round
    my @seq1_start_final=( @seq1_start,         ('.') x @seq2_start );
    my @seq2_start_final=( ('.') x @seq1_start, @seq2_start         );
    my @scores_gap      =( ('.') x (@seq1_start + @seq2_start) );

    return(\@seq1_start_final, \@seq2_start_final, \@scores_gap);
}


#______________________________________________________________________________
# Title     : get_psdip_subset
# Usage     : &get_psdip_subset($PDBG_file, $PSDIP_file);
# Function  : This checks the protein interaction pairs from PSDIP
#              and shows ONLY IF the pdbd domains are found to be in
#              the given MPFA(or PDBG) file such as pdb100d_1_48.mpfa.
#             To make sure the interacting domains are all found in
#              PDB100D or PDB90D etc.
# Example   :
# Keywords  : get_PSDIP_subset
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_psdip_subset{
    # Prints (to the currently selected handle) every PROTEIN_INTERACT /
    # PROTEIN_NONINTERACT line of the PSDIP file whose two domain names both
    # occur as '>' entries in the PDBG (or MPFA) file.
    #   $_[0], $_[1] : the two file names, in either order, each given as a
    #                  plain string or as a scalar ref. A name containing
    #                  '.psdip' is the PSDIP file; one containing '.pdbg' or
    #                  '.mpfa' is the PDBG file.
    # Dies when either file is missing or empty.
    #
    # Fixes relative to the earlier version:
    #   * /\.[pdbg|mpfa]/ was a character class (it also matched '.psdip');
    #     it is now a real alternation.
    #   * the domain table is lexical, so entries from a previous call can no
    #     longer let unrelated pairs through. The never-read globals
    #     ($PDBG_type, %pdbg_desc, %superfam) are no longer populated.
    my($PSDIP_file, $PDBG_file, %pdbg);

    for my $arg (@_[0, 1]){
        my $file = ref($arg) ? ${$arg} : $arg;
        next unless defined $file;
        if($file =~ /\.psdip/){
            $PSDIP_file = $file;
        }elsif($file =~ /\.(?:pdbg|mpfa)/){
            $PDBG_file = $file;
        }
    }
    unless(defined $PSDIP_file and defined $PDBG_file
           and -s $PSDIP_file and -s $PDBG_file){
        die "\n I need PSDIP and PDBG file <- get_psdip_subset\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Collect the domain names listed in the PDBG file:
    #   >name  N.N.N...  description
    #_________________________________________________________________
    open(my $pdbg_fh, '<', $PDBG_file) || die "\n Can not open $PDBG_file: $!\n";
    while(my $line = <$pdbg_fh>){
        if($line =~ /\>(\S+)\s(\d+\.\d+\.\d+)\S+\s+(.+)/){
            $pdbg{$1} = $2;
        }
    }
    close($pdbg_fh);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Echo only the pairs whose two members are both known domains
    #_________________________________________________________________
    open(my $psdip_fh, '<', $PSDIP_file) || die "\n Can not open $PSDIP_file: $!\n";
    while(my $line = <$psdip_fh>){
        if($line =~ /PROTEIN_(?:NON)?INTERACT\s+(\S+)\s+(\S+)/){
            print $line if $pdbg{$1} and $pdbg{$2};
        }
    }
    close($psdip_fh);
}




#______________________________________________________________________________
# Title     : get_DDD_domain_length_from_segment_ranges
# Usage     :
# Function  :
# Example   :
# Keywords  : get_sequence_length_from_DDD_domain_segments,
#             get_seq_length_from_range calculate_length_from_segments
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub get_DDD_domain_length_from_segment_ranges{
    # Sums the lengths of the segments of a domain.
    #   $_[0] : array ref holding (start1, stop1, start2, stop2, ...)
    # Returns a ref to the total of (stop - start + 1) over all pairs.
    # Dies when fewer than two values are given. A pair whose stop value is
    # missing or blank is reported on STDOUT and skipped.
    #
    # The old test "$seq_ranges[$i]+1=~/\S/" parsed as
    # "$seq_ranges[$i] + (1 =~ /\S/)", so the blank check almost never fired
    # and an odd or blank stop value produced a negative length.
    my @seq_ranges = @{$_[0]};
    my $seq_seqments_length = 0;

    if(@seq_ranges <= 1){
       print "\n get_DDD_domain_length_from_segment_ranges, need at least one start/stop pair\n@seq_ranges\n\n";
       die "get_DDD_domain_length_from_segment_ranges: fewer than two range values\n";
    }
    for(my $i=0; $i < @seq_ranges; $i+=2){
       my ($start, $stop) = @seq_ranges[$i, $i+1];
       if(defined $stop and $stop =~ /\S/){
          $seq_seqments_length += $stop - $start + 1;
       }else{
          print "\n WARN: get_DDD_domain_length_from_segment_ranges, \$seq_ranges\[\$i\+1\] is blank or empty\n\n";
       }
    }
    return(\$seq_seqments_length);
}



#______________________________________________________________________________
# Title     : get_PWD_from_HTTP_REFERER_variable
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_PWD_from_HTTP_REFERER_variable{
    # Extracts the path part of a referer URL.
    #   $_[0] : optional referer, as a scalar ref or a plain string; when
    #           absent or empty, $ENV{HTTP_REFERER} is used.
    # Returns a ref to:
    #   'Misc'  for http://host:port/Misc/   (path without the outer slashes)
    #   ''      for http://host:port/
    #   undef   when the referer matches neither form.
    #
    # The earlier version had a stray '};' that closed the 'if' before its
    # 'else' (a syntax error), and read $$_[0] (an element of @$_) where
    # ${$_[0]} was meant. 'https' URLs are now accepted as well.
    my($PWD,$HTTP_REFERER);
    my $given = ref($_[0]) ? ${$_[0]} : $_[0];
    if(defined $given and length $given){
        $HTTP_REFERER = $given;
    }else{
        $HTTP_REFERER = $ENV{HTTP_REFERER};
    }
    return(\$PWD) unless defined $HTTP_REFERER;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # matching http://cyrah.ebi.ac.uk:1111/Misc/
    #_________________________________________________________
    if( $HTTP_REFERER =~/https?:\/\/\w[^\/]+\w+\/(\S+)\/$/){
       $PWD=$1;
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # matching http://cyrah.ebi.ac.uk:1111/
    #__________________________________________________________
    }elsif($HTTP_REFERER =~/https?:\/\/\w[^\/]+\w\/$/){
       $PWD='';
    }
    return(\$PWD);
}


#______________________________________________________________________________
# Title     : get_2_most_similar_strings_in_array
# Usage     :($INT_sq1, $INT_sq2, $sq1, $sq2)=@{&get_2_most_similar_strings_in_array(\@seq_names)};
# Function  : returns the common sequence names(usually Intermediates in search)
#             The sequence names can be XXXX or XXXX_30-66
#             Accepts 4 strings in an array.
# Example   :
# Keywords  : get_common_seq_names, get_identical_seq_names
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub get_2_most_similar_strings_in_array{
		my @seq_names=sort @{$_[0]};
		my ($i, $previous_seq_name, @seq_names_out, $previous_seq_name_orig,
		    $highest_iden);
		for($i=0; $i< @seq_names; $i++){
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names have RANGE information XXX_10-30
			 #______________________________________________
			 if($seq_names[$i]=~/(\S+)_(\d+)\-\d+/){
					if($1 eq $previous_seq_name){
					    if($i == 1){
					       if($previous_seq_name_start > $2){
										 @seq_names_out=($seq_names[$i], $previous_seq_name_orig, $seq_names[$i+1], $seq_names[$i+2]);
					       }else{
										 @seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i+1], $seq_names[$i+2]);
								 }
							}elsif($i > 1){
								 @seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i-2], $seq_names[$i+1]);
							}
					}
					$previous_seq_name=$1;
					$previous_seq_name_start=$2;
					$previous_seq_name_orig=$seq_names[$i];
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names DO NOT have range info  XXXXXX
			 #______________________________________________
			 }elsif($seq_names[$i] eq $seq_names[$i+1]){
					if($i == 0){
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-1], $seq_names[$i+2]);
					}elsif($i == 1){
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-2], $seq_names[$i-1]);
					}elsif($i == 2){
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-3], $seq_names[$i-2]);
					}
					return(\@seq_names_out)
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names DO NOT have range info and there is not identical seq pairs
			 #__________________________________________________________________________________
			 }else{
			    my ($counter, $j, $non_char_count);
					$counter=1;
					@name_chars1=split(//, $previous_seq_name);
					@name_chars2=split(//, $seq_names[$i]);
					for($j=0; $j < @name_chars1; $j++){
							if($name_chars1[$j] eq $name_chars2[$j]){
									$counter++;
									if($counter > $highest_iden){
											$highest_iden=$counter;
											if($i == 1){
													@seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i+1], $seq_names[$i+2]);
													print "\n# $i, @seq_names_out H=$highest_iden";
											}elsif($i == 2){
													@seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i-2], $seq_names[$i+1]);
													print "\n# $i, @seq_names_out H=$highest_iden";
											}elsif($i == 3){
													@seq_names_out=($seq_names[$i-1], $seq_names[$i], $seq_names[$i-3], $seq_names[$i-2]);
													print "\n# $i, @seq_names_out H=$highest_iden";
													return(\@seq_names_out);
											}
									}
							}else{ ## When there is non-identical char comes, skip it.
									#$non_char_count++;
									next;
							}
					}
					$previous_seq_name=$seq_names[$i];
					$previous_seq_name_orig=$seq_names[$i];
			 }
		}
		return(\@seq_names_out);
}


#______________________________________________________________________________
# Title     : get_file_creation_date
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_file_creation_date{
    # Returns a ref to the local-time string of a file's time stamp.
    #   $_[0] : file name, as a scalar ref or a plain string
    # The stamp read is field 9 of stat(), i.e. the last-modification time.
    my $path  = ${$_[0]} || $_[0];
    my $stamp = localtime( (stat($path))[9] );
    return(\$stamp);
}


#__________________________________________________________________
# Title     : get_false_positive_seq_matches
# Usage     : %seq=%{&get_false_positive_seq_matches(\%mspa_1, \%mspa2)};
# Function  : gets sequences which are wrongly matched from intermediate seq search
# Example   :
#
#  OUTPUT looks like the following;
#	d1dvh__=d1fcdc1     7.1e-08
#	d1fcdc1=d1dvh__     7.1e-08
#	d5cytr_=d351c__     5.3e-08
#	d351c__=d5cytr_     5.3e-08
#	d1cyi__=d2mtac_     9.1e-06
#	d2mtac_=d1cyi__     9.1e-06
#	d1cyi__=d5cytr_     0.00045
#	d5cytr_=d1cyi__     0.00045
#
# Warning   : The default is to show the best E value(lowest that is)
#
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub get_false_positive_seq_matches{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%iss_input)= %{$hash[0]};
	my(%final_table_Evalue, %final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value,
	   $name_combi1, $name_combi2, $each_iss_line, $all_enquiry_seqs);
	@iss_lines = sort values %iss_input;

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
		$all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
						for($i=0; $i< @iss_lines; $i++){
							 $each_iss_line=$iss_lines[$i];
							 if($each_iss_line=~/^\s*(\S+)\s+/){
											$all_enquiry_seqs{$1}++;
							 }
						}
						$all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^\s*(\S+)\s+(\S+)\((\d+)\)\((\S+)\)\s+(\S+)\((\d+)\)\((\S+)\)/){
								$inter_seq_seq_name= $2;
								$inter_seq_score   = $3;
								$inter_seq_E_value = $4;
								$match_seq_seq_name= $5;
								$match_seq_score   = $6;
								$match_seq_E_value = $7;
								$name_combi1="$1\=$match_seq_seq_name";
								$name_combi2="$match_seq_seq_name\=$1";
								if($all_enquiry_seqs !~/$match_seq_seq_name/){
											 $false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value";
											 next;
								}
	   }
	}
	if($char_opt=~/v/){
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # Wrting the false positives
	   #__________________________________
	   @keys_false=sort keys %false_positive_matches;
	   print "\n\n# False positives: " if @keys_false > 0;
	   for $key_false (@keys_false){
								if($key_false =~/\S+/){
											 printf ("\n%-30s %-30s",  $key_false,  $false_positive_matches{$key_false});
								}
	   }
	   print "\n";
	}
	return(\%false_positive_matches);
}

#______________________________________________________________________________
# Title     : convert_aminio_acid_to_binary_pattern
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub convert_aminio_acid_to_binary_pattern{
		# Returns a ref to a hash mapping each of the 20 one-letter amino acid
		# codes to a string of twenty space-separated 0/1 flags in which
		# exactly one flag is 1. The slot of the 1 is the position of the
		# residue in the list below.
		my @residue_order = qw(P G C M H D E A F I V L R K S T Q N W Y);
		my %aa_pattern_table;

		for my $slot (0 .. $#residue_order){
			my @flags = (0) x @residue_order;
			$flags[$slot] = 1;
			$aa_pattern_table{ $residue_order[$slot] } = join(' ', @flags);
		}
		return(\%aa_pattern_table);
}

#______________________________________________________________________________
# Title     : convert_genbank_to_fasta
# Usage     : &convert_genbank_to_fasta(\%genbank, \$out_file, \$block_size)
# Function  :
# Example   :
# Keywords  : convert_genbank_to_fasta_hash
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub convert_genbank_to_fasta{
    # Writes one GenBank record as a FASTA file.
    #   $_[0] : hash ref with the keys 'seq_name' and 'sequence'
    #   $_[1] : output file name (scalar ref or string);
    #           default 'default_genank_fasta.spfa'
    #   $_[2] : residues per line (scalar ref or number); default 80
    # Returns a ref to the name of the file written. Dies when the file can
    # not be opened or closed.
    #
    # Fixes relative to the earlier version:
    #   * a missing file name or block size no longer wipes out the default
    #     (an undefined block size made the write loop run forever);
    #   * every block is followed by a newline, as the sibling
    #     convert_genbank_to_fasta_file does;
    #   * the file handle is lexical and is closed before returning.
    my %genbank_hash  = %{$_[0]};
    my $out_file_name = ref($_[1]) ? ${$_[1]} : $_[1];
    my $block_size    = ref($_[2]) ? ${$_[2]} : $_[2];

    unless(defined $out_file_name and length $out_file_name){
        $out_file_name = "default_genank_fasta.spfa";
    }
    unless(defined $block_size and $block_size =~ /^\d+$/ and $block_size > 0){
        $block_size = 80;
    }

    my $sequence = defined $genbank_hash{'sequence'} ? $genbank_hash{'sequence'} : '';
    $sequence =~ s/\s+//g;
    my $seq_leng = length($sequence);

    open(my $fasta, '>', $out_file_name) || die "\nCan not open $out_file_name\n\n";
    print $fasta "\>$genbank_hash{'seq_name'}\n";
    for(my $offset=0; $offset < $seq_leng; $offset += $block_size){
       print $fasta substr($sequence, $offset, $block_size), "\n";
    }
    close($fasta) || die "\nCan not close $out_file_name\n\n";
    return(\$out_file_name);
}


#______________________________________________________________________________
# Title     : convert_genbank_to_fasta_file
# Usage     : &convert_genbank_to_fasta_file(\@ARGV, \$block_size);
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub convert_genbank_to_fasta_file{
    # Converts every record of the given GenBank/EMBL files to its own FASTA
    # file named  <base>_<accession>.snfa  (nucleotide) or
    # <base>_<accession>.spfa  (protein).
    #   $_[0] : array ref of input file names
    #   $_[1] : residues per line (scalar ref or number); default 80
    # Returns a ref to the list of FASTA files written.
    # Relies on get_base_names() and open_genbank_files() from this library.
    #
    # Fixes relative to the earlier version:
    #   * the nucleotide test runs first. The protein class contains G and T,
    #     so testing it first labelled almost every DNA record '.spfa'.
    #   * a record matching neither pattern is reported and skipped instead
    #     of being written under the previous record's file name.
    #   * @accession_no and $k are lexical; the file handle is lexical.
    my($i, $j, $k, @genbank_files, %genbank, @accession_no, $seq_leng,
       $sequence, $base, $block_size,  $out_file_name,
       @fasta_files, $accession_no);
    $block_size=80;
    @genbank_files=@{$_[0]};
    my $wanted_block_size = ref($_[1]) ? ${$_[1]} : $_[1];
    if(defined $wanted_block_size and $wanted_block_size =~ /^\d+$/ and $wanted_block_size > 0){
       $block_size=$wanted_block_size;
    }
    for($i=0; $i< @genbank_files; $i++){
       unless($genbank_files[$i]=~/\.embl/ or $genbank_files[$i]=~/\.genbank/){
          print "\n Can you change the extension of $genbank_files[$i] to genbank?\n\n";
       }
       print "\n# (i) Parsing $genbank_files[$i] to make FASTA ouput\n";
       $base=${&get_base_names($genbank_files[$i])};
       %genbank=%{&open_genbank_files($genbank_files[$i])};
       @accession_no=sort keys %genbank;
       for($k=0; $k<@accession_no; $k++){
           $accession_no=$accession_no[$k];
           $sequence=$genbank{$accession_no}{'sequence'};
           $sequence='' unless defined $sequence;
           $sequence=~s/\s+//g;
           $seq_leng=length($sequence);
           if($sequence=~/^[ATGCU]+$/i){
               $out_file_name="$base\_$accession_no.snfa";
           }elsif($sequence=~/[DEFGHIKLMNPQRSTVWY]+/i){
               $out_file_name="$base\_$accession_no.spfa";
           }else{
               print "\n WARN: convert_genbank_to_fasta_file, no usable sequence for $accession_no, skipped\n\n";
               next;
           }
           open(my $fasta, '>', $out_file_name) || die "\nCan not open $out_file_name\n\n";
           print $fasta "\>$genbank{$accession_no}{'seq_name'} $seq_leng bp\n";
           for($j=0; $j< $seq_leng; $j+= $block_size){
              print $fasta substr($sequence, $j, $block_size), "\n";
           }
           close($fasta);
           if(-s $out_file_name){  push(@fasta_files, $out_file_name); }else{ die "\nERROR \n"; }
       }
    }
    return(\@fasta_files);
}



#______________________________________________________________________________
# Title     : convert_MRCS_to_MSP_format
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
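#    $get_Forward_search_scores='F'  is set by passing a string containing F as the 2nd argument
#    $get_Backward_search_scores='B' is set by passing a string containing B as the 3rd argument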
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub convert_MRCS_to_MSP_format{
    # Turns a hash of MRCS records into MSP-style text lines.
    #   $_[0] : hash ref;  pair key => { ForBackScore, ForScore, BackScore,
    #           MutualSeqID, QUERY_SEQ_RANGE ('n-m'), TARGET_SEQ_RANGE ('n-m'),
    #           QUERY_SEQ_NAME, TARGET_SEQ_NAME, ... }
    #   $_[1] : a string containing 'F' selects ForScore
    #   $_[2] : a string containing 'B' selects BackScore
    #           (both or neither: ForBackScore is used)
    # Returns a ref to a hash  pair key => formatted line.
    #
    # Fix relative to the earlier version: $get_Forward_search_scores was a
    # package global, so once a caller had asked for 'F' every later call
    # without options still used the forward score. All working variables
    # are lexical now.
    my(%MRCS_hash, %MSP_hash, $get_Forward_search_scores,
       $get_Backward_search_scores);
    my $read_point_found=1;

    %MRCS_hash=%{$_[0]};
    $get_Forward_search_scores ='F' if defined $_[1] and $_[1]=~/F/;
    $get_Backward_search_scores='B' if defined $_[2] and $_[2]=~/B/;

    if($get_Forward_search_scores and $get_Backward_search_scores){
        $get_Forward_search_scores=$get_Backward_search_scores='';
    }

    for my $paired_seqs (sort keys %MRCS_hash){
        my $ForBackScore =$MRCS_hash{$paired_seqs}{'ForBackScore'};
        my $ForScore     =$MRCS_hash{$paired_seqs}{'ForScore'};
        my $BackScore    =$MRCS_hash{$paired_seqs}{'BackScore'};
        my $MutualSeqID  =$MRCS_hash{$paired_seqs}{'MutualSeqID'};
        my $query_seq    =$MRCS_hash{$paired_seqs}{'QUERY_SEQ_NAME'};
        my $target_seq   =$MRCS_hash{$paired_seqs}{'TARGET_SEQ_NAME'};

        # ranges are stored as 'start-stop'
        my ($query_range_start, $query_range_stop)
            =$MRCS_hash{$paired_seqs}{'QUERY_SEQ_RANGE'}=~/^(\d+)\-(\d+)$/;
        my ($match_string_start, $match_string_stop)
            =$MRCS_hash{$paired_seqs}{'TARGET_SEQ_RANGE'}=~/^(\d+)\-(\d+)$/;

        my $wanted_score_type;
        if($get_Forward_search_scores){       $wanted_score_type=$ForScore
        }elsif($get_Backward_search_scores){  $wanted_score_type=$BackScore
        }else{ $wanted_score_type=$ForBackScore }

        # second column is a constant 0.0 placeholder, last column a constant 1
        $MSP_hash{$paired_seqs}
            =sprintf("%-6s %-8s %-5s %-5s %-5s %-32s %-5s %-s\t%-25s\t%-s\n",
                     $wanted_score_type, 0.0, $MutualSeqID, $query_range_start,
                     $query_range_stop, $query_seq, $match_string_start,
                     $match_string_stop, $target_seq, $read_point_found);
    }
    return(\%MSP_hash);
}




#______________________________________________________________________________
# Title     : concentrate_seq_alignments
# Usage     :   %concentrated_seqs=%{&concentrate_seq_alignments(\%HSSP_align,
#                                                 \$upper_concentration_threshold,
#                                                 \$lower_concentration_threshold)};
# Function  :
# Example   : INPUT: $HSSP_align{$seq_ids[$i]}{$names[$j]}
#
# Keywords  : dilute_sequence_alignments, concentrate_sequence_alignment
#             concentrate_seq_alignments, HSSP
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub concentrate_seq_alignments{
    # Picks a subset of aligned sequences whose pairwise identity to every
    # already picked sequence lies within [lower, upper].
    #   $_[0] : hash ref  numeric id => { seq_name => sequence }
    #   $_[1] : upper identity threshold (scalar ref or number); default 60
    #   $_[2] : lower identity threshold (scalar ref or number); default 20
    # Sequences are visited in ascending numeric id order; the first one is
    # always kept. Returns a ref to a hash  seq_name => sequence.
    # Relies on get_seq_identity() from this library, which is handed a hash
    # ref of the two name => sequence pairs and returns a scalar ref.
    #
    # Fix relative to the earlier version: the thresholds were overwritten
    # unconditionally, so calling without them replaced the 60/20 defaults
    # by undef and rejected every sequence with identity above zero. The
    # working variables are lexical now.
    my(%Alignment, $i, $j, @seq_ids, $mutual_seq_id, %Concentrated_seq_HASH);
    my $upper_concentration_threshold=60;
    my $lower_concentration_threshold=20;

    %Alignment=%{$_[0]};
    my $upper_given = ref($_[1]) ? ${$_[1]} : $_[1];
    my $lower_given = ref($_[2]) ? ${$_[2]} : $_[2];
    if(defined $upper_given and length $upper_given){
        $upper_concentration_threshold=$upper_given;
    }
    if(defined $lower_given and length $lower_given){
        $lower_concentration_threshold=$lower_given;
    }
    @seq_ids=sort {$a<=>$b} keys %Alignment;

    for($i=0; $i< @seq_ids; $i++){
        my $NOT_chosen;
        my ($seq_name, $each_sequence) = %{$Alignment{$seq_ids[$i]}};
        my @concentrated_seqs=keys %Concentrated_seq_HASH;
        if(!@concentrated_seqs){
            $Concentrated_seq_HASH{$seq_name}=$each_sequence;
            next;
        }
        for($j=0; $j< @concentrated_seqs; $j++){
            my $already_chosen_seq_name=$concentrated_seqs[$j];
            my $already_chosen_sequence=$Concentrated_seq_HASH{$already_chosen_seq_name};
            $mutual_seq_id=${&get_seq_identity({$seq_name,
                                                $each_sequence,
                                                $already_chosen_seq_name,
                                                $already_chosen_sequence})};
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # One out-of-range identity is enough to reject the candidate
            #_______________________________________________________________
            if($mutual_seq_id > $upper_concentration_threshold or $mutual_seq_id < $lower_concentration_threshold ){
               $NOT_chosen=1;
               last;
            }
        }
        unless($NOT_chosen){
            $Concentrated_seq_HASH{$seq_name}=$each_sequence;
        }
    }
    return(\%Concentrated_seq_HASH);
}



#______________________________________________________________________________
# Title     : convert_sec_str_to_binary_pattern
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub convert_sec_str_to_binary_pattern{
		# Returns a ref to a hash mapping the secondary structure codes
		# H, E and C to a string of three space-separated 0/1 flags with a
		# single 1, placed at the position of the code in the list below.
		my @state_order = qw(H E C);
		my %ss_pattern_table;

		for my $slot (0 .. $#state_order){
			my @flags = (0) x @state_order;
			$flags[$slot] = 1;
			$ss_pattern_table{ $state_order[$slot] } = join(' ', @flags);
		}
		return(\%ss_pattern_table);
}



#__________________________________________________________________
# Title     : make_sequence_match_table
# Usage     : %sequence_match_table=%{&make_sequence_match_table(\%mspa_1, \%mspa2)};
# Function  : makes a table of match with the values for E values.
# Example   :
#
#  INPUT looks like this: (the iss file format), first column is key
#
#   d1ten__(110)(0.00031)     d1fna__    d1fna___1-91(578)(6.9e-37)       d1ten__(110)(0.00031)
#   d1cfb_2(255)(7.8e-16)     d1cfb_2    HSU55258_741-838(255)(5.6e-12)   d1cfb_2(255)(7.8e-16)
#
#  OUTPUT looks like the following;
#   d1dvh__=d1fcdc1    Correct: 7.1e-08
#	d1fcdc1=d1dvh__    Correct: 7.1e-08
#	d5cytr_=d351c__    Correct: 5.3e-08
#	d351c__=d5cytr_    Correct: 5.3e-08
#	d1cyi__=d2mtac_    Wrong:   9.1e-06
#
# Keywords  : make_sequence_match_Evalue_table, Evalue_table, make_Evalue_table
#             make_iss_sequence_match_table
# Options   : _  for debugging.
#             #  for debugging.
#             s  for skip SELF to SELF match entries
#             w  for returning Smith-Waterman scores rather than E values
#             r  for reflexive output
#
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub make_sequence_match_table{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Builds, prints and returns the match table of an iss (intermediate
	# sequence search) result.
	#   first hash ref  : iss lines (only the values are read), each
	#                     'enquiry  inter(score)(Evalue)  matched(score)(Evalue)'
	#   first array ref : optional list of enquiry names; without it the
	#                     first column of every line is used
	#   options         : v verbose, s skip self matches, r reflexive output
	#                     (both 'a=b' and 'b=a'), w return scores
	# A match whose matched name is not among the enquiry names is a false
	# positive and is printed as 'Wrong:' unless it is listed in %scop_bugs.
	# Returns a ref to the 'intermediate matched' score table with option w,
	# otherwise to the corresponding E value table.
	#
	# Fixes relative to the earlier version:
	#   * %scop_bugs is filled BEFORE the false positives are printed; it
	#     used to be filled inside the loop after its first lookup, so the
	#     first key was always reported as 'Wrong:'.
	#   * %false_positive_matches and %all_enquiry_seqs are lexical; as
	#     package globals they carried entries over from earlier calls (and
	#     from get_false_positive_seq_matches).
	#   * the matched name is looked up as a literal substring instead of
	#     being interpolated into a regex unescaped.
	my(%iss_input)= %{$hash[0]};
	my(%final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value, $key_seq,
	   $all_enquiry_seqs, $name_combi1, $name_combi2, @sorted_names, $name_sorted,
	   %final_table_interm_and_matched_score, %final_table_interm_and_matched_Evalue,
	   %all_enquiry_seqs, %false_positive_matches, $non_reflexive);
	my %scop_bugs=qw(d2kauc1=d2kauc  1 d1pkya2=d1pkya1 1 d1pbe_1=d1pbe_2 1
	                 d1dih_1=d1dih_2 1 d2ohxa2=d2ohxa1 1 d1poxa3=d1pvda2 1
	                 d1efga1=d1efga2 1 d1bct__=d1brd__ 1 d1qora1=d1qora2 1
	                 d2ohxa1=d2ohxa2 1);
	my @iss_lines = sort values %iss_input;

	if($char_opt=~/v/){ print "\n# make_sequence_match_table: \$char_opt is $char_opt\n" ; }

	if($char_opt=~/r/){ $non_reflexive=0;
	}else{    $non_reflexive=1; } # default , not to print result in two ways

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
		$all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
		for($i=0; $i< @iss_lines; $i++){
			if($iss_lines[$i]=~/^\s*(\S+)\s+/){  $all_enquiry_seqs{$1}++;  }
		}
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# $all_enquiry_seqs contains all the sequences in the group
		#____________________________________________________________________
		$all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   my $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^\s*(\S+)\s+(\S+)\((\d+)\)\((\S+)\)\s+(\S+)\((\d+)\)\((\S+)\)/){
		  $key_seq=$1;
		  $inter_seq_seq_name= $2;
		  $inter_seq_score   = $3;
		  $inter_seq_E_value = $4;
		  $match_seq_seq_name= $5;
		  $match_seq_score   = $6;
		  $match_seq_E_value = $7;
		  if( $key_seq eq $match_seq_seq_name and $char_opt=~/s/ ){ next } ## avoiding self self match
		  @sorted_names=sort ($key_seq, $match_seq_seq_name);
		  $name_combi1="$key_seq\=$match_seq_seq_name";
		  $name_combi2="$match_seq_seq_name\=$key_seq";
		  $name_sorted="$sorted_names[0]\=$sorted_names[1]";
		  if(index($all_enquiry_seqs, $match_seq_seq_name) < 0){
			 # matched sequence is not an enquiry sequence: false positive
			 if($non_reflexive){
				$false_positive_matches{$name_sorted}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
			 }else{
				$false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
			 }
			 next;
		  }elsif($final_table_score{$name_combi1} < $inter_seq_score or
		      $final_table_score{$name_combi2} < $inter_seq_score or
		      $final_table_score{$name_sorted} < $inter_seq_score){
			 # keep the entry with the highest intermediate score for a pair
			 $final_table_score{$name_combi1}=$inter_seq_score;
			 $final_table_score{$name_combi2}=$inter_seq_score;

			 if($non_reflexive){
				$final_table_interm_and_matched_score{$name_sorted} = "$inter_seq_score $match_seq_score";
				$final_table_interm_and_matched_Evalue{$name_sorted} = "$inter_seq_E_value $match_seq_E_value";
			 }else{
				$final_table_interm_and_matched_score{$name_combi1} = "$inter_seq_score $match_seq_score";
				$final_table_interm_and_matched_score{$name_combi2} = "$inter_seq_score $match_seq_score";
				$final_table_interm_and_matched_Evalue{$name_combi1} = "$inter_seq_E_value $match_seq_E_value";
				$final_table_interm_and_matched_Evalue{$name_combi2} = "$inter_seq_E_value $match_seq_E_value";
			 }
		  }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# printing out to screen
	# NOTE(review): with option w this PRINTS the E value table but RETURNS
	# the score table (and the reverse without w). That looks inverted, but
	# the printed output is left as it was -- confirm the intent before
	# changing it.
	#__________________________________________________________
	if($char_opt =~/w/){
	   for my $key (sort keys %final_table_interm_and_matched_Evalue){
		  if($key =~/\S+/){
			 printf ("\n%-30s Correct: %-50s",  $key, $final_table_interm_and_matched_Evalue{$key});
		  }
	   }
	}else{
	   for my $key (sort keys %final_table_interm_and_matched_score){
		  if($key =~/\S+/){
			 printf ("\n%-30s Correct: %-50s",  $key,  $final_table_interm_and_matched_score{$key});
		  }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Writing the false positives
	#__________________________________
	for my $key_false (sort keys %false_positive_matches){
	   if($key_false =~/\S+/){
	      if($scop_bugs{$key_false}){
	          printf ("\n%-30s Correct: %-50s",  $key_false,  $false_positive_matches{$key_false});
	      }else{
	          printf ("\n%-30s Wrong:   %-50s",  $key_false,  $false_positive_matches{$key_false});
	      }
	   }
	}
	print "\n";

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Returning the hash result
	#_____________________________________
	if($char_opt =~/w/){ ## returning Smith-waterman score than E value
	   return(\%final_table_interm_and_matched_score);
	}else{
	   return(\%final_table_interm_and_matched_Evalue);
	}
}



#__________________________________________________________________
# Title     : write_iss_file
# Usage     : &write_iss_file(\%mspa1, \%mspa2);  ## for 2 mspa_x file input
# Function  : writes the intermediate sequence search file.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : write_interm_seq_search_file
#             v  for showing the output in STDOUT
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Category  :
# Version   : 1.2
#---------------------------------------------------------------------------
sub write_iss_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Chains two search results into 'enquiry  intermediate  final match'
	# lines.
	#   first hash ref  : 'enquiry(score)(Evalue)' => whitespace separated
	#                     'intermediate(score)(Evalue)' matches
	#   second hash ref : intermediate name => whitespace separated final
	#                     matches
	#   option 'v'      : also print every line to STDOUT
	# Returns a ref to a hash  final match => formatted line (a later line
	# for the same final match replaces an earlier one).
	#
	# Fixes relative to the earlier version:
	#   * the merge tables and the result are lexical. 'my' declared
	#     %merged_1/%merged_2 while the code appended ('.=') to the package
	#     globals %merged_mspa1/%merged_mspa2 and filled the global
	#     %final_out, so every call re-used the data of the previous calls.
	#   * matches merged under one enquiry name are joined with a blank
	#     instead of being glued together.
	#   * empty tokens (from leading blanks) are ignored, and a first-table
	#     key without '(score)(Evalue)' is used as the name itself.
	 my(%mspa_1, %mspa_2, %merged_mspa1, %merged_mspa2, %final_out);

	 %mspa_1=%{$hash[0]};
	 %mspa_2=%{$hash[1]};

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # removing the empty matches and merging matches according to the enquiry seqs.
	 #  Following input will become;
	 # ..
	 # xxxxx
	 # xxxxx  YYYYY(xx)(yy)
	 # xxxxx  zzzzz(xx)(yy) ttttt(xx)(yy)
	 # ..
	 #  ->  xxxxx  'YYYYY(xx)(yy) zzzzz(xx)(yy) ttttt(xx)(yy)'
	 #____________________________________________________________________________________
	 for my $enquiry_key (sort keys %mspa_1){
		 next unless defined $mspa_1{$enquiry_key} and $mspa_1{$enquiry_key}=~/\S+/;
		 my ($seq_name)=$enquiry_key=~/(\S+)\((\S+)\)\((\S+)\)/;
		 $seq_name=$enquiry_key unless defined $seq_name;
		 $merged_mspa1{$seq_name}= defined $merged_mspa1{$seq_name}
		                         ? "$merged_mspa1{$seq_name} $mspa_1{$enquiry_key}"
		                         : $mspa_1{$enquiry_key};
	 }
	 for my $inter_key (sort keys %mspa_2){
		 next unless defined $mspa_2{$inter_key} and $mspa_2{$inter_key}=~/\S+/;
		 $merged_mspa2{$inter_key}=$mspa_2{$inter_key};
	 }

	 for my $enquiry_seq (sort keys %merged_mspa1){
		 my @intermediate_seqs=sort grep { length } split(/\s+/, $merged_mspa1{$enquiry_seq});
		 for my $intermediate_seq (@intermediate_seqs){
			 # the second table is keyed by the bare intermediate name
			 my ($inter_seq_name)=$intermediate_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
			 next unless defined $inter_seq_name and defined $merged_mspa2{$inter_seq_name};
			 my @final_matches=sort grep { length } split(/\s+/, $merged_mspa2{$inter_seq_name});
			 for my $final_matched_seq (@final_matches){
				 my $iss_line=sprintf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
				 print $iss_line if $char_opt=~/v/;
				 $final_out{$final_matched_seq}=$iss_line;
			 }
		 }
	 }
	 return(\%final_out);
}

#______________________________________________________________________________
# Title     : get_perl_keywords
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_perl_keywords{
		# Builds a lookup table of Perl keywords and builtin function names.
		# Returns a reference to a hash in which every listed word is both
		# the key and the value, so membership can be tested with a plain
		# hash lookup.
		my @reserved_words=qw( AUTOLOAD BEGIN CORE DESTROY END abs accept alarm and atan2 bind binmode bless caller chdir chmod chop chown chr chroot
			 close closedir cmp connect continue cos crypt dbmclose dbmopen defined delete die do dump each else elsif endgrent endhostent endnetent endprotoent endpwent endservent
			 eof eq eval exec exit exp fcntl fileno flock for foreach fork format formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
			 getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid getservbyname
			 getservbyport getservent getsockname getsockopt glob gmtime goto grep gt hex if index int ioctl join keys kill last lc lcfirst le length link listen local localtime log lstat
			 lt m mkdir msgctl msgget msgrcv msgsnd my ne next no not oct open opendir or ord pack package pipe pop print printf push q qq quotemeta qw qx rand read readdir readline
			 readlink readpipe recv redo ref rename require reset return reverse rewinddir rindex rmdir s scalar seek seekdir select
			 semctl semget semop send setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent setservent setsockopt shift
			 shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split sprintf sqrt srand stat
			 study sub substr symlink syscall sysread system syswrite tell telldir tie time times tr truncate uc ucfirst
			 umask undef unless unlink unpack unshift untie until use utime values vec wait waitpid wantarray
			 warn while write x xor y
		 );

		# A hash slice fills the whole table in one assignment (word => word).
		my %keyword_table;
		@keyword_table{@reserved_words} = @reserved_words;

		return \%keyword_table;
}




#______________________________________________________________________________
# Title     :  get_homology_info_of_seq_pairs
# Usage     :
# Function  :
# Example   : %seq_pair_homology_table=%{&get_homology_info_of_seq_pairs(\%pairs_excluded,
#                                         \%pdbg_hash_table)};
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_homology_info_of_seq_pairs{
	 # Takes any number of hash references and sorts them into two roles by
	 # looking at the value stored under the first key of each hash:
	 #   - a value made of two whitespace-separated words marks the hash of
	 #     sequence pairs to be checked,
	 #   - a value made of a single word marks the pdbg lookup table.
	 # Every key of the pair hash that looks like "name1 name2" is handed to
	 # check_homology_of_seq_pair together with the pdbg table, and the
	 # dereferenced result is stored under that key.
	 # Returns a reference to the hash  { "name1 name2" => homology info }.
	 my (%candidate_pairs, %pdbg_lookup, %homology_of_pair);

	 foreach my $hash_ref (@_){
			my %given = %{$hash_ref};
			my ($probe_key) = keys %given;       # first key in hash order
			my $probe_value = $given{$probe_key};
			if($probe_value =~ /^\S+\s+\S+$/){
					%candidate_pairs = %given;
			}elsif($probe_value =~ /^\S+$/){
					%pdbg_lookup = %given;
			}
	 }

	 foreach my $pair (keys %candidate_pairs){
			next unless $pair =~ /^(\S+)\s+(\S+)/;
			my $verdict = ${&check_homology_of_seq_pair(\$pair, \%pdbg_lookup)};
			$homology_of_pair{$pair} = $verdict;
			print "\n#>> $pair $verdict" if $verbose;
	 }
	 return(\%homology_of_pair);
}

#______________________________________________________________________________
# Title     : get_sequence_region_distance
# Usage     : $range_distance=${&get_sequence_region_distance(\@range)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_sequence_region_distance{
    # Computes the gap between two sequence regions given as 'start-stop'
    # strings.
    #
    # Argument : reference to an array of range strings, e.g. ['10-20', '30-45'].
    #            The order of the ranges does not matter; they are sorted by
    #            their start position first. Only the two ranges with the
    #            smallest starts are used.
    # Returns  : reference to a scalar holding
    #            (start of the second range) - (stop of the first range).
    #            The value is negative when the two ranges overlap, and
    #            undef when fewer than two parsable ranges were supplied.
    my @given = ref($_[0]) eq 'ARRAY' ? @{$_[0]} : ();

    # Parse every range exactly once and keep only well-formed ones, so that
    # no stale regex capture ($1) from an earlier match can leak into the
    # result when an element does not match.
    my @parsed;
    for my $range (@given){
        next unless defined $range and $range =~ /(\d+)\-(\d+)/;
        push @parsed, [$1, $2];
    }

    my $distance;
    if(@parsed >= 2){
        my ($first, $second) = sort { $a->[0] <=> $b->[0] } @parsed;
        $distance = $second->[0] - $first->[1];
    }
    return(\$distance);
}



#________________________________________________________________________________
# Title     : get_sequence_overlap_size
# Usage     : $ovlapsize=${&get_sequence_overlap_size($st1, $en1, $st2, $en2)};
# Function  :
# Example   :
# Keywords  : CF: get_overlapping_range, get_overlapping_seq_match
#             get_sequence_overlap_size, get_overlap_size, check_overlap_size
#             check_sequence_overlap_size
# Options   :
# Category  :
# Author    : jong@biosophy.org
# Version   : 1.1
#--------------------------------------------------------------------------------
sub get_sequence_overlap_size{
    # Computes how far two sequence regions overlap.
    #
    # Arguments: either four numbers ($start1, $end1, $start2, $end2)
    #            or two range strings ('10-100', '20-211').
    # Returns  : reference to a scalar holding
    #              min($end1, $end2) - max($start1, $start2)
    #            i.e. the end-minus-start span of the shared region (not an
    #            inclusive residue count). The value is negative when the
    #            regions do not overlap. When the arguments cannot be
    #            understood an error is printed and the referenced value
    #            is undef.
    my($start1, $end1, $start2, $end2, $overlapping_region_matched);

    my @bounds;
    if(@_ == 4){
        @bounds = @_;
    }elsif(@_ == 2){
        # Both range strings have to be parsed; each contributes start and end.
        for my $region (@_){
            if(defined $region and $region =~ /(\d+)\-(\d+)/){
                push(@bounds, $1, $2);
            }
        }
    }

    unless(@bounds == 4){
        print "\n# (ERROR) get_sequence_overlap_size: I need 2 or 4 arguments for regions\n";
        print "   They look like (\$start1, \$end1, \$start2, \$end2) or ('10-100', '20-211')\n";
        print "   You got it, Sarah?? Try again my dear!\n";
        return(\$overlapping_region_matched);
    }
    ($start1, $end1, $start2, $end2) = @bounds;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The four relative layouts
    #     ---------      ---------          -----       ---------
    #  ------                ----------   ----------      ----
    # all reduce to: the shared region starts at the larger start and
    # stops at the smaller end.
    #___________________________________
    my $shared_start = $start1 >= $start2 ? $start1 : $start2;
    my $shared_end   = $end1   <= $end2   ? $end1   : $end2;
    $overlapping_region_matched = $shared_end - $shared_start;

    return(\$overlapping_region_matched);
}


#________________________________________________________________________________
# Title     : get_overlapping_seq_match_size
# Usage     : $ovlapsize=${&get_overlapping_seq_match_size($st1, $en1, $st2, $en2)};
# Function  :
# Example   :
# Keywords  : CF: get_overlapping_range, get_overlapping_seq_match
#             get_sequence_overlap_size, get_overlap_size, check_overlap_size
#             check_sequence_overlap_size
# Options   :
# Category  :
# Author    : jong@biosophy.org
# Version   : 1.1
#--------------------------------------------------------------------------------
sub get_overlapping_seq_match_size{
    # Computes the size of the region shared by two sequence matches.
    #
    # Arguments: either four numbers ($start1, $end1, $start2, $end2)
    #            or two range strings ('10-100', '20-211').
    # Returns  : reference to a scalar holding the end-minus-start span of
    #            the shared region:
    #              min($end1, $end2) - max($start1, $start2)
    #            Negative when the two regions are disjoint. On unusable
    #            arguments an error is printed and the referenced value is
    #            undef.
    my($start1, $end1, $start2, $end2, $overlapping_region_matched);
    my $args_ok = 0;

    if(@_ == 4){
        ($start1, $end1, $start2, $end2) = @_;
        $args_ok = 1;
    }elsif(@_ == 2){
        # The two range strings are parsed independently of each other, so
        # the second region is filled in whenever both strings match.
        my($region1, $region2) = @_;
        if(defined $region1 and $region1 =~ /(\d+)\-(\d+)/){
            ($start1, $end1) = ($1, $2);
        }
        if(defined $region2 and $region2 =~ /(\d+)\-(\d+)/){
            ($start2, $end2) = ($1, $2);
        }
        $args_ok = 1 if defined $start1 and defined $start2;
    }

    unless($args_ok){
        print "\n# (ERROR) get_overlapping_seq_match_size: I need 2 or 4 arguments for regions\n";
        print "   They look like (\$start1, \$end1, \$start2, \$end2) or ('10-100', '20-211')\n";
        print "   You got it, Sarah?? Try again my dear!\n";
        return(\$overlapping_region_matched);
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Whatever the relative layout of the two regions is (shifted left,
    # shifted right, one inside the other), the shared part runs from the
    # larger of the two starts to the smaller of the two ends.
    #___________________________________
    my $later_start = $start1 >= $start2 ? $start1 : $start2;
    my $earlier_end = $end1   <= $end2   ? $end1   : $end2;
    $overlapping_region_matched = $earlier_end - $later_start;

    return(\$overlapping_region_matched);
}

#______________________________________________________________________________
# Title     : get_username
# Usage     : $user_name=${&get_username};
# Function  :
# Example   :
# Keywords  : get_login_name
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub get_username{
    # Looks up the password-database entry for the effective user id ($>)
    # and returns a reference to a scalar holding the login name
    # (getpwuid in scalar context yields the name).
    my $login_name = getpwuid($>);
    return \$login_name;
}

#______________________________________________________________________________
# Title     : get_unix_shell_name
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org, On commercial use issue, Email me.
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_unix_shell_name{
		# Derives the shell name (e.g. 'bash', 'tcsh') from the SHELL
		# environment variable: the trailing run of word characters, with or
		# without a leading path. Returns a reference to a scalar holding the
		# name. Prints an error and dies when SHELL has no such trailing word.
		my $shell_setting = $ENV{'SHELL'};

		# First try the "/path/to/name" form, then a bare "name".
		my ($shell_env) = $shell_setting =~ /\/(\w+)$/;
		($shell_env) = $shell_setting =~ /(\w+)$/ unless defined $shell_env;

		unless(defined $shell_env){
				print "\n# (ERROR) SHELL env setting is not on, I can not give you SHELL type\n";
				die;
		}
		return \$shell_env;
}


#______________________________________________________________________________
# Title     : get_stat_FASTA_search_result_in_mspa_0_files
# Usage     : &get_stat_FASTA_search_result_in_mspa_0_files(\@file);
# Function  :
# Example   :
# Keywords  : get_stat00_result, get_stat_mspa0_files, get_stat_single_search_result
# Options   :
#
#  $E_value= by e=
#  $verbose=v by v
#  $show_options=o by o
#  $step   =  by s=
#  $score_thresh1=   by t1=
#  $score_thresh2=   by t2=
#  $E_mult_factor1 = by m1=
#  $E_mult_factor2 = by m2=
#
# Category  : statistics, search, bio
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_stat_FASTA_search_result_in_mspa_0_files{
		# Collects homology statistics for one or more search results.
		#
		# For every file base name found in the arguments (given as xxx.pdbg,
		# xxx.mspa0 or xxx.mspa_0, either directly or inside an array ref) the
		# sub reads the sequence names of xxx.pdbg through open_pdbg_files,
		# reads the lines of xxx.mspa0, builds a "query => matched sequences"
		# table filtered by E-value and score, removes duplicate and self
		# matches, and passes the table to get_isearch_result_stat. The fourth
		# element of that result (a hash ref, copied into %correct) is merged
		# over all bases.
		#
		# Returns: reference to the merged hash (%final_stat_big_hash).
		#
		# The following names are used without a 'my' in this sub and are
		# therefore package variables: $verbose, $show_options, $step, $s,
		# $seq_leng1, $seq_leng2, $match_seq, @split, @output.
		my($num_enq_seq, $pdbg_file, %input_file_base, $score_thresh1, $score_thresh2,
				$E_mult_factor1, $E_mult_factor2, @seqs, @pdbg_seqs, @MSP0, @array,
				$E_value, %final_stat_big_hash, @bases, $i, $j, $k);
		my $leng_thresh=10;
		$score_thresh1 = 73;
		my $simple_pdbg_read_opt='b';
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Arguments handling
		#_____________________________________
		# Option strings (e=, m1=, m2=, t1=, t2=, o=) are removed from @_ once
		# read; $i-- compensates for the element that splice took out.
		for($i=0; $i< @_; $i++){
			 if($_[$i]=~/e=(\S+)/){          $E_value=$1;
					splice(@_, $i, 1); $i--;
			 }elsif(ref($_[$i]) eq 'ARRAY'){
					 @array=@{$_[$i]};
					 for($j=0; $j < @array; $j++){
							 if($array[$j]=~/(\S+)\.pdbg$/){         $input_file_base{$1}=$1;
							 }elsif($array[$j]=~/(\S+)\.mspa_?0$/){   $input_file_base{$1}=$1;
							 }
					 }
			 }elsif($_[$i]=~/(\S+)\.pdbg$/){
					 $input_file_base{$1}=$1;
			 }elsif($_[$i]=~/(\S+)\.mspa_?0$/){
					 $input_file_base{$1}=$1;
			 }elsif($_[$i]=~/m1=(\S+)/){          $E_mult_factor1=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/m2=(\S+)/){          $E_mult_factor2=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/t1=(\S+)/){          $score_thresh1=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/t2=(\S+)/){          $score_thresh2=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/o=(\S+)/){          $show_options=$1;
					splice(@_, $i, 1); $i--;
			 }
		}
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Handling options
		#_________________________________
		if($E_value=~/^\s*$/){
			 $E_value=5;
			 print "\n# WARNING: you did not set 'e=x.xxx' option default $E_value used\n";
		}
		if($E_mult_factor1 !~/\S/){  $E_mult_factor1 =1; }
		if($E_mult_factor2 !~/\S/){  $E_mult_factor2 =1; }
		# NOTE(review): $step is printed here but never assigned in this sub.
		if($show_options=~/o/){
			 print "\n#---- \$step          : $step";
			 print "\n#---- \$score_thresh1 : $score_thresh1";
			 print "\n#---- \$score_thresh2 : $score_thresh2\n";
		}


		@bases=keys %input_file_base;
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Main loop
		#__________________________________________
		for($i=0; $i< @bases; $i++){
				my(%mspa_0, %mspa_00, $score, $evalue, $enquiry, @keys0, @keys2, $j,
					 %stat, %stat2, $sum_correct, $sum_false, @non_dup, $base, $pdbg_file,
					 @seqs, @pdbg_seqs, $mspa_0_file, @MSP0, @keys, $k, %correct );
				$base=$bases[$i];
				$pdbg_file ="$base\.pdbg";
				# The search result is always looked for as "<base>.mspa0", also
				# when the argument was given as "<base>.mspa_0".
				$mspa_0_file="$base\.mspa0";
				print "\n# $base $pdbg_file $mspa_0_file\n" if $verbose;

				# NOTE(review): because of the 'or', the message is printed only
				# when BOTH files are missing/empty, and processing continues
				# afterwards either way -- confirm whether 'and' plus a skip was
				# intended.
				unless(-s $pdbg_file or -s $mspa_0_file){
					 print "\n", __LINE__, "# file is missing. I need xxx.pdbg, xxx.mspa_0\n\n";
				}

				@seqs=@pdbg_seqs= keys %{&open_pdbg_files($pdbg_file, $simple_pdbg_read_opt)};

				if(@pdbg_seqs < 2){
					 print "\n# too little sequneces @pdbg_seqs $pdbg_file\n";
					 die;
				}

				print "\n# Result of open_pdbg_files, \@seqs are        : @seqs \n" if $verbose;
				# NOTE(review): the result of open() is not checked; a missing
				# file leaves @MSP0 empty.
				open(MSP0, "$mspa_0_file");
				@MSP0=<MSP0>;
				close(MSP0);

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# opening each mspa0 file
				#______________________________________
				for(@MSP0){
						# Captures used below: $1 score, $2 evalue, $3/$4 and $6/$7 are
						# the two number pairs used for the length check, $5 the
						# enquiry (query) name, $8 the matched sequence name.
						if(/^(\S+)\s+(\S+)\s+\S*\s*(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
								# Length checking
								#__________________________________________________
								$seq_leng1=$4-$3;
								$seq_leng2=$7-$6;
								# NOTE(review): the 'next' sits inside the $verbose test, so
								# short matches are skipped only in verbose mode -- confirm
								# whether the skip was meant to be unconditional.
								if($seq_leng1 < $leng_thresh or $seq_leng2 < $leng_thresh){
									 if($verbose){
											 print "\n# LENG $seq_leng1, $seq_leng2: $seq_leng1 $seq_leng2  $5 => $8 $1 $2 skipping\n";
											 next;
									 }
								}

								$score=$1;
								$evalue=$2;
								$enquiry=$5;
								$match_seq=$8;
								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
								#  Evalue checking
								#__________________________________________________
								# A hit above the E-value limit is dropped, but the query is
								# still registered (with an empty match list) so that it
								# appears as a key of %mspa_0. A trailing "_start-end" range
								# is stripped from the query name.
								if($evalue > $E_value ){
										if($enquiry=~/^(\S+)_\d+\-\d+/){
											 $mspa_0{"$1"} ="" unless $mspa_0{"$1"};
											 next;
										}else{
											 $mspa_0{"$enquiry"} ="" unless $mspa_0{"$enquiry"};;
											 next;
										}
								}

								if($score < $score_thresh1){     next;     }
								# %mspa_00 ("sorted pair" => " score evalue") is filled only
								# for queries whose name carries no "_start-end" range.
								if($enquiry=~/^(\S+)_\d+\-\d+/){
										$mspa_0{"$1"} .="$match_seq ";
								}else{
										$mspa_0{"$enquiry"} .="$match_seq ";
										$mspa_00{join(' ', sort($enquiry, $match_seq))} = " $score $evalue";
								}
						}
				}
				%stat=%mspa_0;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# filtering duplicates
				#____________________________________________
				# Besides duplicates (remove_dup_in_array), empty entries and
				# the query itself are removed from each match list.
				@keys=keys %stat;
				for($k=0; $k< @keys; $k++){
						@split=split(/\s+/,$stat{$keys[$k]});
						@non_dup=@{&remove_dup_in_array(\@split)};
						for($j=0; $j<@non_dup; $j++){
								if($non_dup[$j]=~/^\s*$/){
										splice(@non_dup, $j, 1);       $j--;
										next;
								}
								if($non_dup[$j] eq $keys[$k]){
										splice(@non_dup, $j, 1);       $j--;
										next;
								}
						}
						$stat2{$keys[$k]}=join(' ', @non_dup);
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Showing the actual matched sequences
				# %stat has following contents
				#    d1ash__ d1bam__ d1mba__ d2lhb__
				#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
				#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
				#______________________________________________________________________________
				if($verbose=~/v/){
					 @keys= sort keys %stat2;
					 for($k=0; $k< @keys; $k++){
							print "$keys[$k]: $stat2{$keys[$k]}\n";
					 }
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Getting statistics
				#_________________________________________
				# NOTE(review): $s is not assigned anywhere in this sub, so the
				# E-value passed on comes from a package variable -- possibly
				# $E_value was intended; confirm.
				$evalue=$s;
				# NOTE(review): this overrides whatever the 'm1=' option set.
				$E_mult_factor1=1;
				# Seven arguments are passed; %mspa_00 is the seventh (index 6).
				@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
									\$base, \$E_mult_factor1,  $leng_thresh, \%mspa_00)};
				%correct=%{$output[3]};
				# Later bases overwrite entries of earlier ones with the same key.
				%final_stat_big_hash=(%final_stat_big_hash, %correct);
				if($verbose){
						@keys=sort keys %correct;
						for($k=0; $k< @keys; $k++){
							 print "$keys[$k] $correct{$keys[$k]}\n";
						}
				}
		}
		return(\%final_stat_big_hash);
}


#________________________________________________________________________________
# Title     : get_scop_correcting_pairs
# Usage     : %correct=%{&get_scop_correcting_pairs()};
# Function  :
# Example   :
# Keywords  : get_pdb_correcting_pairs , correct_pairs_in_scop, correct_homology_pairs
# Options   :
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------------------
sub get_scop_correcting_pairs{
		# Returns a reference to a hash of hand-curated sequence pairs that
		# are to be treated as homologous. Each entry is a string of two
		# sequence names separated by one blank, names in sorted order, and
		# is stored as both key and value, so a caller can test
		#     $correcting_pairs{"$name_a $name_b"}
		# after sorting the two names.
		my (%correcting_pairs, @correcting_pairs);

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# %correcting_pairs is a correcting table for old pdb40d file classi
		#_____________________________________________________________________
		@correcting_pairs=(  # should be pairs
				'd2kauc1 d2kauc2', 'd1pkya1 d1pkya2',
				'd1pvda2 d1trka1', 'd1pbe_1 d1pbe_2',
				'd1poxa3 d1pvda2', 'd1efga1 d1efga2',
				'd1dsba1 d1dsba2', 'd2gsta1 d2gsta2',
				'd1bct__ d1brd__', 'd1qora1 d1qora2',
				'd2ohxa1 d2ohxa2', 'd1efga2 d1eft_1',
				'd1tada1 d1tada2', 'd1gsea1 d1gsea2',
				'd1gesa2 d2tmda3', 'd1lvl_2 d2tmda3',
				'd2tmda3 d2tpra2', 'd1tde_1 d2tmda3',
				'd1nhp_2 d2tmda3', 'd1gesa1 d2tmda3',
				'd1lvl_1 d2tmda3', 'd2tmda3 d2tpra1',
				'd1fcda1 d2tmda3', 'd1nhp_1 d2tmda3',
				'd1tde_2 d2tmda3', 'd1pbe_1 d2tmda3',
				'd1ebha1 d1ebha2', 'd1gesa2 d2dlda2', ## 3.4.1  with
				'd1gesa2 d1psda2', 'd1nhp_2 d2dlda2',
				'd1ldm_1 d1tde_2', 'd1coy_1 d1ldb_1',
				'd1lvl_2 d1psda2', 'd1psda2 d1tde_2',
				'd1hyha1 d1tde_2', 'd1fcda1 d1ldm_1',
				'd1hdca_ d1nhp_2', 'd1fcda1 d1hlpa1',
				'd1llda1 d1lvl_2', 'd2dlda2 d2tpra2',
				'd1ldm_1 d1nhp_2', 'd1llda1 d1pbe_1',
				'd1gdha2 d2tpra1', 'd1ldb_1 d1nhp_2',
				'd1gesa2 d1scua2', 'd1fcda1 d1hyha1',
				'd1gesa1 d1hlpa1', 'd1gdha2 d1gesa2',
				'd1lvl_2 d2dlda2', 'd1gesa1 d2dlda2',
				'd1nhp_2 d2ohxa2', 'd1tde_2 d2dlda2', # 3.4.1. with 3.18.1, 3.17.1.
				'd1nhp_1 d2cmd_1', 'd1fcda1 d1ldb_1',
				'd1lvl_1 d2ohxa2', 'd1nhp_2 d2naca2',
				'd1pbe_1 d2ohxa2', 'd1gdha2 d1nhp_2',
				'd2cmd_1 d2tpra1', 'd1tde_1 d2cmd_1',
				'd1llda1 d1nhp_2', 'd1hlpa1 d1nhp_2',
				'd1nhp_1 d2dlda2', 'd1hyha1 d1nhp_2',
				'd1nhp_2 d1psda2', 'd1fcda1 d2cmd_1',
				'd1fcda1 d1llda1', 'd1lvl_2 d1udpa_',
				'd1psda2 d2tpra2', 'd1hdca_ d1lvl_2',
				'd1gesa2 d1llda1', 'd1nhp_2 d1qora2',
				'd1ldm_1 d2tpra1', 'd1coy_1 d2dlda2',
				'd2dlda2 d2tpra1', 'd1hdca_ d1pbe_1',
				'd1coy_1 d1gdha2', 'd1nhp_2 d2cmd_1',
				'd1llda1 d1tde_1', 'd1llda1 d1lvl_1',
				'd1bdma1 d2tpra1', 'd1gd1o1 d2tpra2',
				'd1ldb_1 d1lvl_1', 'd1hlpa1 d1tde_2',
				'd1coy_1 d1psda2', 'd1nhp_2 d1udpa_',
				'd1llda1 d1tde_2', 'd1tde_2 d2cmd_1',
				'd1llda1 d2tpra2', 'd1ldb_1 d1tde_1',
				'd1coy_1 d1hlpa1', 'd1coy_1 d2cmd_1',
				'd1bdma1 d1gesa2', 'd1hyha1 d2tpra2',
				'd1gesa2 d1hyha1', 'd1gesa2 d2ohxa2',
				'd1ldb_1 d1tde_2', 'd1hlpa1 d1pbe_1',
				'd1ldm_1 d2tpra2', 'd2ohxa2 d2tpra1',
				'd1ldb_1 d2tpra2', 'd1gesa2 d1ldm_1',
				'd1lvl_2 d1qora2', 'd1gesa1 d2naca2',
				'd1coy_1 d1llda1', 'd1coy_1 d1hyha1',
				'd1coy_1 d1ldm_1', 'd1ldm_1 d1lvl_2',
				'd1eny__ d1nhp_2', 'd1pbe_1 d2pgd_2',
				'd1ldb_1 d1pbe_1', 'd1ldb_1 d1lvl_2',
				'd1gesa2 d1hlpa1', 'd1dhr__ d1nhp_2',
				'd1hdca_ d1tde_1', 'd1gesa1 d1psda2',
				'd1pbe_1 d2cmd_1', 'd1tde_2 d1udpa_',
				'd1pbe_1 d2dlda2', 'd1hdca_ d1tde_2',
				'd1gesa2 d1ldb_1', 'd1psda2 d2tpra1',
				'd1gdha2 d1lvl_2', 'd1tde_1 d2dlda2',
				'd1ldm_1 d1pbe_1', 'd1pbe_1 d1scua2',
				'd1gesa1 d2ohxa2', 'd1lvl_2 d2naca2',
				'd1gd1o1 d1lvl_1', 'd1fvl__ d1kst__',
				'd1kst__ d2ech__', 'd1hsaa2 d1std__', ## d1hsaa.. is NOT homol, but to fix a problem in E_100_e_0.0005_j30_segged_2092
				'd1afp__ d1hfi__'
				);

		 # A lexical loop variable is used on purpose: a bare $i here would be
		 # the package variable $i and would overwrite the loop counter of any
		 # caller that iterates with an undeclared $i.
		 for my $pair (@correcting_pairs){
				$correcting_pairs{$pair}=$pair;
		 }
		 return(\%correcting_pairs);
}

#__________________________________________________________________
# Title     : get_isearch_result_stat
# Usage     : @out=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue, \$base,
#                         \$E_mult_factor1, $leng_thresh, \%mspa_00)};
# Function  :
# Example   : Following input (hash eg: %stat2, input with the first word as key)
#              will become columnar output.
#
#    d1ash__ d1bam__ d1mba__ d2lhb__
#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
#    Will become:
#      ....
#      d1ash__ d2lhb__ Homolog: G1   98 0.012
#      d1baba_ d1flp__ Homolog: G1   82 0.072
#      d1baba_ d1hbg__ Homolog: G1   79 0.13
#      d1baba_ d2lhb__ Homolog: G1   228 8e-12
#      d1baba_ d3aaha_ Nomolog: G1   74 2
#      d1baba_ d3sdha_ Homolog: G1   92 0.012
#      d1cola_ d1hbg__ Nomolog: G1   79 0.59
#      d1cpca_ d1cpcb_ Homolog: G1   176 4.9e-08
#      ....
#
# Keywords  : get_stat_interm_search, get_intermediate_search_stat
# Options   : _  for debugging.
#             #  for debugging.
# Package   : Bio
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   : [$av_correct, $sum_correct, $num_enq_seq, \%correct, \%correct_group]
# Tips      :
# Argument  :
# Todo      :
# Author    : A Scientist
# Category  :
# Version   : 2.2
#-----------------------------------------------------------------------------
sub get_isearch_result_stat{
	# Classifies every (query, matched sequence) pair of a search result as
	# "Homolog" or "Nomolog" and accumulates counts.
	#
	# Arguments (positional):
	#   $_[0]  hash ref : query name => whitespace separated matched names
	#   $_[1]  array ref: sequence names of the pdbg group
	#   $_[2]  scalar ref: E-value (only used for the verbose report line)
	#   $_[3]  scalar ref: base name for the report ($ARGV[3] when empty)
	#   $_[4]  scalar ref: E-value multiplication factor
	#   $_[5]  length threshold, plain scalar or scalar ref
	#   $_[6]  hash ref copied to %mspa_0; looked up with the sorted
	#          "name1 name2" pair as key
	#   $_[7]  hash ref copied to %mspa_00 (not used further in this sub)
	#
	# Returns: [$av_correct, $sum_correct, $num_enq_seq, \%correct, \%correct_group]
	#          where $num_enq_seq has been decremented by one before returning.
	#
	# Names used without 'my' here, i.e. package variables: $base, $sorted,
	# $leng_thresh, $verbose, @match_seqs, $num_of_matched, %correct_group,
	# @correct_new, $match_count, $num_correct, $sum_correct_for_additional.
	my (@keys, $num_enq_seq, @pdbg_seqs_ori, $c, $d, $i, %correct_pairs,
	    $sum_correct, $sum_false, $match_seq, $percent_correct, $correct, @correct,
	    $av_correct, $av_false, $actual_e_value, $correct_matched,
	    %correcting_pairs, @correcting_pairs, %correct);

	my %seqs=%{$_[0]};
	my @pdbg_seqs=@{$_[1]};
	my $evalue=${$_[2]};
	my $pdbg_base=${$_[3]} || $ARGV[3];
	my $E_mult_factor1=${$_[4]};
	# NOTE(review): reads the same slot ($_[4]) as $E_mult_factor1 -- confirm
	# whether a separate argument was intended.
	my $E_mult_factor2=${$_[4]};
				if(ref($_[5])){  $leng_thresh =${$_[5]}  }else{ $leng_thresh=$_[5]; }
	my %mspa_0=%{$_[6]};
	my %mspa_00=%{$_[7]};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# %correcting_pairs is a correcting table for old pdb40d file classi
		#_____________________________________________________________________
		@correcting_pairs=(  # should be pairs
																'd2kauc1 d2kauc2',              'd1pkya1 d1pkya2',
																'd1pvda2 d1trka1',                              'd1pbe_1 d1pbe_2',
				'd1poxa3 d1pvda2',				'd1efga1 d1efga2',
				'd1dsba1 d1dsba2',				'd2gsta1 d2gsta2',
				'd1bct__ d1brd__',				'd1qora1 d1qora2',
				'd2ohxa1 d2ohxa2',				'd1efga2 d1eft_1',
				'd1tada1 d1tada2',				'd1gsea1 d1gsea2',
				'd1gesa2 d2tmda3',				'd1lvl_2 d2tmda3',
				'd2tmda3 d2tpra2',				'd1tde_1 d2tmda3',
				'd1nhp_2 d2tmda3',				'd1gesa1 d2tmda3',
				'd1lvl_1 d2tmda3',				'd2tmda3 d2tpra1',
				'd1fcda1 d2tmda3',				'd1nhp_1 d2tmda3',
				'd1tde_2 d2tmda3',				'd1pbe_1 d2tmda3',
				'd1ebha1 d1ebha2',				'd1gesa2 d2dlda2', ## 3.4.1  with 3.15.1
				'd1gesa2 d1psda2',				'd1nhp_2 d2dlda2',
				'd1ldm_1 d1tde_2',				'd1coy_1 d1ldb_1',
				'd1lvl_2 d1psda2',				'd1psda2 d1tde_2',
				'd1hyha1 d1tde_2',				'd1fcda1 d1ldm_1',
				'd1hdca_ d1nhp_2',				'd1fcda1 d1hlpa1',
				'd1llda1 d1lvl_2',				'd2dlda2 d2tpra2',
				'd1ldm_1 d1nhp_2',				'd1llda1 d1pbe_1',
				'd1gdha2 d2tpra1',				'd1ldb_1 d1nhp_2',
				'd1gesa2 d1scua2',        'd1fcda1 d1hyha1',
				'd1gesa1 d1hlpa1',        'd1gdha2 d1gesa2',
				'd1lvl_2 d2dlda2',        'd1gesa1 d2dlda2',
				'd1nhp_2 d2ohxa2',        'd1tde_2 d2dlda2',
				'd1nhp_1 d2cmd_1',        'd1fcda1 d1ldb_1',
				'd1lvl_1 d2ohxa2',        'd1nhp_2 d2naca2',
				'd1pbe_1 d2ohxa2',        'd1gdha2 d1nhp_2',
				'd2cmd_1 d2tpra1',        'd1tde_1 d2cmd_1',
				'd1llda1 d1nhp_2',        'd1hlpa1 d1nhp_2',
				'd1nhp_1 d2dlda2',        'd1hyha1 d1nhp_2',
				'd1nhp_2 d1psda2',        'd1fcda1 d2cmd_1',
				'd1fcda1 d1llda1',        'd1lvl_2 d1udpa_',
				'd1psda2 d2tpra2',        'd1hdca_ d1lvl_2',
				'd1gesa2 d1llda1',        'd1nhp_2 d1qora2',
				'd1ldm_1 d2tpra1',        'd1coy_1 d2dlda2',
				'd2dlda2 d2tpra1',        'd1hdca_ d1pbe_1',
				'd1coy_1 d1gdha2',        'd1nhp_2 d2cmd_1',
				'd1llda1 d1tde_1',        'd1llda1 d1lvl_1',
				'd1bdma1 d2tpra1',        'd1gd1o1 d2tpra2',
				'd1ldb_1 d1lvl_1',        'd1hlpa1 d1tde_2',
				'd1coy_1 d1psda2',        'd1nhp_2 d1udpa_',
				'd1llda1 d1tde_2',        'd1tde_2 d2cmd_1',
				'd1llda1 d2tpra2',        'd1ldb_1 d1tde_1',
				'd1coy_1 d1hlpa1',        'd1coy_1 d2cmd_1',
				'd1bdma1 d1gesa2',        'd1hyha1 d2tpra2',
				'd1gesa2 d1hyha1',        'd1gesa2 d2ohxa2',
				'd1ldb_1 d1tde_2',        'd1hlpa1 d1pbe_1',
				'd1ldm_1 d2tpra2',        'd2ohxa2 d2tpra1',
				'd1ldb_1 d2tpra2',        'd1gesa2 d1ldm_1',
				'd1lvl_2 d1qora2',        'd1gesa1 d2naca2',
				'd1coy_1 d1llda1',        'd1coy_1 d1hyha1',
				'd1coy_1 d1ldm_1',        'd1ldm_1 d1lvl_2',
				'd1eny__ d1nhp_2',        'd1pbe_1 d2pgd_2',
				'd1ldb_1 d1pbe_1',        'd1ldb_1 d1lvl_2',
				'd1gesa2 d1hlpa1',        'd1dhr__ d1nhp_2',
				'd1hdca_ d1tde_1',        'd1gesa1 d1psda2',
				'd1pbe_1 d2cmd_1',        'd1tde_2 d1udpa_',
				'd1pbe_1 d2dlda2',        'd1hdca_ d1tde_2',
				'd1gesa2 d1ldb_1',        'd1psda2 d2tpra1',
				'd1gdha2 d1lvl_2',        'd1tde_1 d2dlda2',
				'd1ldm_1 d1pbe_1',        'd1pbe_1 d1scua2',
				'd1gesa1 d2ohxa2',        'd1lvl_2 d2naca2',
				'd1gd1o1 d1lvl_1'
				);


		# Turn the list into a lookup table: "name1 name2" => "name1 name2".
		for($i=0; $i< @correcting_pairs; $i++){
				$correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
		}
	if($E_mult_factor1=~/^\s*$/){ $E_mult_factor1=1; };


	# Query names: sorted, "_start-end" suffixes stripped, duplicates removed.
	@keys=sort keys %seqs;
	@keys=@{&strip_sequence_ranges(\@keys)};
	@keys=@{&remove_dup_in_array(\@keys)};
	@pdbg_seqs_ori=@pdbg_seqs;
	$num_enq_seq=@pdbg_seqs;
	print "\n# In get_isearch_result_stat: PDBG seqs $num_enq_seq \n=> @pdbg_seqs\n\n" if $verbose;

	#@pdbg_seqs=@{&strip_sequence_ranges(\@pdbg_seqs)};
	#@pdbg_seqs=@{&remove_dup_in_array(\@pdbg_seqs)};

	# NOTE(review): $base is not set in this sub (the base name argument is
	# stored in $pdbg_base); here and below it refers to a package variable.
	if($num_enq_seq < 2){ print "\n# \$num_enq_seq is less than 2 @pdbg_seqs $base\n"; die; }

	for($c=0; $c < @keys; $c++){
	   my($enq_seq, $correct, $false_positive);
	   $num_of_matched=@match_seqs=split(/\s+/, $seqs{$keys[$c]});
			 $enq_seq=$keys[$c];

	   for($d=0; $d< @match_seqs; $d++){
								# @sorted is declared, but the code below uses the scalar
								# $sorted, which is a package variable.
								my($correct_matched, @sorted);

								$match_seq=$match_seqs[$d];
																			$sorted=join(' ', sort ($enq_seq, $match_seq) );

								# The pdbg name is interpolated into the pattern unescaped.
								# When the pair is in the correcting table the condition is
								# true for every pdbg sequence, so $correct is incremented
								# once per element of @pdbg_seqs.
								for($i=0; $i< @pdbg_seqs; $i++){
										 if($match_seq =~/d?$pdbg_seqs[$i]/i or $correcting_pairs{$sorted} ){
													print "\n# \$match_seq = $match_seq, \$pdbg_seqs $pdbg_seqs[$i] \$enq_seq: $enq_seq\n"  if $verbose;
													$correct++;
													$correct_matched=1;
													unless($correct{$sorted}){
																	 $correct_group{$base} .="Homolog: $sorted $base  $mspa_0{$sorted}\n";
													}
													$correct{$sorted} = "Homolog: $base  $mspa_0{$sorted}";
										 }
								}
								if($correct_matched !=1){
										 $false_positive++;
										 unless( $correct{$sorted} ){
													$correct_group{$base} .="Nomolog: $sorted $base  $mspa_0{$sorted}\n";
										 }
										 $correct{$sorted} = "Nomolog: $base  $mspa_0{$sorted}";
								}
	   }
					 # Assigning 1 to the array makes it the one-element list (1).
					 if(@match_seqs == 0){ @match_seqs=1; $percent_correct=0; }
	   $sum_correct += $correct;
	   $sum_false   += $false_positive;
	}
	# Averages are taken over the number of pdbg sequences, not over @keys.
	$av_correct = $sum_correct/$num_enq_seq;
	$av_false   = $sum_false  /($num_enq_seq);

	#### $actual_e_value becomes whatever $E_mult_factor1 defined ~~~~~~~~~~~~
	if($E_mult_factor1 != 1){
	   $actual_e_value=$evalue * $E_mult_factor1;
	}elsif($E_mult_factor2 != 1){
	   $actual_e_value= $evalue * $E_mult_factor2;
	}else{ $actual_e_value=$evalue }

	# From here on $num_enq_seq is one less than the pdbg sequence count;
	# this decremented value is what gets returned.
	$num_enq_seq--;
	$sum_correct_for_additional = $num_enq_seq+1;
	$match_count=$sum_correct_for_additional * $av_correct;
	#$sum_correct= $sum_correct_for_additional;
	if($verbose){
					 printf ("%-10s %-12s %-13f %-13f %-7s %-7s %-7s %-7s %-4s\n", $pdbg_base,
		$actual_e_value, $av_correct, $av_false, $num_enq_seq,
		$sum_correct_for_additional, $sum_false, $match_count, $leng_thresh);
	}

	# NOTE(review): @correct_new is never filled in this sub; unless it is set
	# as a package variable elsewhere, this loop prints nothing.
	@correct_new=@{&remove_dup_in_array(\@correct_new)};
	for($i=0; $i< @correct_new; $i++){
	    print "\n# correct new: $correct_new[$i]" ;
	}
	$num_correct=$match_count/2;

	print "Num of non-reflective correcct:  $num_correct  Nomolog: $sum_false  \n\n" if $verbose;
	return([$av_correct, $sum_correct, $num_enq_seq, \%correct, \%correct_group]);
}



#__________________________________________________________________
# Title     : strip_sequence_ranges
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : remove_sequence_ranges, remove_sequence_name_ranges,
#             remove_ranges_in_sequences, strip_sequence_name_ranges,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub strip_sequence_ranges{
		# Removes a trailing "_start-end" range from sequence names, e.g.
		# 'd1abc__1-120' becomes 'd1abc_'; names without a range are passed
		# through unchanged.
		#
		# Argument : either a reference to an array of names or a plain list
		#            of names.
		# Returns  : reference to a new array with the stripped names, in the
		#            same order as the input.
		#
		# The input form is decided with ref(): the former
		#   my @in=@{$_[0]} or @in=@_;
		# assigned the fallback to the package variable @in instead of the
		# lexical one, so a plain list always produced an empty result.
		my @in = (@_ and ref($_[0]) eq 'ARRAY') ? @{$_[0]} : @_;
		my @out;
		for my $name (@in){
				if(defined $name and $name =~ /^(\S+)_\d+\-\d+/){
						push(@out, $1);
				}else{
						push(@out, $name);
				}
		}
		return(\@out);
}


#______________________________________________________________
# Title     : delete_files
# Usage     : delete_files *.zip  (delete files except xxxx.zip)
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub delete_files{
    # Deletes the given files; directories in the list are skipped.
    #
    # Argument : either a reference to an array of file names or a plain
    #            list of file names.
    # Output   : one line per file on STDOUT telling whether it was deleted.
    #
    # The input form is decided with ref(): the former
    #   @del_files=@{$_[0]} || @_;
    # evaluated the array in scalar context, so an array reference was
    # turned into its element count instead of its file names.
    my @del_files = (@_ and ref($_[0]) eq 'ARRAY') ? @{$_[0]} : @_;

    for my $file (@del_files){
        next unless defined $file;
        next if -d $file;
        # Report success only when unlink really removed the file.
        if(unlink($file)){
            print "\n>> $file has been deleted";
        }else{
            print "\n>> $file could not be deleted: $!";
        }
    }
    return;
}


#______________________________________________________________________________
# Title     : open_DOMSEL_file
# Usage     :
# Function  :
# Example   : DOMSEL_file is: ~/Proj/Bio/3Dserver/domsel-y
#             1waj(10,9,7,5,21,20)
#             1bdg(4,5,3)
#             1bg3A(6,7,5,22,23,21)
#             1hjrA(2)
#             1tgoA(6,8,11,12,10,13,21,22)
#
#    Assigned this way>>
#           $DOMSEL_hash{$structure}=[@domains];
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub open_DOMSEL_file{
    # Reads a DOMSEL file, whose records look like
    #     1waj(10,9,7,5,21,20)
    # and returns a reference to a hash
    #     structure name => reference to the array of listed domain ids.
    #
    # Argument : the file name, as a plain string or a scalar reference.
    # Dies     : when the file cannot be opened.
    #
    # Only lines that match the record pattern are stored. (Storing after
    # every line, as was done before, created an entry with an empty key
    # for leading non-record lines and re-stored the previous record for
    # later ones.)
    my($DOMSEL_file, %DOMSEL_hash);
    $DOMSEL_file = ref($_[0]) eq 'SCALAR' ? ${$_[0]} : $_[0];

    unless(defined $DOMSEL_file and length $DOMSEL_file){
        die "\n open_DOMSEL_file: Can not open $DOMSEL_file \n\n";
    }
    # Three-argument open with a lexical handle: the file name is taken
    # literally and the handle cannot collide with another DOMSEL handle.
    open(my $domsel_fh, '<', $DOMSEL_file)
        or die "\n open_DOMSEL_file: Can not open $DOMSEL_file ($!)\n\n";

    while(my $line = <$domsel_fh>){
        if($line =~ /^\s*(\S+)\(([\d+\,]*)\)/){
            my($structure, $domain_list) = ($1, $2);
            $DOMSEL_hash{$structure} = [ split(/\,/, $domain_list) ];
        }
    }
    close($domsel_fh);
    return(\%DOMSEL_hash);
}

#____________________________________________________________________
# Title     :  open_psipred_files
# Usage     :
# Function  : gets sec. str. prediction of psipred(sec. str pred program)
#             and puts in hash If 's' option is given, it also gives
#             sequence hash ref as the second output ref.
#             This can handle the 2 types of output format of
#             psipred. So, the output will differ according to the
#             inputs.
# Example   :
#
#   Conf: Confidence (0=low, 9=high)
#   Pred: Predicted secondary structure (H=helix, E=strand, C=coil)
#     AA: Target sequence
#
#   Conf: 988776544676303321158888868999999999999999987615763468723587
#   Pred: CCCCCCCCCCCCCHHHHHCCCCCCCHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCEECCC
#     AA: MSNPGDVRPVPHRSKVCRCLFGPVDSEQLRRDCDALMAGCLQEARERWNFDFVTETPLEG
#                 10        20        30        40        50        60
#
# Warning   :
# Keywords  : open_PSIPRED_files, open_PSI-PRED_files, psipred, open_psi_pred_files{
#             open_pred_files, secondary structure prediction file
# Options   : 's' for sequence output as well (\%sec_str, \%seq)
# Options   : 's' for sequence output as well (\%sec_str, \%seq)
#             'p' for percentage of the sec. str.
#             'a' for accumulated percentage. This will
#                  set 'p' automatically
#             'n' for NO name when outputing Percentage of chars with
#                 HASH input to get_occurances_of_char sub.
#    $reverse_residue_order=r by r
#    $use_universal_seq_hash_format=u by u -u
#    $consider_top_X_percent_only= by T=
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#-----------------------------------------------------------
sub open_psipred_files{
    # Purpose : read the "Conf:", "Pred:" and "AA:" lines of each psipred
    #           output file and collect, per residue position, the residue,
    #           the predicted secondary structure and the confidence digit.
    # Argument: whatever &handle_arguments accepts; this sub uses the file
    #           list (@file), the character options ($char_opt) and the
    #           'T' entry of %vars.
    #           (presumably file names, single-letter switches and "T=NN" --
    #            confirm against handle_arguments)
    # Returns : the single collected reference when exactly one was pushed
    #           to @out_ref, the whole list when more than one was pushed,
    #           and nothing explicit when none was pushed.
    #
    # NOTE(review): $name, $position, $residues, $prediction_reliability,
    #   $matched_prediction_count, $chain and @keys are not declared with
    #   'my' anywhere in this sub, so they are package variables.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my( @out_ref, $seq_out, %sec_str, %seq, $percent_out, $NO_name_out,
        $short_form_out_detected, $long_form_out_detected, $accumulate, $sequence,
        $reverse_residue_order, %rev_sec_str, $use_universal_seq_hash_format,
        @seq, @sec_str, $sec_str, $secondary_structure, @reliability_sorted,
        $num_of_diff_pred_for_both_methods, %mismatch_stats, $num_of_residues,
        $name_found, $consider_top_X_percent_only, @posi_under_threshold_arr,
        @residues_array, $top_X_perc_sec_str);
    $consider_top_X_percent_only=100; # the default
    # Option decoding. s/a/p/n are matched case-insensitively, r and u are
    # matched in lower case only.
    if($vars{'T'}=~/(\d+)/i){ $consider_top_X_percent_only=$1; }
    if($char_opt=~/s/i){ $seq_out=1 }
    if($char_opt=~/a/i){ $accumulate=1  }
    if($char_opt=~/p/i){ $percent_out=1 }
    if($char_opt=~/n/i){ $NO_name_out='n' }
    if($char_opt=~/r/){  $reverse_residue_order='r' }
    if($char_opt=~/u/){  $use_universal_seq_hash_format='u'; print "\n# (i) \$use_universal_seq_hash_format is set";}

    # One pass per input file.
    # NOTE(review): the "simple HASH" loop further down also counts with $i,
    #   so when it finishes $i holds the residue count and this outer loop
    #   normally stops after the first file.
    for($i=0; $i< @file; $i++){
       # NOTE(review): 'my ... if COND' is documented by perlsub as having
       #   undefined behaviour; the intent appears to be "fresh variables
       #   per file unless accumulating" -- confirm before relying on it.
       my (%sec_str, $sec_str, %seq) if($accumulate !=1);
       my ($secondary_structure, $cut_off_position, @reliability, $top_X_perc_sec_str,
           @posi_under_threshold_arr, $cut_off_relia_value, $sequence, @sec_str,
           $line_counter, $residue_num_count, $reliability);

       # The result of open() is not checked; an unreadable file simply
       # yields no lines in the loop below.
       open(PSIPRED_FILE, "$file[$i]");
       $name=${&get_base_names($file[$i])};
       print "\n# (INFO) open_psipred_files: opening $file[$i]";
       while(<PSIPRED_FILE>){
             $line_counter++;
             # The first three lines of every file are skipped unread.
             if($line_counter < 4){ next }
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
             # Simple sec str input form
             #________________________________________________
             # The pieces of each kind are concatenated over the whole file.
             if(/^Conf:\s+(\d+)$/){
                $reliability .=$1;
             }elsif(/Pred:\s+([CHE]+)$/){
                $sec_str.=$1;
             }elsif(/AA:\s+(\S+)$/){
                $sequence.=$1;
             }
       }
       close (PSIPRED_FILE);
       # NOTE(review): @residues_array is declared outside the file loop and
       #   only ever pushed to, so it is never emptied between files.
       push(@residues_array,  split(//, $sequence));
       $residue_num_count=@residues_array;
       push(@reliability, split(//, $reliability));
       @sec_str=split(//, $sec_str);
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Making a simple HASH style output(OLD form)
       #_______________________________________________________
       # Key is the 1-based position; value is
       #   [residue, sec. str. char, confidence digit, $name, position].
       # NOTE(review): $residue_1_letter is never assigned in this sub, and
       #   $residues is replaced (not extended) on every iteration.
       for($i=0; $i<@residues_array; $i++){
           $position=$i+1;
           $sec_str{$position}=[$residues_array[$i], $sec_str[$i],
                                $reliability[$i], $name, $position];
           if( $use_universal_seq_hash_format ){  $residues ={$position, $residue_1_letter};  }
       }
       $num_of_residues=@residues_array;

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # Considering only certain high ranking predictions (this is to make the same sample size as commonly predicted region
      #______________________________________________________________________________________________________________________
      # The confidence digits are sorted ascending and the value found at
      # index int(N*(1-T/100)) becomes the cut-off. Residues at or below the
      # cut-off (or with confidence 0) are replaced by '-' until the running
      # count reaches $cut_off_position; from then on they are kept.
      # NOTE(review): $cut_off_position and $cut_off_relia_value are
      #   re-declared inside this block, so the outer variables of the same
      #   name (read later for 'cut_off_reliability_val') are never set here.
      if($consider_top_X_percent_only < 100){  # as default $consider_top_X_percent_only has 100
         my ($exclusion_count, @top_X_perc_sec_str, @reliability_sorted, $cut_off_position, $cut_off_relia_value);
         @reliability_sorted  = sort {$a<=>$b} @reliability;
         $cut_off_position    = int( $num_of_residues *(1-$consider_top_X_percent_only/100) );
         $cut_off_relia_value = $reliability_sorted[$cut_off_position];
         print  "\n\$cut_off_relia_value $cut_off_relia_value \$cut_off_position $cut_off_position \/$residue_num_count\n";
         for($j=0; $j< @sec_str; $j++){
            if($reliability[$j] <= $cut_off_relia_value or $reliability[$j] == 0){
                $exclusion_count++;
                if($exclusion_count >= $cut_off_position){
                    $top_X_perc_sec_str[$j]=$sec_str[$j];
                }else{
                    $top_X_perc_sec_str[$j]='-'; ## strike out
                    push(@posi_under_threshold_arr, $j);
                }
            }elsif($reliability[$j] > $cut_off_relia_value){
                $top_X_perc_sec_str[$j]=$sec_str[$j];
            }else{    print "\n# Error ,,, "; exit;      }
         }
         $top_X_perc_sec_str=join('', @top_X_perc_sec_str);
     }
     # With 'r' the per-residue arrays and both strings are reversed,
     # otherwise the arrays are copied as they are.
     if($reverse_residue_order){
         $secondary_structure   =[reverse @sec_str];
         $prediction_reliability=[reverse @reliability];
         $sequence=reverse($sequence);
         $sec_str =reverse($sec_str);
     }else{
         $secondary_structure=[@sec_str];
         $prediction_reliability=[@reliability];
     }
     # NOTE(review): $num_of_diff_pred_for_both_methods is never assigned in
     #   this sub, so this subtracts an undefined value.
     $matched_prediction_count=$residue_num_count-$num_of_diff_pred_for_both_methods;

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # This is the general Seq object format
     #_______________________________________________
     # With 'u' the position-keyed %sec_str built above is REPLACED by one
     # flat record describing the whole sequence.
     if( $use_universal_seq_hash_format ){
        @seq=split(//, $sequence);
        # $residues is overwritten on each pass, so it ends up holding only
        # the last position.
        for($s=0; $s< @sec_str; $s++){
            $position=$s+1;
            $residues ={$position, $seq[$s]};
        }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
        # Definition of Seq information detail, Ver 1.5
        #______________________________________________________________________________
        # NOTE(review): $chain is not assigned anywhere in this sub.
        %sec_str=('name'                    =>$name,
                  'ID'                      =>$name,
                  'PDB_chain_number'        =>$chain,
                  'chain_number'            =>$chain,
                  'protein_name'            =>$name,
                  'seq_name'                =>$name,
                  'top_X_perc_sec_str'      =>$top_X_perc_sec_str,
                  'residue_hash'            =>$residues,
                  'residue_array'           =>[@residues_array],
                  'residue_string'          =>$sequence,
                  'sec_str_array'           =>$secondary_structure,
                  'sec_str_string'          =>$sec_str,
                  'sec_str'                 =>$sec_str,
                  'mol_type'                =>'protein',
                  'type'                    =>'protein',
                  'prediction_reliability'  =>$prediction_reliability,
                  'cut_off_reliability_val' =>$cut_off_relia_value,
                  'prediction'              =>'prediction',
                  'sequence'                =>"$sequence",
                  'seq_string'              =>$sequence,
                  'structure_assignment'    =>0,
                  'matched_prediction_count'   =>$matched_prediction_count,
                  'mismatched_prediction_count'=>$num_of_diff_pred_for_both_methods,
                  'num_of_diff_pred_for_both_methods'=>$num_of_diff_pred_for_both_methods,
                  'num_of_residues'         =>$num_of_residues,
                  'size_of_sequence'        =>$num_of_residues,
                  'mismatch_stats'          =>{%mismatch_stats},
                  'reverse_residue_order'   =>$reverse_residue_order,
                  'posi_under_threshold_arr'=>[@posi_under_threshold_arr]
                  ); # this is not a struc. assignment as in DSSP
        #__________________________________________________________________________________
     }
	 # What is collected for this file:
	 #   's'      -> \%sec_str and \%seq (nothing in this sub fills %seq)
	 #   'p'      -> array ref built from get_occurances_of_char's result
	 #   neither  -> \%sec_str
	 if($seq_out==1){ push(@out_ref, \%sec_str, \%seq);
	 }elsif($percent_out==1 ){
	      push(@out_ref, [%{&get_occurances_of_char(\%sec_str, $NO_name_out, 'p')}] );
	 }elsif($percent_out !=1){ push(@out_ref, \%sec_str) }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
         # If -r option is set
         #____________________________________________________________
         # NOTE(review): this runs AFTER \%sec_str was pushed above, and
         #   reverse() is called in scalar context here, which reverses the
         #   characters of the (stringified) value -- confirm this is wanted.
         if($reverse_residue_order){
              @keys=keys %sec_str;
              for($r=0; $r<@keys; $r++){
                  $rev_sec_str{$keys[$r]}=reverse($sec_str{$keys[$r]});
              }
              %sec_str=%rev_sec_str;
         }
    }
    # One collected reference is returned bare, several as a list.
    if(@out_ref==1){
       return($out_ref[0]);
    }elsif(@out_ref>1){
       return(@out_ref);
    }
}




#______________________________________________________________________________
# Title     : open_protein_structural_domain_interact_pair_file
# Usage     :
# Function  :
# Example   :
# Keywords  : open_PDIP_file open_protein_domain_interaction_pair_file
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.5
#------------------------------------------------------------------------------
sub open_protein_structural_domain_interact_pair_file{
    # Purpose : read a list of interacting / non-interacting domain pairs.
    #           Recognised line starts:
    #             INTERACT, PROT_INTERACT, PROTEIN_INTERACT  -> interacting
    #             NO...INTERACT (e.g. NONINTERACT,
    #                            NO_PROTEIN_INTERACT)        -> non-interacting
    #           each followed by two whitespace separated names.
    # Argument: file name (plain string).
    # Returns : reference to
    #             { PROTEIN_INTERACT    => { "nameA nameB" => "nameA nameB" },
    #               PROTEIN_NONINTERACT => { ... } }
    #           where the two names of a pair are sorted before being joined.
    # Output  : prints the number of distinct entries seen (a name with its
    #           first character removed counts as the entry), or a notice
    #           when the file is missing or not larger than 100 bytes.
    # Dies    : when a large enough file can not be opened.
    #
    # Changes : the file handle is lexical and now closed after reading,
    #           the open failure carries a message, and the alternative
    #           spellings of each tag are matched by one pattern each.
    my ($previous_interact_file) = @_;
    my (%protein_str_dom_interact, %total_PDB_entry);

    if((-s $previous_interact_file) > 100){
        open(my $contact_fh, '<', $previous_interact_file)
            || die "\n open_protein_structural_domain_interact_pair_file: Can not open $previous_interact_file: $!\n";
        while(my $line = <$contact_fh>){
            my ($category, @hit);
            # @hit = (name1, name1 minus first char, name2, name2 minus first char)
            if(@hit = $line =~ /^(?:PROT_|PROTEIN_)?INTERACT\s+(\S(\S+))\s+(\S(\S+))/){
                $category = 'PROTEIN_INTERACT';
            }elsif(@hit = $line =~ /^NO\S*INTERACT\s+(\S(\S+))\s+(\S(\S+))/){
                $category = 'PROTEIN_NONINTERACT';
            }else{
                next;
            }
            my @contact_pair = sort { $a cmp $b } ($hit[0], $hit[2]);
            $total_PDB_entry{$hit[1]}++;
            $total_PDB_entry{$hit[3]}++;
            $protein_str_dom_interact{$category}{"@contact_pair"} = "@contact_pair";
        }
        close($contact_fh);
    }else{
        print "\n \$previous_interact_file $previous_interact_file does not exist or very small \n";
    }
    my $total_PDB_entry = keys %total_PDB_entry;
    print "\n#  \$total_PDB_entry is $total_PDB_entry <- \&open_protein_structural_domain_interact_pair_file\n\n";
    return(\%protein_str_dom_interact);
}

#______________________________________________________________________________
# Title     : open_SCOP_domain_definition_file
# Usage     :
# Function  :
# Example   :
#     d1abya1 1aby    a:1-142 1.001.001.001.001.016
#     d1abya2 1aby    a:143-283       1.001.001.001.001.016
#     d1rvwa_ 1rvw    a:      1.001.001.001.001.016
#     d1ithb_ 1ith    b:      1.001.001.001.001.045
#     d1hlb__ 1hlb    -       1.001.001.001.001.046
# Keywords  : open_scop_domain_definition_file open_scop_file
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   : (\%interacting_PDB_domains, \%PROT_STR_DOM_lookup_hash, \%PROT_STR_DOM_class_hash, \%all_PDB_entry)
# Version   : 1.8
#------------------------------------------------------------------------------
sub open_SCOP_domain_definition_file{
    # Purpose : read a SCOP domain definition table with the columns
    #             domain name, PDB id, domain definition, classification
    #           e.g.  d1abya1 1aby    a:1-142 1.001.001.001.001.016
    # Argument: file name (plain string).
    # Returns : a list of four hash references
    #             \%interacting_PDB_domains  PDB id => [domain definitions]
    #             \%PROT_STR_DOM_lookup_hash "PDBid" . "definition" => domain name
    #             \%PROT_STR_DOM_class_hash  domain name => classification
    #             \%all_PDB_entry            PDB id => PDB id
    # Output  : prints how many records had '-' as their domain definition.
    # Dies    : when the file can not be opened.
    #
    # Changes : the file handle is lexical and now closed, the open failure
    #           carries a message, and the counter starts at 0 so the
    #           report is well defined when no '-' record is present.
    my ($SCOP_domain_def_file) = @_;
    my (%interacting_PDB_domains, %PROT_STR_DOM_lookup_hash,
        %PROT_STR_DOM_class_hash, %all_PDB_entry);
    my $single_domain_PDB = 0;

    open(my $scop_fh, '<', $SCOP_domain_def_file)
        || die "\n open_SCOP_domain_definition_file: Can not open $SCOP_domain_def_file: $!\n";
    while(my $line = <$scop_fh>){
        # Records whose third column is '-' are only counted, not stored.
        if($line =~ /^(\S+)\s+(\S+)\s+\-\s+(\S+)/){
            $single_domain_PDB++;
            next;
        }
        next unless($line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/);
        my ($PROT_STR_DOM_name, $PDB_ID, $dom_def, $classification) = ($1, $2, $3, $4);
        # Entries like '0lpc' (PDB id starting with 0) are ignored.
        next if($PDB_ID =~ /^0\S+/);
        $all_PDB_entry{$PDB_ID} = $PDB_ID;
        push(@{$interacting_PDB_domains{$PDB_ID}}, $dom_def);
        $PROT_STR_DOM_lookup_hash{"$PDB_ID$dom_def"} = $PROT_STR_DOM_name;
        $PROT_STR_DOM_class_hash{$PROT_STR_DOM_name} = $classification;
    }
    close($scop_fh);
    print "\n(i) There were $single_domain_PDB \$single_domain_PDB proteins <- open_SCOP_domain_definition_file\n";
    return(\%interacting_PDB_domains, \%PROT_STR_DOM_lookup_hash, \%PROT_STR_DOM_class_hash, \%all_PDB_entry);
}



#___________________________________________________________________________________________________
# Title     : open_PDB_atom_files
# Usage     : %Atom_coord=%{&open_PDB_atom_files($ARGV[0])};
#             @keys= map {$_->[0]} sort {$a->[1] <=> $b->[1]} map{[$_, /(\S+)/]} keys %Atom_coord;
# Function  :
# Example   :
#      ATOM      1  N   LYS A   2      -3.152  12.128  50.432  1.00 48.77      1GDH 177
#      ATOM      2  CA  LYS A   2      -2.090  12.595  49.538  1.00 49.58      1GDH 178
#      ATOM      3  C   LYS A   2      -1.272  11.417  49.018  1.00 49.32      1GDH 179
#      ATOM      4  O   LYS A   2      -0.596  10.717  49.790  1.00 50.21      1GDH 180
#      ATOM      5  CB  LYS A   2      -1.167  13.596  50.250  1.00 49.34      1GDH 181
#      ATOM      6  CG  LYS A   2      -1.478  15.056  49.941  1.00 48.84      1GDH 182
#      ATOM      7  N   LYS A   3      -1.400  11.155  47.720  1.00 43.99      1GDH 183
#      ATOM      8  CA  LYS A   3      -0.719  10.028  47.117  1.00 37.29      1GDH 184
#      ATOM      9  C   LYS A   3       0.749  10.340  47.110  1.00 32.72      1GDH 185
#      ATOM     10  O   LYS A   3       1.132  11.466  47.353  1.00 30.49      1GDH 186
#      ATOM     11  CB  LYS A   3      -1.227   9.806  45.706  1.00 36.99      1GDH 187
#      ATOM     12  CG  LYS A   3      -2.696   9.436  45.674  1.00 39.38      1GDH 188
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#---------------------------------------------------------------------------------------------------
sub open_PDB_atom_files{
    # Purpose : collect the coordinates of every ATOM record of a PDB file.
    # Argument: file name, either as a plain string or as a SCALAR reference.
    # Returns : reference to a hash
    #             "<atom number> <atom name>" => [x, y, z]
    # Dies    : when the file can not be opened, or when a line starting
    #           with ATOM fits none of the three layouts below.
    #
    # Three whitespace separated layouts are recognised:
    #   1) ATOM 1 N LYS A 2     x y z ...   chain and residue number apart
    #   2) ATOM 1 N LYS 2       x y z ...   no chain column
    #   3) ATOM 1 N LYS A2187   x y z ...   chain glued to the residue number
    #
    # Changes : - the residue number may no longer be followed by '.' (see
    #             the (?![\d.]) look-ahead). Without it layout 1 also
    #             matched layout-2 lines with a one-digit residue number,
    #             taking the residue number as chain and the x coordinate
    #             as residue number, so y, z and the NEXT column were
    #             stored as x, y, z.
    #           - the parse temporaries are lexical instead of package
    #             variables, and the unused lookups ($11, %AA) are gone.
    #           - the error message names this sub instead of
    #             open_PDB_atom_files_for_domains.
    my $input = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my %ATOM;

    # Every layout captures: atom number, atom name, x, y, z.
    my @layouts = (
        qr/^ATOM\s+(\d+)\s+(\w+)\*?\s+\+?\w+\s+\S\s+\-?\d+(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/,
        qr/^ATOM\s+(\d+)\s+(\w+)\*?\s+\+?\w+\s+\-?\d+(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/,
        qr/^ATOM\s+(\d+)\s+(\w+)\*?\s+\+?\w+\s+[^\d]\-?\d+(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/,
    );

    open(my $pdb_fh, '<', $input) || die "\n Can not open PDB Atom files \n";
    while(my $line = <$pdb_fh>){
        next unless($line =~ /^ATOM/);
        my @hit;
        for my $layout (@layouts){
            last if(@hit = $line =~ $layout);
        }
        unless(@hit){
            die "\n$line\n (E) open_PDB_atom_files ($input), something is wrong in pattern matching \n";
        }
        my ($atom_number, $element_type, $x, $y, $z) = @hit;
        $ATOM{"$atom_number $element_type"} = [$x, $y, $z];
    }
    close($pdb_fh);
    return( \%ATOM );
}



#___________________________________________________________________________________________________
# Title     : open_PDB_atom_files_for_domains
# Usage     : %Atom_coord=%{&open_PDB_atom_files_for_domains($ARGV[0])};
#             %Atom_coord=%{&open_PDB_atom_files_for_domains($ARGV[0], '1-142')};
#
# Function  :
# Example   :
#      ATOM      1  N   LYS A   2      -3.152  12.128  50.432  1.00 48.77      1GDH 177
#      ATOM      2  CA  LYS A   2      -2.090  12.595  49.538  1.00 49.58      1GDH 178
#      ATOM      3  C   LYS A   2      -1.272  11.417  49.018  1.00 49.32      1GDH 179
#      ATOM      4  O   LYS A   2      -0.596  10.717  49.790  1.00 50.21      1GDH 180
#      ATOM      5  CB  LYS A   2      -1.167  13.596  50.250  1.00 49.34      1GDH 181
#      ATOM      6  CG  LYS A   2      -1.478  15.056  49.941  1.00 48.84      1GDH 182
#      ATOM      7  N   LYS A   3      -1.400  11.155  47.720  1.00 43.99      1GDH 183
#      ATOM      8  CA  LYS A   3      -0.719  10.028  47.117  1.00 37.29      1GDH 184
#      ATOM      9  C   LYS A   3       0.749  10.340  47.110  1.00 32.72      1GDH 185
#      ATOM     10  O   LYS A   3       1.132  11.466  47.353  1.00 30.49      1GDH 186
#      ATOM     11  CB  LYS A   3      -1.227   9.806  45.706  1.00 36.99      1GDH 187
#      ATOM     12  CG  LYS A   3      -2.696   9.436  45.674  1.00 39.38      1GDH 188
# Warning   :
# Keywords  : open_PDB_atom_files_for_chains
# Options   :
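# Returns   :  \%ATOM, filled with
#                push(@{$ATOM{$chain}{$residue_number}}, $x, $y, $z);            (no region given)
#                push(@{$ATOM{"$chain:$region"}{$residue_number}}, $x, $y, $z);  (region given)
#              $chain is lowercased; 'NO_CHAIN' is used when the record has no chain.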
# Argument  :
# Category  :
# Version   : 1.7
#---------------------------------------------------------------------------------------------------
sub open_PDB_atom_files_for_domains{
    # Purpose : collect the ATOM coordinates of a PDB file per chain and
    #           residue, optionally restricted to residue ranges.
    # Argument: 1) file name
    #           2) optional region string holding one or more "start-end"
    #              residue ranges, e.g. '1-142'
    #           Both may be given as plain strings or SCALAR references.
    # Returns : reference to a hash of hashes
    #             $ATOM{$chain}{$residue_number}           = [x,y,z, x,y,z, ...]
    #             $ATOM{"$chain:$region"}{$residue_number} = [...]  with a region
    #           One x,y,z triple is appended per atom of the residue. The
    #           chain is lowercased; records without a chain are filed
    #           under 'NO_CHAIN'. With a region, a residue is stored once
    #           for every range that contains it.
    # Warns   : when the file can not be opened (an empty hash is returned).
    # Dies    : when a line starting with ATOM fits none of the layouts.
    #
    # Layouts (whitespace separated):
    #   1) ATOM 1 N LYS A 2     x y z ...   chain and residue number apart
    #   2) ATOM 1 N LYS 2       x y z ...   no chain column
    #   3) ATOM 1 N LYS B2187   x y z ...   chain glued to the residue number
    #
    # Changes : - the residue number may no longer be followed by '.' (see
    #             the (?![\d.]) look-ahead). Without it layout 1 also
    #             matched layout-2 lines with a one-digit residue number
    #             and filed them under a bogus chain with shifted
    #             coordinates.
    #           - chain, range bounds and the other temporaries are lexical
    #             instead of package variables.
    #           - the three identical store blocks are now one.
    my ($input, $region) = map { ref($_) eq 'SCALAR' ? ${$_} : $_ } @_[0, 1];
    my (%ATOM, @region);
    # Flat list: (start1, stop1, start2, stop2, ...)
    if($region){ @region = $region =~ /(\d+)\-(\d+)/g; }

    my $pdb_fh;
    unless(open($pdb_fh, '<', $input)){
        warn "\n#open_PDB_atom_files_for_domains: Can not open PDB Atom files \n";
        return(\%ATOM);
    }

    while(my $line = <$pdb_fh>){
        # TER and every other record type carry nothing that is stored.
        next unless($line =~ /^ATOM/);

        my ($chain, $residue_number, $x, $y, $z);
        if($line =~ /^ATOM\s+\d+\s+\w+\*?\s+\+?\w+\s+(\S)\s+(\-?\d+)(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/){
            ($chain, $residue_number, $x, $y, $z) = ("\L$1", $2, $3, $4, $5);
        }elsif($line =~ /^ATOM\s+\d+\s+\w+\*?\s+\+?\w+\s+(\-?\d+)(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/){
            ($chain, $residue_number, $x, $y, $z) = ('NO_CHAIN', $1, $2, $3, $4);
        }elsif($line =~ /^ATOM\s+\d+\s+\w+\*?\s+\+?\w+\s+([^\d])(\-?\d+)(?![\d.])\S*\s+(\S+)\s+(\S+)\s+(\S+)\s+/){
            ($chain, $residue_number, $x, $y, $z) = ("\L$1", $2, $3, $4, $5);
        }else{
            die "\n$line\n (E) open_PDB_atom_files_for_domains ($input), something is wrong in pattern matching \n";
        }

        if(@region){
            for(my $r = 0; $r < @region; $r += 2){
                my ($range_start, $range_stop) = @region[$r, $r + 1];
                if($residue_number >= $range_start and $residue_number <= $range_stop){
                    push(@{$ATOM{"${chain}:$region"}{$residue_number}}, $x, $y, $z);
                }
            }
        }else{
            push(@{$ATOM{$chain}{$residue_number}}, $x, $y, $z);
        }
    }
    close($pdb_fh);
    return(\%ATOM);
}




#______________________________________________________________________________
# Title     : open_list_file
# Usage     :
# Function  :
# Example   :
# Keywords  : open_list_file_HASH
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub open_list_file{
    # Purpose : read a list file and keep the first whitespace-free word of
    #           every non-blank line.
    # Argument: file name, either as a plain string or as a SCALAR reference.
    # Returns : reference to a hash  word => word.
    # Dies    : when the file can not be opened.
    #
    # Changes : the argument is only dereferenced when it really is a
    #           reference. The former '${$_[0]} || $_[0]' treated a plain
    #           file name as the NAME of a package variable. The handle is
    #           lexical and the open uses the three-argument form.
    my $list_file = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my %list;
    open(my $list_fh, '<', $list_file) || die "\n Can not open $list_file \n";
    while(my $line = <$list_fh>){
        $list{$1} = $1 if($line =~ /(\S+)/);
    }
    close($list_fh);
    return(\%list);
}


#______________________________________________________________________________
# Title     : open_list_file_ARRAY
# Usage     :
# Function  :
# Example   :
# Keywords  : open_list_file_array
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub open_list_file_ARRAY{
    # Purpose : read a list file and keep the first whitespace-free word of
    #           every non-blank line, in file order (duplicates are kept).
    # Argument: file name, either as a plain string or as a SCALAR reference.
    # Returns : reference to an array of those words.
    # Dies    : when the file can not be opened.
    #
    # Changes : the argument is only dereferenced when it really is a
    #           reference. The former '${$_[0]} || $_[0]' treated a plain
    #           file name as the NAME of a package variable. The handle is
    #           lexical and the open uses the three-argument form.
    my $list_file = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my @list;
    open(my $list_fh, '<', $list_file) || die "\n Can not open $list_file \n";
    while(my $line = <$list_fh>){
        push(@list, $1) if($line =~ /(\S+)/);
    }
    close($list_fh);
    return(\@list);
}



#______________________________________________________________________________
# Title     : open_trivial_taxonomy_file
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub open_trivial_taxonomy_file{
    # Purpose : collect the distinct values of the 8th tab separated column
    #           of a taxonomy table.
    # Argument: file name, either as a plain string or as a SCALAR reference.
    # Returns : reference to a hash  8th-column value => 1.
    #           Lines with fewer than eight columns are skipped.
    # Dies    : when the file can not be opened.
    #
    # Changes : - chomp instead of chop: chop removed the last character of
    #             the 8th column whenever the final line of the file had no
    #             trailing newline.
    #           - the argument is only dereferenced when it is a reference.
    #           - the write-only @list is gone, and the handle is lexical.
    my $trivial_taxonomy_file = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my %keep_orig;

    open(my $tax_fh, '<', $trivial_taxonomy_file)
        || die "\n open_trivial_taxonomy_file: Can not open $trivial_taxonomy_file: $!\n";
    while(my $line = <$tax_fh>){
        chomp($line);
        # Runs of tabs count as ONE separator; only $rep is used below.
        my ($hid,$lseq,$species,$ppid,$taxon,$name,$unk,$rep) = split(/\t+/, $line);
        next if(!defined($rep));
        $keep_orig{$rep} = 1;
    }
    close($tax_fh);
    return(\%keep_orig);
}





#______________________________________________________________________________
# Title     : open_MRCS_file
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.3
#------------------------------------------------------------------------------
sub open_MRCS_file{
    # Purpose : read an MRCS result file made of records such as
    #             #<<START>>d1acf__ d1aco_1 OpenGap:10, ExtGap:1, Algo:lss_dipeptide
    #             # Created by print_seq_in_block sub
    #             d1acf___1-124   SWQTYVDTNLVG...
    #             d1aco_1_5-120   SWQTYVDTNLVG...
    #             ...STOP> ... ForBackScore: .., ForScore: .., BackScore: .. ID: .. L:..
    # Argument: file name, either as a plain string or as a SCALAR reference.
    # Returns : reference to a hash keyed by the two sequence names, sorted
    #           and joined with one blank. Each value is a hash with
    #             ForBackScore (one decimal), ForScore, BackScore,
    #             MutualSeqID, Alignment_leng, QUERY_SEQ_RANGE,
    #             TARGET_SEQ_RANGE, QUERY_SEQ_NAME, TARGET_SEQ_NAME
    # Dies    : when the file can not be opened.
    #
    # Only the FIRST alignment block after a START line (the one following
    # the first "# Created by print_seq_in_block sub") is searched for the
    # "<name>_<start>-<end>" lines that give the aligned ranges. A line
    # starting with SELF in that block stands for a target identical to
    # the query.
    #
    # Changes : - the two ranges and the SELF flag are lexical and reset at
    #             every START. They used to be package variables that were
    #             never cleared, so a record without range lines silently
    #             reported the ranges of the previous record.
    #           - sequence names are quoted (\Q..\E) before being used
    #             inside a pattern.
    #           - the handle is lexical and closed after reading.
    my $MRCS_file = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my (%MRCS_hash, $query_seq, $target_seq,
        $query_seq_align_range, $target_seq_align_range);
    my $forward_search_area_end = 0;   # number of alignment blocks seen since START
    my $SELF_seq_matched        = 0;

    open(my $mrcs_fh, '<', $MRCS_file) || die "\n Cannt open $MRCS_file \n";
    while(my $line = <$mrcs_fh>){
        if($line =~ /START\>+(\S+)\s+(\S+)\s+OpenGap:(\d+)\,\s+ExtGap:(\d+)\,\s+Algo:(\S+)/){
            ($query_seq, $target_seq) = ($1, $2);
            $forward_search_area_end = $SELF_seq_matched = 0;
            $query_seq_align_range   = $target_seq_align_range = undef;
        }elsif($forward_search_area_end == 1 and defined($query_seq)
               and $line =~ /^\Q$query_seq\E\_(\d+\-\d+)\s+\S/){
            $query_seq_align_range = $1;
        }elsif($forward_search_area_end == 1 and defined($target_seq)
               and $line =~ /^\Q$target_seq\E\_(\d+\-\d+)\s+\S/){
            $target_seq_align_range = $1;
        }elsif($line =~ /# Created by print_seq_in_block sub/){
            $forward_search_area_end++;
        }elsif($forward_search_area_end == 1 and $line =~ /^SELF\s+\S/){
            $target_seq_align_range = $query_seq_align_range;
            # Remembered because the SELF line may precede the query line,
            # in which case the query range is not known yet.
            $SELF_seq_matched = 1;
        }elsif($line =~ /STOP\>.+ForBackScore:\s*(\S+)\,\s+ForScore:\s+(\S+)\,\s+BackScore:\s+(\S+)\s+ID:\s+(\S+)\s+L:(\d+)/){
            my ($ForScore, $BackScore, $MutualSeqID, $Alignment_leng) = ($2, $3, $4, $5);
            my $ForBackScore       = sprintf("%-.1f", $1);
            my $sorted_paired_seqs = join(' ', sort($query_seq, $target_seq));
            # Prevents an empty target range when SELF came before the query line.
            if($SELF_seq_matched and !$target_seq_align_range){
                $target_seq_align_range = $query_seq_align_range;
            }
            $MRCS_hash{$sorted_paired_seqs} = {
                'ForBackScore'     => $ForBackScore,
                'ForScore'         => $ForScore,
                'BackScore'        => $BackScore,
                'MutualSeqID'      => $MutualSeqID,
                'Alignment_leng'   => $Alignment_leng,
                'QUERY_SEQ_RANGE'  => $query_seq_align_range,
                'TARGET_SEQ_RANGE' => $target_seq_align_range,
                'QUERY_SEQ_NAME'   => $query_seq,
                'TARGET_SEQ_NAME'  => $target_seq,
            };
            $forward_search_area_end = 0;
        }
    }
    close($mrcs_fh);
    return(\%MRCS_hash);
}

#________________________________________________________________________
# Title     : open_pdbfinder_file
# Usage     : %PDBFINDER_hash=%{&open_pdbfinder_file($ARGV[0])};
# Function  :
# Example   :
# Warning   :
# Keywords  : open_pdfi_file, open_PDFI_file
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------------
sub open_pdbfinder_file{
    # Purpose : Reads a PDBFINDER flat file and returns a reference to a hash
    #           that maps "<lower-cased ID><chain>" to the chain's sequence.
    # Argument: a file name (sorted out by handle_arguments; the first file
    #           argument is used).
    # Returns : \%PDBFINDER_hash
    # Dies    : when the input file can not be opened.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($input_file, $sequence, %PDBFINDER_hash, $PDB_id, $ID, $chain);
    $input_file=$file[0];
    open(PDBFINDER_FILE, "$input_file")
        || die "\n (E) open_pdbfinder_file: Can not open \"$input_file\": $!\n";
    while(<PDBFINDER_FILE>){
       if(/^ID\s+:\s+(\S+)/){
          $ID="\L$1";
          # Trace output for the entry '1hfe' is only wanted while debugging
          if($debug==1 and $ID eq '1hfe'){ print "\n $ID \n"; }
       }elsif(/^Chain\s+:\s+(\S)/){
          $chain=$1;
          if($debug==1 and $ID eq '1hfe'){ print "\n Chain $chain \n";  }
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # This is to skip
       #____________________________________________
       #}elsif(/non-Std\s+/){
       #   $chain='';
       #   next;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # A Sequence line is only accepted after a Chain line was seen.
       # The chain is tested for being non-empty (not for truth) so that
       # a chain called '0' is not mistaken for "no chain".
       #_________________________________________________________________
       }elsif(defined($chain) and $chain ne '' and /\S*Sequence\s+:\s+(\S+)/){
          $sequence=$1;
          $PDB_id="$ID$chain";
          # When the same "ID+chain" occurs again the first sequence is kept
          unless(defined($PDBFINDER_hash{$PDB_id})){
              $PDBFINDER_hash{$PDB_id}=$sequence;
          }
          if($debug==1 and $ID eq '1hfe'){ print "\n $sequence \n";  }
       }elsif(/\/\//){
          # '//' closes an entry: forget everything about it
          $ID=$chain=$PDB_id=$sequence='';
       }
    }
    close(PDBFINDER_FILE);
    return(\%PDBFINDER_hash);
}



#______________________________________________________________________________
# Title     : open_db_file_DALI
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub open_db_file_DALI {
    # Purpose : Tries to open "<dir><cd><ext>" for every (dir => ext) pair
    #           and leaves the first file that opens attached to the global
    #           filehandle IN.
    # Argument: ($fatal, $cd, %dbdir)
    #             $fatal : true  -> die when no candidate file could be opened
    #             $cd    : the file base name put between directory and extension
    #             %dbdir : directory prefix => file extension
    # Returns : 0 when a file was opened (read it through IN),
    #           1 when nothing could be opened and $fatal is false.
    # Note    : The previous version ran 'next' unconditionally after the
    #           open, so the 'return(0)' below it was never reached and a
    #           successful open was reported as a failure.
    my($fatal, $cd, %dbdir)=@_;
    my($file);
    foreach my $dir (keys(%dbdir)) {
         $file=$dir . $cd . $dbdir{$dir};
         #print "open_db_file_DALI : test : $file\n";
         if(open(IN, $file)){
             #print "open_db_file_DALI : reading : $file\n";
             return(0);
         }
         warn "\n Can not open $file \n";
    }
    # $file holds the last candidate tried (undefined when %dbdir is empty)
    if($fatal) {die "FATAL can't open $file\n";} else { return(1);}
}



#______________________________________________________________________________
# Title     : open_DDDF_file
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub open_DDDF_file{
    # Purpose : Reads a DDDF file made of 'Field : value' lines and builds one
    #           record each time an 'AliLen' line is met.
    # Argument: the file name, or a reference to a scalar holding it.
    # Returns : reference to a hash keyed by "<PDBID><Chain><Domain>"; each
    #           value is [PDBID, NChain, Chain, Domain, Range, Parent,
    #           D.C. number, AliLen].
    # Note    : field values are remembered from line to line, so a record
    #           inherits any field its own entry did not set.
    my($dddf_path, %record_of, $record_key, $pdb_id, $chain_count, $chain_id,
       $domain_no, $range_text, $parent, $dc_code, $ali_len);

    $dddf_path=${$_[0]} || $_[0];
    print "\n (i) Openning $dddf_path \n";
    open(IN_FILE, "$dddf_path") || die "\n Can not open \"$dddf_path\" \n";

    while(<IN_FILE>){
        # The order of the tests matters: 'NChain' has to be tried before 'Chain'
        if(/PDBID\s+\:\s+(\S+)/)  { $pdb_id=$1;      next; }
        if(/NChain\s+\:\s+(\d+)/) { $chain_count=$1; next; }
        if(/Chain\s+\:\s+(\S+)/)  { $chain_id=$1;    next; }
        if(/Domain\s+\:\s+(\d+)/) { $domain_no=$1;   next; }
        if(/Range\s+\:\s+(.+)/)   { $range_text=$1;  next; }
        if(/Paren\s+\:\s+(\S+)/)  { $parent=$1;      next; }
        if(/D\.C\.\s+\:\s+(\S+)/) { $dc_code=$1;     next; }
        next unless /AliLen\s+\:\s+(\d+)/;

        # 'AliLen' is the field that triggers writing the record to the hash
        $ali_len=$1;
        $record_key="$pdb_id$chain_id$domain_no";
        $record_of{$record_key}=[ $pdb_id, $chain_count, $chain_id, $domain_no,
                                  $range_text, $parent, $dc_code, $ali_len ];
    }
    close(IN_FILE);

    warn "\n\n\n\t!!!! \%DDDF_hash is empty. Something is wrong \n\n\n" unless %record_of;
    return(\%record_of);
}


#______________________________________________________________________________
# Title     : open_HSSP_file_for_secture
# Usage     :
# Function  : returns str. alignment information from HSSP
# Example   : %HSSP_align=%{&open_HSSP_file_for_secture($ARGV[0])};
#       $HSSP_alignment_1to1_residue_map{$Representative_name}{$HSSP_homolog_name}{$i}={$j};
#
#     The hash looks like:
#       $HSSP_ALIGNMENT{$SEQ_NUMBER_ID}{$SEQ_NAME} .= $AA_HOMO_RESIDUES[$i];
#
# Keywords  : open_hssp_file, open_hssp_files
# Options   :
#      $get_ranges_info=r by r
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub open_HSSP_file_for_secture{
    # Purpose : Reads the representative sequence and its secondary structure
    #           ('secture') string out of an HSSP file.
    # Argument: an HSSP file name (first file argument found by
    #           handle_arguments); the char option 'r' is accepted.
    # Returns : reference to a hash
    #             $HSSP_secture{"<PDBID><chain>"}{'SEQUENCE'}
    #             $HSSP_secture{"<PDBID><chain>"}{'SECTURE'}
    #           where <chain> is '_' when the residue line has no chain column
    #           and an empty structure column is written as 'C'.
    # Dies    : when the file can not be opened.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($HSSP_file_name, $PDBID, $DATE, $SEQLENGTH, $NCHAIN, $NALIGN,
       $SEQ_NUMBER_ID, $SEQ_NAME, $MUTUAL_PERCENT_SEQ_ID, $WEIGHTED_SIMILARITY_ID,
       $NUM_OF_ALIGNMENT_START, $NUM_OF_ALIGNMENT_STOP, $NUM_OF_ALIGNMENT_SHOWN,
       $Residue_Position_No, $PDB_Res_Position_No, $AA_REP, $Chain_ID, $SEC_str,
       $ALIGNMENTS_line_counter,  %HSSP_secture);
    $HSSP_file_name=$file[0];
    # NOTE(review): $get_ranges_info is a package global which is not read in
    #   this sub; open_FSSP_file reads a global of the same name. Left as it was.
    if($char_opt=~/r/i){  $get_ranges_info='r' }

    open(HSSP_FILE, $HSSP_file_name) || die "\n Can not open $HSSP_file_name\n";
    while(<HSSP_FILE>){
       if(/^PDBID\s+(\S+)$/){
          $PDBID=$1;
       }elsif(/^DATE\s+(.+)/){
          $DATE=$1;
       }elsif(/^SEQLENGTH\s+(\d+)/){
          $SEQLENGTH=$1;
       }elsif(/^NCHAIN\s+(\d+)/){
          $NCHAIN=$1;
       }elsif(/^NALIGN\s+(\d+)/){
          $NALIGN=$1;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
       # matching     1 : myg_phyca   1CIO    1.00  1.00    1  153    1  153  153    0    0  153  P02185     MYOGLOBIN.
       #____________________________________________________________________________________________________________________
       }elsif(/^\s+(\d+)\s+:\s+(\S+)\s+\S*\s*(\d\.\d+)\s+(\d\.\d+)/){
          $SEQ_NUMBER_ID=$1;
          $SEQ_NAME=$2;
          $MUTUAL_PERCENT_SEQ_ID=$3;
          $WEIGHTED_SIMILARITY_ID=$4;
       }elsif(/^\s*##\s+ALIGNMENTS\s+(\d+)\s+\-\s+(\d+)/){
          $NUM_OF_ALIGNMENT_START=$1;
          $NUM_OF_ALIGNMENT_STOP =$2;
          $NUM_OF_ALIGNMENT_SHOWN=$2-$1+1;
          $ALIGNMENTS_line_counter++;
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Every ALIGNMENTS block lists all the residues of the representative
          # again. Only the first block is wanted, otherwise SEQUENCE and
          # SECTURE would be appended once more for each extra block.
          #________________________________________________________________________
          last if $ALIGNMENTS_line_counter > 1;
       }elsif(/SeqNo\s+PDBNo\s+AA\s+STRUCTURE\s+BP1\s+BP2\s+ACC\s+NOCC\s+VAR\s+(\S+)/){
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
       # matching SeqNo  PDBNo AA STRUCTURE BP1 BP2  ACC NOCC  VAR  ....:....1....:....2....:....3....:....4....:....5....:....6....:....7
       # matching     1    1   V              0   0  125    7    0  VVVV   V  V
       # matching     9  116   Y  E     -ab  44  52A  13  103   10  YYYYYYYY YY YYYYYYYYYYYYYYYYYY YY YYYY YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYF
       # matching     3    5 A W  S    S+     0   0    6   50   44  WWWWWWWWW  LLWW VWVVVVVVVVRVR RRRRRRIRRVIVIIVF LL  F F  H S  LF
       #____________________________________________________________________________________________________________________________________________
       }elsif(/^\s+(\d+)\s+(\d+)\s\s+(\S)\s\s(\S*)/){
          # Residue line without a chain column
          $Residue_Position_No=$1;
          $PDB_Res_Position_No=$2;
          $AA_REP =$3;
          $Chain_ID='_';
          if($4){ $SEC_str=$4 }else{ $SEC_str='C' }
          $HSSP_secture{"$PDBID$Chain_ID"}{'SEQUENCE'} .=$AA_REP;
          $HSSP_secture{"$PDBID$Chain_ID"}{'SECTURE'}  .=$SEC_str;  ## secture means secondary structure
       }elsif(/^\s+(\d+)\s+(\d+)\s+(\S)\s+(\S)\s\s(\S*)/){
          # Residue line with a chain column between PDBNo and AA
          $Residue_Position_No=$1;
          $PDB_Res_Position_No=$2;
          $Chain_ID=$3;
          $AA_REP =$4;
          if($5){ $SEC_str=$5 }else{ $SEC_str='C' }
          $HSSP_secture{"$PDBID$Chain_ID"}{'SEQUENCE'} .=$AA_REP;
          $HSSP_secture{"$PDBID$Chain_ID"}{'SECTURE'}  .=$SEC_str;  ## secture means secondary structure

       }elsif(/^\s*## SEQUENCE PROFILE AND ENTROPY/){
          last;
       }

    }
    close(HSSP_FILE);
    return(\%HSSP_secture);
}



#______________________________________________________________________________
# Title     : open_HSSP_file
# Usage     :
# Function  : returns str. alignment information from HSSP
# Example   : %HSSP_align=%{&open_HSSP_file($ARGV[0])};
#       $HSSP_alignment_1to1_residue_map{$Representative_name}{$HSSP_homolog_name}{$i}={$j};
#
#     The hash looks like:
#       $HSSP_ALIGNMENT{$SEQ_NUMBER_ID}{$SEQ_NAME} .= $AA_HOMO_RESIDUES[$i];
#
# Keywords  : open_hssp_file, open_hssp_files
# Options   :
#      $get_ranges_info=r by r
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub open_HSSP_file{
    # Purpose : Reads the alignment columns of an HSSP file and rebuilds one
    #           sequence string for each aligned protein.
    # Argument: an HSSP file name (first file argument found by
    #           handle_arguments); the char option 'r' is accepted.
    # Returns : reference to a hash
    #             $HSSP_ALIGNMENT{0}{$PDBID}                  the representative
    #             $HSSP_ALIGNMENT{$SEQ_NUMBER_ID}{$SEQ_NAME}  each aligned seq.
    #           Gaps are written as '.'.
    # Dies    : when the file can not be opened.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($HSSP_file_name, $PDBID, $DATE, $SEQLENGTH, $NCHAIN, $NALIGN,
       $SEQ_NUMBER_ID, $SEQ_NAME, $MUTUAL_PERCENT_SEQ_ID, $WEIGHTED_SIMILARITY_ID,
       $NUM_OF_ALIGNMENT_START, $NUM_OF_ALIGNMENT_STOP, $NUM_OF_ALIGNMENT_SHOWN,
       $Residue_Position_No, $PDB_Res_Position_No, $AA_REP, $AA_HOMO,
       @AA_HOMO_RESIDUES, $diff,
       %SEQ_NAME_and_NUM_lookup_hash, %HSSP_ALIGNMENT, $ALIGNMENTS_line_counter);
    $HSSP_file_name=$file[0];
    # NOTE(review): $get_ranges_info is a package global which is not read in
    #   this sub; open_FSSP_file reads a global of the same name. Left as it was.
    if($char_opt=~/r/i){  $get_ranges_info='r' }

    open(HSSP_FILE, $HSSP_file_name) || die "\n Can not open $HSSP_file_name\n";
    while(<HSSP_FILE>){
       if(/^PDBID\s+(\S+)$/){
          $PDBID=$1;
          $SEQ_NAME_and_NUM_lookup_hash{0}=$PDBID;
       }elsif(/^DATE\s+(.+)/){
          $DATE=$1;
       }elsif(/^SEQLENGTH\s+(\d+)/){
          $SEQLENGTH=$1;
       }elsif(/^NCHAIN\s+(\d+)/){
          $NCHAIN=$1;
       }elsif(/^NALIGN\s+(\d+)/){
          $NALIGN=$1;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
       # matching     1 : myg_phyca   1CIO    1.00  1.00    1  153    1  153  153    0    0  153  P02185     MYOGLOBIN.
       #____________________________________________________________________________________________________________________
       }elsif(/^\s+(\d+)\s+:\s+(\S+)\s+\S*\s*(\d\.\d+)\s+(\d\.\d+)/){
          $SEQ_NUMBER_ID=$1;
          $SEQ_NAME=$2;
          $MUTUAL_PERCENT_SEQ_ID=$3;
          $WEIGHTED_SIMILARITY_ID=$4;
          # remembered so that alignment columns can be mapped back to names
          $SEQ_NAME_and_NUM_lookup_hash{$SEQ_NUMBER_ID}=$SEQ_NAME;
       }elsif(/^\s*##\s+ALIGNMENTS\s+(\d+)\s+\-\s+(\d+)/){
          $NUM_OF_ALIGNMENT_START=$1;
          $NUM_OF_ALIGNMENT_STOP =$2;
          $NUM_OF_ALIGNMENT_SHOWN=$2-$1+1;
          $ALIGNMENTS_line_counter++;
       }elsif(/SeqNo\s+PDBNo\s+AA\s+STRUCTURE\s+BP1\s+BP2\s+ACC\s+NOCC\s+VAR\s+(\S+)/){
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
       # matching SeqNo  PDBNo AA STRUCTURE BP1 BP2  ACC NOCC  VAR  ....:....1....:....2....:....3....:....4....:....5....:....6....:....7
       # matching     1    1   V              0   0  125    7    0  VVVV   V  V
       # matching     9  116   Y  E     -ab  44  52A  13  103   10  YYYYYYYY YY YYYYYYYYYYYYYYYYYY YY YYYY YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYF
       # matching     3    5 A W  S    S+     0   0    6   50   44  WWWWWWWWW  LLWW VWVVVVVVVVRVR RRRRRRIRRVIVIIVF LL  F F  H S  LF
       #
       # The last group also accepts '.': the class used to be [ \w] only, so
       # a row was cut at its first '.' and the rest of it was replaced by
       # padding. (Presumably '.' marks a deletion in HSSP rows -- TODO confirm
       # against the HSSP format description.)
       #____________________________________________________________________________________________________________________________________________
       }elsif(/^\s+(\d+)\s+(\d+)\s+\S*\s*(\S).{12,14}(\w+)\s+(\w+)\s+(\d+)\s+(\d+)\s+(\d+)..([ \w\.]+)/){
          $Residue_Position_No=$1;
          $PDB_Res_Position_No=$2;
          $AA_REP =$3;
          $AA_HOMO=$9;
          $AA_HOMO=~tr/ /\./; ## change the space gap to '.' gap
          @AA_HOMO_RESIDUES=split(//, $AA_HOMO);
          # A row shorter than the block width is padded with gaps
          if($NUM_OF_ALIGNMENT_SHOWN > @AA_HOMO_RESIDUES){
             $diff=$NUM_OF_ALIGNMENT_SHOWN-@AA_HOMO_RESIDUES;
             for($i=0; $i<$diff; $i++){
                push(@AA_HOMO_RESIDUES, '.');
             }
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # putting the very first rep. seq (residue lines are repeated in
          # every ALIGNMENTS block, so only the first block is used for it)
          #________________________________________________________________________
          $HSSP_ALIGNMENT{0}{$PDBID} .= $AA_REP if $ALIGNMENTS_line_counter==1;

          #$AA_HOMO=join('', @AA_HOMO_RESIDUES);
          #print "\n\"$AA_HOMO  $ALIGNMENTS_line_counter\"";
          # Column $i of this block belongs to sequence number START+$i
          for($i = 0; $i< @AA_HOMO_RESIDUES; $i++){
             $SEQ_NUMBER_ID=$i + $NUM_OF_ALIGNMENT_START;
             $SEQ_NAME=$SEQ_NAME_and_NUM_lookup_hash{$SEQ_NUMBER_ID};
             $HSSP_ALIGNMENT{$SEQ_NUMBER_ID}{$SEQ_NAME} .= $AA_HOMO_RESIDUES[$i];
          }
       }elsif(/^\s*## SEQUENCE PROFILE AND ENTROPY/){
          last;
       }

    }
    close(HSSP_FILE);
    return(\%HSSP_ALIGNMENT);
}



#______________________________________________________________________________
# Title     : open_FSSP_file
# Usage     :
# Function  : returns str. alignment information from FSSP
# Example   : %FSSP_align=%{&open_FSSP_file($ARGV[0])};
#       $FSSP_alignment_1to1_residue_map{$Representative_name}{$FSSP_homolog_name}{$i}={$j};
# Keywords  : open_fssp_file, open_fssp_files open_FSSP_files
# Options   :
#      $get_ranges_info=r by r
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.4
#------------------------------------------------------------------------------
sub open_FSSP_file{
    # Purpose : Reads the '## EQUIVALENCES' section of an FSSP file and maps
    #           the residues of the representative onto each homolog.
    # Argument: an FSSP file name (first file argument found by
    #           handle_arguments) and optionally the char option 'r'.
    # Returns : reference to a hash
    #            default : {$Representative_name}{$FSSP_homolog_name}{$rep_pos}=$homol_pos
    #            with 'r': {$Representative_name}{$FSSP_homolog_name}{"start-stop"}="start-stop"
    # Dies    : when the file can not be opened.
    # Note    : the 'r' option is kept in a lexical flag. It used to be written
    #           to the package global $get_ranges_info and was never reset, so
    #           every later call returned ranges even without 'r'. A global
    #           $get_ranges_info that is already 'r' is still honoured.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($Alignment_section_found, $FSSP_file_name, %FSSP_alignment_1to1_residue_map,
       $REP_position, $REP_region_start, $REP_region_stop, $HOMOL_position,
       $length_of_alignment, $Representative_name, $HOMOLOG_ID_num,
       $FSSP_homolog_name, $HOMOL_region_start, $HOMOL_region_stop,
       $REP_range, $HOMOL_range, $want_ranges);
    $FSSP_file_name=$file[0];
    if($char_opt=~/r/i){
        $want_ranges=1;
        print "\n open_FSSP_file: \$get_ranges_info is set to r\n";
    }elsif(defined($get_ranges_info) and $get_ranges_info eq 'r'){
        $want_ranges=1;
    }

    open(FSSP_FILE, $FSSP_file_name) || die "\n (E) open_FSSP_file: Can not open $FSSP_file_name\n";
    while(<FSSP_FILE>){
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
       # Following is the Alignment part extraction only. Other parts will be made later
       #________________________________________________________________________________________
       if(/## EQUIVALENCES: ranges of aligned residues/){
          $Alignment_section_found=1;      #      NR     STRID1 <=> STRID2
       }elsif($Alignment_section_found
          and /^\s+(\d+)\:\s+(\S+)\s+(\S+)\s+(\d+)\s+\-\s+(\d+)\s+\<=\>\s+(\d+)\s+\-\s+(\d+)/){
          $HOMOLOG_ID_num=$1;
          $Representative_name=$2;
          $FSSP_homolog_name=$3;
          $REP_region_start=$4;
          $REP_region_stop =$5;
          $HOMOL_region_start=$6;
          $HOMOL_region_stop=$7;
          $REP_range  = "$REP_region_start\-$REP_region_stop";
          $HOMOL_range= "$HOMOL_region_start\-$HOMOL_region_stop";

          if($want_ranges){
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Making a hash which stores ranges information
              #______________________________________________________________
              $FSSP_alignment_1to1_residue_map{$Representative_name}{$FSSP_homolog_name}{$REP_range}=$HOMOL_range;
          }else{
              #print "\n    Alignment region matched\n$HOMOLOG_ID_num $Representative_name $FSSP_homolog_name $REP_region_start $HOMOL_region_start ";
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
              # Making a hash which stores 1 to 1 residue position.
              # The length is taken from the representative's range
              # and both positions advance together.
              #______________________________________________________

              $length_of_alignment=$REP_region_stop-$REP_region_start + 1;

              for($i=0; $i < $length_of_alignment; $i++){
                  $REP_position=$i+$REP_region_start;
                  $HOMOL_position=$i+$HOMOL_region_start;
                  $FSSP_alignment_1to1_residue_map{$Representative_name}{$FSSP_homolog_name}{$REP_position}=$HOMOL_position;
              }
          }
       }
    }
    close(FSSP_FILE);
    return(\%FSSP_alignment_1to1_residue_map);
}

#______________________________________________________________________________
# Title     : open_FDAT_file
# Usage     :
# Function  :
# Example   : There are 2 main entries:
#          $Dali_subdomains{$structure_ID}{$subdomain_number}=[$subdomain1,
#                                                              $subdomain2,
#                                                              $residue_leng,
#                                                              $numb_of_segments,
#                                                              $ranges
#                                                              ]
#          $Dali_subdomains{$structure_ID}{'SEQUENCE'}
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 2.0
#------------------------------------------------------------------------------
sub open_FDAT_file{
    # Purpose : Reads a Dali FDAT (.dat) file and collects its sub domain
    #           (node) lines and its sequence.
    # Argument: the file name, or a reference to a scalar holding it. A name
    #           whose base (before '.dat') is shorter than 5 characters gets
    #           '_' appended to the base ('1abc.dat' -> '1abc_.dat').
    # Returns : reference to a hash
    #             $Dali_subdomains{$structure_ID}{$node}=[$subdomain1, $subdomain2,
    #                              $residue_leng, $numb_of_segments, $ranges]
    #             $Dali_subdomains{$structure_ID}{'SEQUENCE'}=$sequence
    #           Nodes shorter than 40 residues (other than node 1) are left
    #           out, the kept nodes are renumbered from 1, and a child entry
    #           that points at a left-out node is set to 0.
    # Side effect: fills the package hash %PDB_sec_str_data from the full
    #           '>>>>' header line.
    # Note    : a file that can not be opened only gives a warning; the
    #           returned hash is then empty.
    use vars qw(%PDB_sec_str_data);  # the package global this sub has always filled
    my($i, $j, $FDAT_file, $entry_sign_count, $structure_ID, $subdomain_number,
       $residue_leng, $secondary_str_element_num, $helix_number,
       $strand_number, %Dali_subdomains, $subdomain1, $subdomain2,
       $numb_of_segments, $ranges, $sequence, $correct_node_number,
       @delete_targets, @nodes, @ranges, $structure_key);
    $FDAT_file=ref($_[0]) ? ${$_[0]} : $_[0];
    if($FDAT_file=~/([\S+]*)\/([^\/]+)\.dat/ and length($2) < 5){ $FDAT_file="$1\/$2\_.dat"
    }elsif($FDAT_file=~/^(\w+)\.dat/ and length($1) < 5){ $FDAT_file="$1\_.dat" }

    open(FDAT, "<$FDAT_file") || warn "open_FDAT_file: Can not open $FDAT_file file \n";
    while(<FDAT>){
       # Full header line: ID, length, no. of sec. str. elements, helices, strands
       if(/\>\>\>\>\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([HE]*)/){
          $entry_sign_count++;
          $structure_ID=$1;
          $residue_leng=$2;
          $secondary_str_element_num=$3;
          $helix_number=$4;
          $strand_number=$5;
          $PDB_sec_str_data{$1}=[$residue_leng,
                                 $secondary_str_element_num,
                                 $helix_number, $strand_number];
       # >>>> 1tgoA   69
       }elsif(/\>\>\>\>\s+(\S+)\s+(\d+)/){
          $entry_sign_count++;
       # Node lines are only read after the third '>>>>' line
       # >>>> 1tgoA   69                      NODE     +*=-    (sub domains)   Leng    Seg No.  ranges
       }elsif($entry_sign_count > 2 and /^\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s*(\d*)\s*(\d*)\s*(.*)/){
          $subdomain_number=$1;
          $subdomain1=$3;
          $subdomain2=$4;
          $residue_leng=$5;
          $numb_of_segments=$6;
          $ranges=$7;

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # This checks a line like: '   1 +    2   31017   1   11017' in 1kcw_.dat
          # where two columns run into each other: the last 4 digits are split
          # off and every later field is shifted by one.
          #_____________________________________________________________________________
          if(length($subdomain2) > 4 and !$ranges){
              $ranges = $numb_of_segments;
              $numb_of_segments=$residue_leng;
              ($subdomain2, $residue_leng)=$subdomain2=~/(\d+)(\d\d\d\d)/;
              if(length($ranges) > 4){
                  @ranges=$ranges=~/(\d+)(\d\d\d\d)$/;
                  $ranges=join(' ', @ranges);
              }
              print "$ranges, $numb_of_segments $residue_leng $subdomain2\n";
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Mark nodes which are less than 40 aa size (to be deleted later)
          #___________________________________________________________________________
          # accept 1ppt(1) = 36 residue peptide
          if($residue_leng < 40 && $subdomain_number>1){
              push(@delete_targets, $subdomain_number);
          }else{
              my(@final_ranges);
              $correct_node_number++;
              @ranges= split(/\s+/, $ranges);
              for($i=0; $i < @ranges; $i++){
                  # a range longer than 4 digits is two numbers run together
                  if(length($ranges[$i]) > 4){
                      print "\n$ranges[$i]";
                      push(@final_ranges, $ranges[$i]=~/(\d+)(\d\d\d\d)$/);
                  }else{
                      push(@final_ranges, $ranges[$i]);
                  }
              }
              $ranges=join(' ', @final_ranges);
              $Dali_subdomains{$structure_ID}{$correct_node_number}=[$subdomain1,
                                                                  $subdomain2,
                                                                  $residue_leng,
                                                                  $numb_of_segments,
                                                                  $ranges
                                                                  ];
          }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # -sequence "MILDTDYITEDGKPVIRIFKKENGEFKIDYDRNFEPYIYALLKDDSAIEDVK
       #______________________________________________________________________
       }elsif(/\-sequence \"(\w+)/){
          $sequence=$1;
          $Dali_subdomains{$structure_ID}{'SEQUENCE'}=$sequence;
       }
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Final processing of the Dali_subdomains hash to remove the deleted nodes the anon. array
    #  - nothing is done (and no empty entry is created) when no node or
    #    sequence was stored for the last structure read
    #  - only the numbered node entries are looked at; the 'SEQUENCE' entry is
    #    a plain string and must not be used as an array reference
    #______________________________________________________________________________________________
    $structure_key=defined($structure_ID) ? $structure_ID : '';
    if(exists $Dali_subdomains{$structure_key}){
        @nodes=sort {$a<=>$b} grep { /^\d+$/ } keys %{$Dali_subdomains{$structure_key}};
        for($i=0; $i< @delete_targets; $i++){
           for($j=0; $j< @nodes; $j++){
               if($Dali_subdomains{$structure_key}{$nodes[$j]}[0] == $delete_targets[$i]){
                   $Dali_subdomains{$structure_key}{$nodes[$j]}[0]=0;
               }elsif($Dali_subdomains{$structure_key}{$nodes[$j]}[1] == $delete_targets[$i]){
                   $Dali_subdomains{$structure_key}{$nodes[$j]}[1]=0;
               }
           }
        }
    }

    close(FDAT);
    return(\%Dali_subdomains);
}



#______________________________________________________________________________
# Title     : open_stragment_library_file
# Usage     :
# Function  :
# Example   :
# Keywords  : open_STRL_file, open_strl_file STRL: stragment library file
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub open_stragment_library_file{
    # Purpose : Reads a stragment library (STRL) file and collects the
    #           stragments of every entry.
    # Argument: the file name, or a reference to a scalar holding it.
    # Returns : reference to a hash  "<name>_<seqlet>" => [stragment, ...]
    #           built from lines of the form '><name>_<seqlet>:<stragment>'.
    # Dies    : when the file can not be opened.
    # Note    : %stragment is now private to each call. It used to be an
    #           undeclared package hash, so a second call returned the entries
    #           of the first file as well and changed the hash returned before.
    my($stragment_file, $stragment, $PDB_ID, $seqlet, %stragment);
    $stragment_file=ref($_[0]) ? ${$_[0]} : $_[0];
    open(STRAGMENT_FILE, "$stragment_file") || die "\n Can not open $stragment_file\n\n";
    while(<STRAGMENT_FILE>){
       if(/\>(\S+_(\w+)):(\w+)/){
          $PDB_ID=$1;      # whole key, seqlet part included
          $seqlet=$2;
          $stragment=$3;
          push(@{$stragment{$PDB_ID}}, $stragment);
       }
    }
    close(STRAGMENT_FILE);
    return(\%stragment);
}




#______________________________________________________________________________
# Title     : open_socket
# Usage     : $SOCKET_FILEHANDLE=${&open_socket("H=$local_hostname",
#                                     "S=$target_server_IP", "FH=$file_handle",
#                                     "P=$port")};
#
# Function  : opens Socket filehandle and returns the filehandle name
#             which is 'SOCKET_FOR_POST' by default
# Example   :
# Keywords  : open_SOCKET
# Options   :
# Author    : jong@biosophy.org, ali@genet.sickkids.on.ca
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub open_socket{
    # Purpose : Opens a TCP socket to a server and returns the NAME of the
    #           socket filehandle.
    # Argument: strings (or references to strings), in any order:
    #             "FH=name"   filehandle name      (default 'SOCKET_FOR_POST')
    #             "S=server"  target server name or IP
    #             "H=host"    local host name to bind to (any address if absent)
    #             "P=port"    port number or service name (default 80)
    # Returns : reference to the scalar holding the filehandle name.
    # Dies    : when the service or server name can not be resolved and when
    #           socket, bind or connect fails.
    # Side effect: sets $| to 1 for the currently selected output handle.
    # Notes   : - the arguments are copied before use; a referenced argument
    #             used to be overwritten in the caller through the @_ alias.
    #           - the address family, the socket type and the packed socket
    #             addresses come from the Socket module instead of the
    #             numbers 2 and 1 and a hand made pack("Sna4x8") template,
    #             which do not match every system.
    my($arg, $name, $altname, $proto, $port, $client, $server, $clientAddress,
       $serverAddress, $AF_INET, $SOCK_STREAM, $SOCKET_FILEHANDLE,
       $local_hostname, $target_server_IP, $type, $len, $error);

    require Socket;
    $SOCKET_FILEHANDLE='SOCKET_FOR_POST';
    $SOCK_STREAM=Socket::SOCK_STREAM();
    $AF_INET    =Socket::AF_INET();
    $port=80; # default is set to HTTPD port
    $|=1;

    foreach my $raw_arg (@_){
        $arg = ref($raw_arg) ? ${$raw_arg} : $raw_arg;
        # 'FH=' has to be tried before 'H=' (which would match it as well)
        if($arg=~/FH=(\S+)/){
            $SOCKET_FILEHANDLE=$1;
        }elsif($arg=~/S=(\S+)/i){
            $target_server_IP=$1;
        }elsif($arg=~/H=(\S+)/i){
            $local_hostname=$1;
        }elsif($arg=~/P=(\S+)/i){
            $port=$1;
        }
    }

    ($name, $altname, $proto) = getprotobyname('tcp');
    ($name, $altname, $port)  = getservbyname($port, 'tcp') unless $port =~ /^\d+$/;
    if(!defined($port)){
        die "\n# (E) open_socket: unknown tcp service name given with P=\n\n";
    }
    if(defined($local_hostname)){
        ($name, $altname, $type, $len, $clientAddress) = gethostbyname($local_hostname);
    }
    # $name is checked below, so it must come from the server lookup only
    $name=undef;
    if(defined($target_server_IP)){
        ($name, $altname, $type, $len, $serverAddress) = gethostbyname($target_server_IP);
    }

    if(!$name or !defined($serverAddress)){
        die "\n# (E) No hostname defined $!\n\n$name, $altname, $proto, $type, $len\n\a";
    }

    # Local end: port 0 (the system picks one); any local address when no
    # local host name was given or found
    $client = Socket::pack_sockaddr_in(0, defined($clientAddress) ? $clientAddress
                                                                 : Socket::INADDR_ANY());
    $server = Socket::pack_sockaddr_in($port, $serverAddress);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # make the socket filehandle
    #______________________________________________________
    if (!socket("$SOCKET_FILEHANDLE", $AF_INET, $SOCK_STREAM, $proto)) {
        die ("Cannot open socket. Error code: $!\n");
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # give the socket an address
    #______________________________________________________
    if (!bind("$SOCKET_FILEHANDLE", $client)) {
        $error="$!";               # close() may change $!
        close($SOCKET_FILEHANDLE); # has to come before die to be run at all
        die("Cannot bind. Error code: $error\n");
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Connect to the server
    #______________________________________________________
    if (!connect("$SOCKET_FILEHANDLE", $server)) {
        $error="$!";
        print "\n# (E) \$local_hostname    : $local_hostname";
        print "\n# (E) \$target_server_IP  : $target_server_IP";
        print "\n# (E) \$clientAddress     : $clientAddress";
        print "\n# (E) \$serverAddress     : $serverAddress";
        print "\n# (E) \$proto             : $proto";
        print "\n# (E) \$type              : $type";
        print "\n# (E) \$port              : $port";
        print "\n# (E) \$SOCKET_FILEHANDLE : $SOCKET_FILEHANDLE";
        print "\n# (E) \$len               : $len\n";
        close($SOCKET_FILEHANDLE);
        # the readable server name and port are reported, not the packed address
        die("# (E) $0: Cannot connect to $target_server_IP:$port. Error code: $error\n");
    }
    return(\$SOCKET_FILEHANDLE);
}


#________________________________________________________________________________
# Title     : open_parf_files
# Usage     : @output_hashes=&open_parf_files(@files);
#
#              %parf_hash_homology_info          =%{$output_hashes[0]};
#              %parf_hash_score                  =%{$output_hashes[1]};
#              %parf_hash_classification_column_1=%{$output_hashes[2]};
#              %parf_hash_classification_column_2=%{$output_hashes[3]};
#              %parf_hash_Homologous_rank        =%{$output_hashes[4]};
#              %parf_hash_Nomologous_rank        =%{$output_hashes[5]};
#
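# Function  : Reads PARF files (one sequence pair per line) and returns six hash
#             references, all keyed by the two sequence names sorted and joined.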
# Example   :
#    PARF file looks like this>
#   d1nsca_   d3nn9__   Homolog -664.92 2.43.1.1.3  2.43.1.1.2
#   d1dppa_   d2olba_   Homolog -617.41 3.68.1.1.6  3.68.1.1.1
#   d2ach.1a1 d9api.1a1 Homolog -556.38 5.2.1.1.3   5.2.1.1.4
#
# Keywords  :
# Options   :
# Author    :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub open_parf_files{
     # Purpose : collect per-pair information from PARF files.
     # Input   : file names (taken from @file as filled in by handle_arguments).
     #           $vars{'l'} overrides the maximum number of lines to read
     #           (presumably passed as "l=<N>" -- confirm against handle_arguments).
     # Returns : six hash refs, in this order:
     #             homology label, score, classification column 1,
     #             classification column 2, Homolog rank, Nomolog rank.
     #           Every hash is keyed by the two sequence names of a line,
     #           sorted and concatenated WITHOUT a separator.
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
     \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
     \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my (@parf_file,
         %parf_hash_homology_info, %parf_hash_score,
         %parf_hash_classification_column_1, %parf_hash_classification_column_2,
         %parf_hash_Homologous_rank,  %parf_hash_Nomologous_rank);

     my $number_of_lines_read      = 0;
     my $number_of_lines_to_read   = 50000;
     my $number_of_Homologs_to_read= 7000; # 699 is for 1% error in 935 PDB40D
     my $number_of_Nomologs_to_read= 90; # 9 is for 1% error in 935 PDB40D

     if($vars{'l'}=~/\S+/){ $number_of_lines_to_read=$vars{'l'} }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Selecting the PARF files: by extension, or by content
     #__________________________________________________________
     for($i=0; $i< @file; $i++){
         if($file[$i]=~/\.parf/i){
             push(@parf_file, $file[$i]);
             next;
         }
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # Check if it is parf file inside the file
         #__________________________________________________________
         unless(open(INPUT_FILE, "<$file[$i]")){
             # An unreadable file used to be skipped silently; say so now.
             warn "\n# (E) open_parf_files: cannot open $file[$i]: $!\n";
             next;
         }
         my $counter = 0;
         while(<INPUT_FILE>){
              $counter++;
              if(/^\s*\S+\s+\S+\s+[NFUH]omolog\s+\S+\s+\S+\s+\S+/){
                  push(@parf_file, $file[$i]);
                  last;
              }
              if($counter > 100){  ## giving up, it is not PARF file!
                  print "\n# $0 needs to have PARF files, others are ignored";
                  last;
              }
         }
         close(INPUT_FILE);
     }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Processing the parf file contents
     #__________________________________________________________
     for($i=0; $i< @parf_file; $i++){
         # The Homolog/Nomolog counters restart for every file and are left
         # undefined until the first hit, so a rank stays undef (not 0) for
         # pairs seen before any line of that kind.
         my($nomolog_counter, $homolog_counter);
         unless(open(PARF_FILE, "<$parf_file[$i]")){
             warn "\n# (E) open_parf_files: cannot open $parf_file[$i]: $!\n";
             next;
         }
         while(<PARF_FILE>){
              if(/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/){
                   # Copy the captures at once: the matches below reset $1..$6.
                   my($seq_name_1, $seq_name_2, $homology_info, $score,
                      $classification_column_1, $classification_column_2)
                      =($1, $2, $3, $4, $5, $6);
                   my $sorted_pair=join('', sort($seq_name_1, $seq_name_2));
                   $number_of_lines_read++;

                   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                   # Count the NO-HOmology
                   #____________________________________
                   if($homology_info =~/Nomolog/i){
                       $nomolog_counter++;
                   }elsif($homology_info =~/Homolog/i){
                       $homolog_counter++;
                   }

                   $parf_hash_homology_info{$sorted_pair}=$homology_info;
                   $parf_hash_score{$sorted_pair}=$score;
                   $parf_hash_classification_column_1{$sorted_pair}=$classification_column_1;
                   $parf_hash_classification_column_2{$sorted_pair}=$classification_column_2;
                   $parf_hash_Homologous_rank{$sorted_pair}=$homolog_counter;
                   $parf_hash_Nomologous_rank{$sorted_pair}=$nomolog_counter;
              }
              # NOTE(review): $number_of_lines_read accumulates over all files
              # while the test is '==', so only the file in which the limit is
              # first reached is cut short -- confirm whether the limit is
              # meant per file.
              if($number_of_lines_read == $number_of_lines_to_read){  last;     }
              if($nomolog_counter == $number_of_Nomologs_to_read){    last;     }
              if($homolog_counter == $number_of_Homologs_to_read){    last;     }
         }
         close(PARF_FILE);
     }
     return(\%parf_hash_homology_info,
            \%parf_hash_score,
            \%parf_hash_classification_column_1,
            \%parf_hash_classification_column_2,
            \%parf_hash_Homologous_rank,
            \%parf_hash_Nomologous_rank);
}



#______________________________________________________________________________
# Title     : open_upro_files
# Usage     :
# Function  : User Profile file (for NetBioServ)
# Example   : input example is:
#
#  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#  User      : Jong
#  Seq_name  : 128U_DROME
#  Sequence  : MITILEKISAIESEMARTQKNKATSAHLGLLKANVAKLRRELISPKGGGGGTGEAGFEVAK
#              LLDLPGIIEGAKDGKGRGRQVIAVARTCNLIFMVLDCLKPLGHKKLLEHELEGFGIRLNKKPPNIY
#              SDDLIDVIEGNRIYIPCIYLLNKIDQISIEELDVIYKIPHCVPISAHHHWNFDDLLELMWEYLRLQ
#              WGSSVKHQPQKVGIEHVLNDEDVVQIVKKV
#  ____________________________________________________________
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub open_upro_files{
		# Purpose : read UPRO (user profile) files, one record per file.
		# Input   : file names (taken from @file as filled in by handle_arguments).
		# Returns : with more than one record, a ref to an array of hash refs;
		#           otherwise a ref to a single hash (empty if nothing was read).
		#           Each hash has the keys USER, SEQ_NAME (<seq name>_<user>) and
		#           SEQUENCE.
		# Dies    : when no file is given or a non-empty file cannot be opened.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my(@final_hash_out);
		if(@file<1){
				print "\n \@file has less than 1 elem. There is no fileinput for open_upro_files\n";
				die;
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
		# (1) opening UPRO files
		#_______________________________________________________________________
		for($i=0; $i< @file; $i++){
			 unless(-s $file[$i]){ print "\n# (E) File not found $file[$i]\n"; next; } ## this is essential as handle_arguments has a problem
			 open(UPRO, "<$file[$i]") || die "\n $0 open_upro_files failed to open $file[$i] \n";

			 # Per-file state: nothing may leak in from the previous file.
			 my($user_name, $query_seq_name, $sequence);
			 while(<UPRO>){
					if(/User\s+:\s*(\S+)/i){
							$user_name=$1;
					}elsif(/Seq_name\s+:\s*(\S+)/i){
							$query_seq_name="$1\_$user_name";
					}elsif(/Sequence\s+:\s*(\S[\S ]+)/i){
							$sequence=$1;
					}elsif(/^\s*#?________+/){
							# Closing rule of the record.  Tested before the continuation
							# branch so that an indented rule is never taken as sequence.
							last;
					}elsif($sequence and /^\s*([^\s_][\S ]*)/){
							# Continuation line of the sequence: keep the whole line after
							# its leading white space (the old pattern backtracked down to
							# the last character only).
							$sequence.=$1;
					}
			 }
			 close(UPRO);

			 # A fresh hash per file; pushing a reference to one shared hash made
			 # every element of the result point at the last record.
			 my %record=('USER', $user_name, 'SEQ_NAME', $query_seq_name, 'SEQUENCE', $sequence);
			 push(@final_hash_out, \%record);
		}
		if(@final_hash_out > 1){
			 return(\@final_hash_out);
		}elsif(@final_hash_out == 1){
			 return($final_hash_out[0]);
		}else{
			 return({});   # no readable file: same empty-hash result as before
		}

}



#__________________________________________________________________________
# Title     : open_sequence_index_files
# Usage     : open_sequence_index_files(<indexfilename>, <sequencename>);
# Function  : returns seqname with its seek pos in fasta sequence db file.
# Example   : %index=%{&open_sequence_index_files(\@INDEX_FILE, \@input_seq_names)};
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : open_seq_index_files, open_seq_idx_files, open_idx_files,
#             get_sequence_index, get_seq_index, get_sequence_with_index
# Options   : _ or # for debugging
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------------------
sub open_sequence_index_files{
    # Purpose : look up sequence names in "name  value" index files.
    # Input   : index file names in @file and sequence names in @string, both
    #           as sorted out by handle_arguments.
    # Returns : ref to a hash  <sequence name> => <second column of the index
    #           line for that name>.  A name of the form NAME_<from>-<to> is
    #           looked up under NAME but stored under the full name.  Names not
    #           present in the index get an undefined value.
    # Side effect: the same pairs are also written to the package hash
    #           %seq_with_index, which is where the previous version put them.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my( %final_index, %long_index);

    for($i=0; $i< @file; $i++){
        unless(open(INDEX, "<$file[$i]")){
            warn "\n# (E) open_sequence_index_files: cannot open $file[$i]: $!\n";
            next;
        }
        while(<INDEX>){
           if(/^(\S+)\s+(\S+)$/){
               $long_index{$1}=$2;
           }
        }
        close(INDEX);

        for($j =0; $j < @string; $j++){ #<<<< @string has the sequence NAMEs >>>>
            my $seq_name   =$string[$j];
            # Strip a trailing _<from>-<to> range before the lookup.
            my $lookup_name=($seq_name=~/^(\S+)_\d+\-\d+/) ? $1 : $seq_name;

            # The result used to go only into %seq_with_index, read through an
            # undeclared @input_seq_names, while the empty %final_index was
            # returned.  Fill the returned hash and keep the old package hash
            # in step for callers that read it directly.
            $final_index{$seq_name}   =$long_index{$lookup_name};
            $seq_with_index{$seq_name}=$final_index{$seq_name};
        }

	}
	return(\%final_index);
}



#______________________________________________________________________________
# Title     : do_MRC_search
# Usage     :
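# Function  : Runs 'herta' for query/DB sequence pairs, forward and with the
#             second sequence reversed, and writes one .mrcs file per query.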
# Example   :
# Keywords  :
# Options   :
#   $HERTA_algorithm_type=     by M=
#   $HERTA_open_gap_penalty=   by A=
#   $HERTA_extend_gap_penalty= by B=
#   $HERTA_coded_seq1=         by P=
#   $HERTA_coded_seq2=       by Q=
#   $HERTA_coded_seq1_rv     by p=
#   $HERTA_coded_seq2_rv     by q=
#   $HERTA_Matrix_name=      by R=
#   $Query_sequences=        by F=
#   $DB_sequences=           by D=
#   $raw_matrix_name=        by X=
#   $NO_screen_print=Q       by Q
#   $overwrite_opt=o         by o
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.6
#------------------------------------------------------------------------------
sub do_MRC_search{
    # Purpose : run the external 'herta' program for query/DB sequence pairs and
    #           collect the results in one <query>_OG<A>_EG<B>_<algorithm>.mrcs
    #           file per query sequence.
    # Options : read from %vars / $char_opt as listed in the header above
    #           (M= A= B= P= Q= p= q= R= F= D= X= E=, and the flags o Q k).
    # Returns : nothing meaningful; the work is done through files.
    # Dies    : when no usable matrix is found, when an .mrcs file cannot be
    #           opened, or when herta leaves an empty/missing output file.
    # NOTE(review): several names used below ($Query_sequences, $DB_sequences,
    #           $sorted_pair, $query_seq_file, ...) are package variables, so
    #           F= / D= values survive from one call to the next.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my(@scores, @codes, $count, $out_matrix_name, $algorithm, $HERTA_algorithm_type,
       $openning_gap_penalty, $elongation_gap_penalty, $coded_seq1,
       $coded_seq2, $coded_Matrix_name, $HERTA_coded_seq1_rv, $HERTA_coded_seq2_rv,
       $coded_seq1_rv, $coded_seq2_rv, $output_herta_file_rv, $raw_matrix_name,
       %DB_input, %seq_input, $HERTA_open_gap_penalty, $HERTA_extend_gap_penalty,
       $query_seq, $DB_seq, $NO_screen_print, $MRCS_file_query, $MRCS_FILE_HANDLE,
       $write_EACH_MRCS_pair_file, @HERTA_files_made, $keep_HERTA_files,
       $num_of_queries, $num_of_DB_seq, %sorted_pair_seqs, $overwrite_opt);

    $HERTA_algorithm_type    ='lss_dipeptide';
    # The default must also reach $algorithm: it is $algorithm that goes into
    # the herta command line and into the .mrcs file name.
    $algorithm               =$HERTA_algorithm_type;
    $openning_gap_penalty    =5;
    $elongation_gap_penalty  =1;
    $MRCS_FILE_HANDLE="MRCS_FILE_HANDLE";

    # Keep both algorithm variables in step, so that the result writer is told
    # the algorithm that was really run.
    if($vars{'M'}=~/(\S+)/){ $algorithm=$1; $HERTA_algorithm_type=$algorithm; } # gss_dipeptide, lss_dipeptide
    if($vars{'A'}=~/(\S+)/){ $openning_gap_penalty=$1 }
    if($vars{'B'}=~/(\S+)/){ $elongation_gap_penalty=$1 }
    if($vars{'P'}=~/(\S+)/){ $coded_seq1=$1 }
    if($vars{'Q'}=~/(\S+)/){ $coded_seq2=$1 }
    if($vars{'p'}=~/(\S+)/){ $coded_seq1_rv=$1 }
    if($vars{'q'}=~/(\S+)/){ $coded_seq2_rv=$1 }
    if($vars{'R'}=~/(\S+)/){ $coded_Matrix_name=$1 }
    if($vars{'F'}=~/(\S+)/){ $Query_sequences=$1 }
    if($vars{'D'}=~/(\S+)/){ $DB_sequences=$1 }
    if($vars{'X'}=~/(\S+)/){ $raw_matrix_name=$1 }
    if($char_opt=~/o/){ $overwrite_opt='o' }
    if($char_opt=~/Q/){      $NO_screen_print='Q' }
    if($vars{'E'}=~/(\S+)/){ $write_EACH_MRCS_pair_file='E'; }
    if($char_opt=~/k/){      $keep_HERTA_files='k' }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Finding the coded matrix handed to herta with -R
    #_____________________________________________________
    unless(-s $coded_Matrix_name){
       print "\n $0: do_MRC_search, \$coded_Matrix_name ($coded_Matrix_name) not here!, will check AA_matrix.matx \n";
       # NOTE(review): -b is the block-device test; -B (binary file) may have
       # been meant here and in the next branch -- left as it was.
       if(-s $raw_matrix_name and !(-b $raw_matrix_name)){
           # The encoder presumably returns a ref to the coded matrix file
           # name -- confirm.  It used to be stored in $HERTA_Matrix_name only,
           # leaving the -R argument below empty.
           $HERTA_Matrix_name=${&encode_residue_exchange_matrix("$raw_matrix_name")};
           $coded_Matrix_name=$HERTA_Matrix_name;
       }elsif( !(-e $raw_matrix_name) and -b "$coded_Matrix_name"){  $coded_Matrix_name="$coded_Matrix_name";
       }elsif(-s "AA_matrix.matx"){  $coded_Matrix_name="AA_matrix.matx";
       }else{                    die "\n \$coded_Matrix_name and \$raw_matrix_name are not given/found \n";       }
    }else{
       print "\n Using \$coded_Matrix_name : $coded_Matrix_name, GOOD!!\n\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Openning $Query_sequences and $DB_sequences
    #_____________________________________________________
    #print "\n(1) Reading and writing $Query_sequences and $DB_sequences sequences \n\n";
    %seq_input=%{&open_fasta_files(\$Query_sequences)};
    %DB_input =%{&open_fasta_files(\$DB_sequences)};
    &write_fasta_seq_by_seq(\%seq_input, 'e'); ## e makes skip writing when file already
    &write_fasta_seq_by_seq(\%DB_input, 'e'); ## e makes skip writing when file already
    @Query_seq_names= sort keys %seq_input;
    @DB_seq_names   = sort keys %DB_input;
    $num_of_queries=@Query_seq_names;
    $num_of_DB_seq =@DB_seq_names;

    for($i=0; $i< @Query_seq_names; $i++){
        $query_seq=$Query_seq_names[$i];
        $query_seq_file="$query_seq\.spfa";
        $MRCS_file_query="$query_seq\_OG$openning_gap_penalty\_EG$elongation_gap_penalty\_$algorithm.mrcs";
        if(-s $MRCS_file_query and !$overwrite_opt){
           print " :-o  skipping $MRCS_file_query\n"; next;
        }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Openning output $MRCS_FILE_HANDLE
        #________________________________________________________
        open($MRCS_FILE_HANDLE, ">$MRCS_file_query") || die "Cannot open $MRCS_file_query, do_MRC_search\n";
        print $MRCS_FILE_HANDLE "# $0: $query_seq -> $DB_sequences\n";
        # NOTE(review): the DB loop starts at $i, not 0; this looks like an
        # all-against-all run over one sorted name list -- confirm before using
        # different query and DB sets.
        for($j=$i; $j< @DB_seq_names; $j++){
           $DB_seq=$DB_seq_names[$j];
           $DB_seq_file="$DB_seq\.spfa";
           #if($query_seq eq $DB_seq){ next }
           $sorted_pair=join('_', sort($query_seq, $DB_seq));
           print "$sorted_pair: $j / $num_of_DB_seq DB seqs with $i / $num_of_queries queries\n";

           #print "\n  (2) \$sorted_pair is $sorted_pair, encoding $query_seq_file and $DB_seq_file";
           $sorted_pair_seqs{$sorted_pair}++;

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Checks if the pair has been compared. If not, do search
           #___________________________________________________________
           if($sorted_pair_seqs{$sorted_pair} == 1){
              ($coded_seq1, $coded_seq1_rv,
               $coded_seq2, $coded_seq2_rv)=@{&encode_fasta_sequences($query_seq_file, $DB_seq_file)};
               #print "\n  (3) Coded seqs are: $coded_seq1, $coded_seq2, $coded_seq1_rv, $coded_seq2_rv";
               $base1=${&get_base_names($coded_seq1)};
               $base2=${&get_base_names($coded_seq2)};
               $base1_rv=${&get_base_names($coded_seq1_rv)};
               $base2_rv=${&get_base_names($coded_seq2_rv)};
               $output_herta_file   ="$base1\_$base2\.herta";
               $output_herta_file_rv="$base1\_$base2_rv\.herta";
               push(@HERTA_files_made, $output_herta_file, $output_herta_file_rv);
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # Running forward ($coded_seq1 <-> $coded_seq2)
               #________________________________________________
               #print "\n  (5) Running herta with $coded_seq1 $coded_seq2\n Now running $coded_seq1 <=> $coded_seq2_rv";
               system("herta -M $algorithm -A $openning_gap_penalty -B $elongation_gap_penalty -P $coded_seq1 -Q $coded_seq2 -R $coded_Matrix_name > $output_herta_file");
               if(-s $output_herta_file){ #print "\n $output_herta_file is written with A=$openning_gap_penalty\n";
               }else{  die "\n $output_herta_file does not exist, something is wrong \n\n"; }

               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # Running backward ($coded_seq1 <-> $coded_seq2_rv)
               #________________________________________________
               system("herta -M $algorithm -A $openning_gap_penalty -B $elongation_gap_penalty -P $coded_seq1 -Q $coded_seq2_rv -R $coded_Matrix_name > $output_herta_file_rv");
               if(-s $output_herta_file_rv){ #print "\n $output_herta_file_rv is written with A=$openning_gap_penalty\n";
               }else{  die "\n $output_herta_file_rv does not exist, something is wrong \n\n"; }

               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               # Writing alignment
               #____________________________________________________________
               &write_MRC_search_result_file(\$output_herta_file,
                                             \$query_seq_file,
                                             \$DB_seq_file,
                                             \$query_seq,
                                             \$DB_seq,
                                             \$openning_gap_penalty,
                                             \$elongation_gap_penalty,
                                             \$HERTA_algorithm_type,
                                             $NO_screen_print,
                                             \$MRCS_FILE_HANDLE,
                                             $write_EACH_MRCS_pair_file);

           }
        }
        # Only the coded files of the last pair handled are removed here.
        unlink($coded_seq1, $coded_seq1_rv, $coded_seq2, $coded_seq2_rv);
        unless($keep_HERTA_files){
           unlink(@HERTA_files_made);
        }
        close($MRCS_FILE_HANDLE);
    }

}




#______________________________________________________________________________
# Title     : do_fasta_and_ssearch
# Usage     : $gzipped_mspa_file=${&do_fasta_and_ssearch("a=$algorithm",
#                        "O=$out_file_mspa_name", "File=$temp_file_name", "e=$E_val",
#                        "DB=$sequence_DB", "k=$k_tuple", "$machine_readable")};
#
# Function  : runs FASTA or SSEARCH algorithm (given by a= option)
# Example   :
# Keywords  : run_ssearch_sequence_search, do_fasta_sequence_search,
#             do_fasta_search
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             i= to get file base(root) name. same as File=
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_mspa to save mem
#             s  for the big single output (mspa file output I mean)
#             s= for the single big mspa file name
#             O= for Out file name, same as s=
#             o  for overwrite existing xxxx.fasta files for search
#             c  for create SSO file (sequence search out file)
#             d  for very simple run and saving the result in xxxx.gz format in sub dir starting with one char
#             r  for attaching ranges of sequences (sets $add_range; r= works too)
#             R  for attaching ranges of sequences
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#             d= for defining the size of subdir made. 2 means it creates
#                    eg, DE while 1 makes D
#             d  for $make_gz_in_sub_dir_opt, putting resultant sso files in gz format and in single char subdir
#             D  for $make_mspa_in_sub_dir_opt, convert sso to mspa and put in sub dir like /D/, /S/
#             n  for new format to create new mspa file format with sso_to_mspa routine
#          PVM=  for PVM run of FASTA (FASTA only)
#             M  for machine readable format -m 10 option
#             M= for machine readable format -m 10 option
#             N  for 'NO' do not do any processing but, do the searches only.
#       FILE_AGE for defining the age of file in days to be overwritten.
# Author    : Jong Park, jong@biosophy.org, for commercial use, ask me.
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub do_fasta_and_ssearch{
		# Purpose : run the search program named by a= (default 'fasta') on one
		#           query file against a sequence DB, turn its output into an
		#           MSPA file and, on request, also save the raw output.
		# Options : see the header above.  File=/FILE= names the query file,
		#           DB= the database (default $ENV{'NRDB_FASTA'}), O= the MSPA
		#           output file.  Note that u= sets both $upper_expect_limit and
		#           the -E value given to the program.
		# Returns : ref to the gzipped MSPA file name when it exists and the
		#           plain file does not; otherwise ref to the plain MSPA file
		#           name when that file is non-empty.
		# Dies    : when DB= names a file that is not found, when algorithm,
		#           input file or DB is missing, when open_sso_files returns
		#           nothing, or when the MSPA file cannot be written.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		# $machine_readable used to be listed twice in this declaration.
		my($algorithm, $E_val, $sequence_DB, $k_tuple, $machine_readable, $create_sso,
			 @mspa_from_temp, $gzipped_mspa_file, @temp, $add_range,
			 $out_file_mspa_name, $input_file_name, $out_file_sso_name,
			 $search_program, $format_option, $search_command);
		$algorithm='fasta';    $out_file_mspa_name='default_out_file.mspa';
		$E_val=1;              $sequence_DB=$ENV{'NRDB_FASTA'};
		$k_tuple=1;            $machine_readable='M';
		$upper_expect_limit=1; $lower_expect_limit=0;
		$Score_thresh=100;

		if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
		if($vars{'O'}=~/\S+/){ $out_file_mspa_name  = $vars{'O'}            };
		if($vars{'e'}=~/\S+/){ $E_val              = $vars{'e'}            };
		if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
		if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
		if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
		if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
		if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
		if($vars{'s'}=~/\S+/){ $single_big_mspa     = 's'                   };
		if($vars{'DB'}=~/\S+/){            $sequence_DB=$vars{'DB'} ;
				# Look for the DB here, then one and two directories up.
				if(-s $sequence_DB){
				}elsif(-s "../$sequence_DB"){  $sequence_DB= "../$sequence_DB"
				}elsif(-s "../../$sequence_DB"){  $sequence_DB= "../../$sequence_DB";
				}else{
						print "\n# (ERROR) do_sequence_search: You set DB param, but I can\'t find $sequence_DB\n";
						die;
				}
		}else{  print "\n# (ERROR) do_sequence_search: I need DB param defined, sorry, aborting\n"; }
		# NOTE(review): the branch above only prints; the run goes on with the
		# $ENV{'NRDB_FASTA'} default and stops further down if that is empty.

		if($vars{'FILE'}=~/\S+/){ $input_file_name = $vars{'FILE'}; };
		if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}; };
		if($vars{'FILE_AGE'}=~/\S+/){ $age_in_days_of_out_file= $vars{'FILE_AGE'};  };
		if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
		if($vars{'Query'}=~/\S+/){      %seq_input = %{$vars{'Query'}}};
		if($vars{'u'}    =~/\S+/){ $E_val          = $vars{'u'}            };
		if($vars{'PVM'}  =~/\S+/){ $PVM_FASTA_run  = $vars{'PVM'}; print "\n# PVM opt is set\n";     };
		if($vars{'M'}  =~/\S+/){ $machine_readable = $vars{'M'};           };

		if($char_opt=~/r/){    $add_range          = 'r' }
		if($char_opt=~/o/){    $over_write         = 'o' }
		if($char_opt=~/c/){    $create_sso         = 'c' }
		if($char_opt=~/s/){    $single_big_mspa     = 's'; print "\n# Single file opt is set\n"; }
		if($char_opt=~/M/){    $machine_readable   = 'M' }
		if($char_opt=~/N/){    $No_processing      = 'N'; $create_sso='c'; }
		print "\n# (INFO) do_fasta_and_ssearch: $algorithm,$out_file_mspa_name,$input_file_name,$E_val,DB=$sequence_DB,$k_tuple,$machine_readable\n";

		if(!$algorithm or !$input_file_name or !$sequence_DB){
				print "\n# (ERROR) One of \$sequence_DB, \$input_file_name, \$algorithm is missing\n";
				die;
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (1) Machine readable opt for fasta and ssearch set ??
		#____________________________________________________
		# A program in ./Bin wins over the one found through PATH; '-m 10' is
		# passed only in machine readable mode.  The command is built once so
		# that the (INFO) line shows exactly what is run (it used to show
		# '-m 10' in every case).
		$search_program= (-s "./Bin/$algorithm") ? "./Bin/$algorithm" : $algorithm;
		$format_option = ($machine_readable=~/M/) ? '-m 10 ' : '';
		$search_command="$search_program ${format_option}-H  -E $E_val $input_file_name $sequence_DB $k_tuple";
		print "\n# (INFO) Running $search_command\n";
		@temp=`$search_command`;
		print "\n# (INFO) \@temp has ",scalar(@temp), " lines @temp!\n" if $verbose;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (2) @temp is parsed directly by open_sso_files to make MSP hashes
		#_______________________________________________________________________
		@mspa_hashes_from_temp = @{&open_sso_files(\@temp, $add_range, "u=$upper_expect_limit", "l=$lower_expect_limit")};
		if(@mspa_hashes_from_temp < 1){
				print "\n# (ERROR) do_sequence_search : Error, something is wrong with open_sso_files, LINE=", __LINE__, "\n";
				die;
		}else{   print "\n# (INFO) Good, \@mspa_from_temp has ",scalar(@mspa_hashes_from_temp), " lines !\n";   }
		@mspa_from_temp= values %{$mspa_hashes_from_temp[0]};
		print "\n# (INFO) @mspa_from_temp\n" if $verbose;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (3) Writing MSP file, and gzipping if possible
		#________________________________________________
		open(MSP, ">$out_file_mspa_name") or die "\n# (ERROR) $out_file_mspa_name could not be open\n";
		for(@mspa_from_temp){    print MSP $_;  }
		close MSP;
		$gzipped_mspa_file=${&compress_files_by_gzip($out_file_mspa_name)};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (4) making SSO file if asked
		#__________________________________________________
		if($create_sso){
			 $seq_name=${&get_base_names($input_file_name)};
			 if($algorithm=~/ssearch/){
					$out_file_sso_name="$seq_name\.ssso";
					$out_file_sso_name_gz="$seq_name\.ssso\.gz";
			 }elsif($algorithm=~/fasta/){
					$out_file_sso_name="$seq_name\.fsso";
					$out_file_sso_name_gz="$seq_name\.fsso\.gz";
			 }
			 # The raw output is an extra; report a failed open but carry on.
			 open(SSO, ">$out_file_sso_name")
					or warn "\n# (ERROR) do_fasta_and_ssearch: $out_file_sso_name could not be open: $!\n";
			 for(@temp){  print SSO $_;  }; close (SSO);
			 if($machine_readable){
					 $out_file_msso_name="$seq_name\.msso";
					 &cp( $out_file_sso_name, $out_file_msso_name);
			 }
			 @gzipped_sso_files=@{&compress_files_by_gzip($out_file_sso_name, $out_file_msso_name)};

			 if(@gzipped_sso_files > 0){  print "\n# (INFO) @gzipped_sso_files are created"; }
		}
		if(-s $gzipped_mspa_file and !(-s $out_file_mspa_name)){
			 print "\n# (INFO) $gzipped_mspa_file is found and will be returned";
			 return(\$gzipped_mspa_file);
		}elsif(-s $out_file_mspa_name){
			 print "\n# (INFO) $out_file_mspa_name is found and will be returned";
			 return(\$out_file_mspa_name);
		}
}## end of do_fasta_and_ssearch



#__________________________________________________________________
# Title     : do_intermediate_sequence_search
# Usage     : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_mspa, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
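# Function  : Runs one search (fasta by default, see a= option) of the query file
#             against the target DB, converts the search output with open_sso_files
#             and writes the resulting MSP lines to <query base name>.mspa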
# Example   : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_mspa, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_mspa to save mem
#             s  for the big single output (mspa file output I mean)
#             o  for overwrite existing xxxx.fasta files for search
#             c  for create SSO file (sequence search out file)
#             R  for adding ranges to the enquiry sequences as well.
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#
# Returns   : the names of files created (xxxxx.mspa, yyy.mspa,,)
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------------------
sub do_intermediate_sequence_search{
	# Purpose : runs a single $algorithm search of the query file against the
	#           target DB, turns the raw search output into MSP lines with
	#           open_sso_files and writes them to "<query base name>.mspa".
	#           The MSP lines are also echoed to STDOUT.
	# Options read from the arguments (through handle_arguments):
	#           a= algorithm, u= / l= expect limits, k= k-tuple, t= score thresh,
	#           m= margin, e= E value given to the search program, DB=, File=,
	#           Query_seqs=, and the single char options r R c s m i.
	# Returns : reference to a scalar holding the name of the MSP file written.
	# Dies    : when the MSP output file can not be opened for writing.
	# Note    : $input_file_name, $mspa_directly_opt and $do_intermediate_search
	#           are NOT declared with my here, so they are package globals; a value
	#           of $input_file_name left over from an earlier call is reused when
	#           no File= option is given.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@final_out, $add_range, $single_big_mspa, $base_name, $create_sso, @nondup,
	   $Single_mspa_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
	   @sso, @temp, $algorithm, $margin, $out_mspa_file, @MSP, @final_mspa_file_names_out,
	   $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $add_range_to_enquiry );
	my ($E_val) = 5;  ## default value handed to the search program with -E

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# DEFAULTS
	#________________________________________
	$k_tuple=1;
	$algorithm='fasta';
	$upper_expect_limit=10;
	$lower_expect_limit=0;
	$Score_thresh      =75;
	$margin            =0;
	$add_range         ='';
	$sequence_DB       =$ENV{'PDB40D_FASTA'};

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# "xx=value" style options override the defaults
	#________________________________________
	if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
	if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
	if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
	if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
	if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
	if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
	if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
	if($vars{'s'}=~/\S+/){ $single_big_mspa     = 's'                   };
	if($vars{'DB'}=~/\S+/){ $sequence_DB       = $vars{'DB'}           };
	if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}         };
	if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
	if($vars{'e'}         =~/\S+/){ $E_val     = $vars{'e'}            };

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# single char options
	#________________________________________
	if($char_opt=~/r/){    $add_range            = 'r' }
	if($char_opt=~/R/){    $add_range_to_enquiry = 'R'  }
	if($char_opt=~/c/){    $create_sso           = 'c' }
	if($char_opt=~/s/){    $single_big_mspa       = 's'; print "\n# Single file opt is set\n"; }
	if($char_opt=~/m/){    $mspa_directly_opt     = 'm' }
	if($char_opt=~/i/){    $do_intermediate_search   = 'i' }

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	#  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB
	#   and the second file input is the query file
	#_______________________________________________________________________________________
	if($input_file_name=~/^$/){  $input_file_name=$file[1];
	  print "\n# You did not use \"File=\$XXX\" format\n"  };
	if($sequence_DB=~/^$/){      $sequence_DB    =$file[0];
	  print "\n# You did not use \"DB=\$XXX\" format\n"   };
	print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta_seq_by_seq";
	# The message used to claim it came from do_sequence_search, which made logs misleading
	print "\n# I am in do_intermediate_sequence_search sub, Target database used :  $sequence_DB \n";

	my $base = ${&get_base_names($input_file_name)};
	$out_mspa_file="$base\.mspa";

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Running the search program, its STDOUT is kept in @temp
	# NOTE(review): file names are interpolated into a shell command line,
	#  names with spaces or shell meta characters will break it.
	#_________________________________________________________________
	@temp=`$algorithm -m 10 -H  -E $E_val $input_file_name $sequence_DB $k_tuple`;
	if(@temp < 40){	  print "\n# There must be error , \@temp is too small\n\n";   }

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Converting the search output, only the values of the first hash
	#  returned by open_sso_files are used as output lines
	#_________________________________________________________________
	my @mspa_hashes_from_temp = @{&open_sso_files(\@temp, $add_range,
												"u=$upper_expect_limit",
												"l=$lower_expect_limit",
												$add_range_to_enquiry)};
	my @mspa_from_temp= values %{$mspa_hashes_from_temp[0]};
	$MSP{$out_mspa_file} = \@mspa_from_temp;

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Writing the MSP file. A failed open used to be ignored silently,
	#  so the sub returned the name of a file which was never written.
	#_________________________________________________________________
	open(MSPOUT, ">$out_mspa_file")
		or die "\n# (ERROR) do_intermediate_sequence_search: $out_mspa_file could not be open: $!\n";
	foreach my $mspa_line (@mspa_from_temp){
		print MSPOUT $mspa_line;
		print $mspa_line;
	}
	close(MSPOUT)
		or print "\n# (WARN) do_intermediate_sequence_search: problem closing $out_mspa_file: $!\n";
	return(\$out_mspa_file);
}


#____________________________________________________________________________________
# Title     : do_sequence_search
# Usage     : &do_sequence_search("Query_seqs=\%pdb_seq", "DB=$sequence_DB",
#  		         "File=$ARGV[0]", $single_mspa, $over_write,
# 	        	 "u=$upper_expect_limit", "l=$lower_expect_limit",
#       		 "k=$k_tuple", $No_processing );
# Function  : do FASTA, SSEARCH or BLASTPGP(psi-blast) search
# Example   : &do_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_mspa, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Keywords  : sequence_search
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_mspa to save mem
#             s  for the big single output (mspa file output I mean)
#             s= for the single big mspa file name
#             o  for overwrite existing xxxx.fasta files for search
#             c  for create SSO file (sequence search out file)
#             d  for very simple run and saving the result in xxxx.gz format in sub dir starting with one char
#             r  for reverse the query sequence
#             R  for attaching ranges of sequences
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             E= for $Evalue_thresh
#             a= for choosing either fasta or ssearch algorithm
#             d= for defining the size of subdir made. 2 means it creates
#                    eg, DE while 1 makes D
#             d  for $make_gz_in_sub_dir_opt, putting resultant sso files in gz format and in single char subdir
#             D  for $make_mspa_in_sub_dir_opt, convert sso to mspa and put in sub dir like /D/, /S/
#             n  for new format to create new mspa file format with sso_to_mspa routine
#          PVM=  for PVM run of FASTA (FASTA only)
#             M  for machine readable format -m 10 option
#             M= for machine readable format -m 10 option
#             N  for 'NO' do not do any processing but, do the searches only.
#       FILE_AGE for defining the age of file in days to be overwritten.
#             L  for Lean output(removes xxxx.fasta query seq file)
#   $take_last_iter_psi_bla=l by l
#
# Returns   : the names of files created (xxxxx.mspa, yyy.mspa,,)
# Version   : 5.8
#----------------------------------------------------------------------------------------
sub do_sequence_search{
    # Purpose : front end for FASTA / SSEARCH / PSI-BLAST searches of query sequences
    #           against a target sequence DB. Three modes are handled in this order:
    #            [0] $algorithm looks like a blast name -> do_psi_blast_search
    #            [1] 'D' char option -> one sub dir per sequence (first $sub_dir_size
    #                chars of the upper cased name), results written inside it
    #            [2] otherwise -> results written in the current dir, one MSP file
    #                per query sequence through run_fasta_sequence_search
    # Returns : [0] and [2] return a reference to @final_out (file names),
    #           [1] returns nothing.
    # Dies    : when the DB= file can not be found, when the query file gives no
    #           sequence, or when query/DB are not defined (after asking the user).
    # Note    : $query_File, $machine_readable, $mspa_directly_opt, $make_MSP_files,
    #           $save_in_gz_in_sub_dir, $pwd, $verbose, $out_file_sso_name,
    #           $each_seq_fasta, $out_sso_file, $out_sso_gz_name and $out_msso_file
    #           are not declared with my, so they are package globals which keep
    #           their value between calls.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    # ($upper_expect_limit and $Evalue_thresh used to be listed twice in this my list)
    my (@final_out, $add_range, $single_big_mspa, $base_name, $create_sso, @nondup,
         $Single_mspa_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
         @sso, @temp, $algorithm, $margin, $out_mspa_file, @MSP, @final_mspa_file_names_out,
         $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $No_processing,
         $new_format, $PVM_FASTA_run, $over_write, $sub_dir_size, $age_in_days_of_out_file,
         $over_write_by_age, $Lean_output, $gzipped_mspa_file, $gzipped_sso_file,
         $defined_all_ok, $make_mspa_in_sub_dir_opt, $take_last_iter_psi_bla );

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFAULTS
    #________________________________________
    $k_tuple           =1;  # 1 or 2, 1 is more sensitive
    $algorithm         ='fasta';
    $sub_dir_size      =2;  # the default char number taken from seq name to make sub dirs
    $upper_expect_limit=5;
    $lower_expect_limit=0;
    $Score_thresh      =75; # FASTA or SSSEARCH score
    $margin            =0;  # sequence region margin. If it is 2, 2 more edged residues will be added
    $add_range         ='';
    $pwd               =`pwd`; chomp($pwd);
    $age_in_days_of_out_file=1000; ## If the files being tested is older than this, let's make anew

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # "xx=value" style options
    #________________________________________
    if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
    if($vars{'u'}=~/\S+/){ $upper_expect_limit = $vars{'u'}            };
    if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
    if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
    if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
    if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
    if($vars{'d'}=~/\d+/){ $sub_dir_size       = $vars{'d'}            };
    if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
    if($vars{'s'}=~/\S+/){ $single_big_mspa     = 's'                   };
    if($vars{'DB'}=~/\S+/){            $sequence_DB=$vars{'DB'} ;
        # The DB is looked for in the current dir and up to two dirs above
        if(-s $sequence_DB){
        }elsif(-s "../$sequence_DB"){  $sequence_DB= "../$sequence_DB"
        }elsif(-s "../../$sequence_DB"){  $sequence_DB= "../../$sequence_DB";
        }else{
                print "\n# (E) do_sequence_search: You set DB param, but I can\'t find $sequence_DB\n";
                die;
        }
    }else{
        # Not fatal here in spite of the message: the check_if_defined step below
        #  offers a self-self search when only one of query/DB is known.
        print "\n# (E) do_sequence_search: I need DB param defined, sorry, aborting\n";
    }

    if($vars{'FILE'}=~/\S+/){ $query_File = $vars{'FILE'}; push(@file,$query_File) };
    if($vars{'File'}=~/\S+/){ $query_File = $vars{'File'}; push(@file,$query_File) };
    if($vars{'FILE_AGE'}=~/\S+/){ $age_in_days_of_out_file= $vars{'FILE_AGE'};  };
    if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
    if($vars{'Query'}=~/\S+/){      %seq_input = %{$vars{'Query'}}};
    if($vars{'PVM'}  =~/\S+/){ $PVM_FASTA_run  = $vars{'PVM'}; print "\n# PVM opt is set\n";     };
    if($vars{'M'}  =~/\S+/){ $machine_readable = $vars{'M'};           };
    if($vars{'E'}  =~/\S+/){ $Evalue_thresh = $vars{'E'};           };

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # single char options
    #________________________________________
    if($char_opt=~/l/){    $take_last_iter_psi_bla= 'l' }
    if($char_opt=~/r/){    $add_range             = 'r' }
    if($char_opt=~/L/){    $Lean_output           = 'L' }
    if($char_opt=~/o/){    $over_write            = 'o' }
    if($char_opt=~/c/){    $create_sso            = 'c' }
    if($char_opt=~/s/){    $single_big_mspa        = 's'; print "\n# Single file opt is set\n"; }
    if($char_opt=~/m/){    $mspa_directly_opt      = 'm' }
    if($char_opt=~/M/){    $machine_readable      = 'M' }
    if($char_opt=~/d/){    $save_in_gz_in_sub_dir = 'd' }
    if($char_opt=~/D/){$make_mspa_in_sub_dir_opt   = 'D' } # for simple search and storing mspa file
    if($char_opt=~/N/){    $No_processing         = 'N'; $create_sso='c'; }
    if($char_opt=~/q/){    $make_MSP_files        = 'q' }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
    # When no %seq is given, but files
    #___________________________________________
    if(@hash==0 and @file > 0){
         print "\n# (i) do_sequence_search: You did not put sequences as in \%seq, but raw sequence file @file!\n";
         print "        I will run \'open_fasta_files\' sub to fetch sequences to store in \%seq_input\n";
         %seq_input=%{&open_fasta_files(\@file)};
         # Only an empty hash is an error. The old test (%seq_input > 1) also
         #  rejected a query file holding exactly one sequence.
         unless(keys(%seq_input) > 0){
            print "\n !!!! \%seq_input hash is too small, Error opening \@file : @file, in do_sequence_search sub \n\n";
            die;
         }
    }else{
         #print "\n# (i) do_sequence_search: I will use given seqs in \%seq_input from \%\{\$hash\[0\]\}\n";
         %seq_input=%{$hash[0]};
    }
    my (@seq_names) = keys %seq_input;

    $base_name = ${&get_base_names($query_File)};
    print "\n# (i) line:",__LINE__, ", do_sequence_search, \$algorithm => $algorithm, \$base_name:$base_name
                         $query_File <--> $sequence_DB\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
    # If one of the files is missing, ask if it is for self self search and
    #  make query=target, else die
    #_______________________________________________________________________
    $defined_all_ok=&check_if_defined($query_File, $sequence_DB);
    unless($defined_all_ok){
         print "\n  Did you want to do self self search? ->(y/n) ";
         $answer_for_self_self=getc;
         if($answer_for_self_self =~/y/i){
              if($query_File){  $sequence_DB=$query_File }
              else{ $query_File=$sequence_DB }
         }else{
              print "\n# You seemed made a mistake, O.K., I will kill myself!\n\n";
              print chr(7);  die;
         }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # [0] If blast is chosen run Blast
    # NOTE(review): this pattern matches ANY algorithm string which contains
    #  'p' or 'b' (so a path like /usr/bin/fasta matches too). It is left as
    #  it is because short forms like 'b' or 'p' may be in use - please confirm.
    # NOTE(review): $source_DB_file and $input_seq_file are not set anywhere in
    #  this sub; presumably $sequence_DB and $query_File were meant - verify
    #  against do_psi_blast_search.
    #_________________________________________________________
    if($algorithm=~/[psi\-]*[pb][last]*/i){
         print "\n# (i) Doing PSI search with @file\n";
         @final_out=@{&do_psi_blast_search(\@file, "d=$source_DB_file",
                                            "i=$input_seq_file",
                                            $over_write,
                                            $make_mspa_in_sub_dir_opt,
                                            $Lean_output,
                                            $make_MSP_files,
                                            $take_last_iter_psi_bla)};
         return(\@final_out); #<<<<<<----------- F I N I S H
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # [1] Controlling which kind of search it should do. Do save_in_gz_in_sub_dir first if d is set
    #______________________________________________________________________________________________
    if( $make_mspa_in_sub_dir_opt ){  ## convert sso to mspa and put in sub dir like /D/, /S/
        print "[1] \$make_mspa_in_sub_dir_opt";
        for($x=0; $x < @seq_names; $x++){
             # $existing_mspa is now lexical as well, so a hit for one sequence can
             #  no longer leak into the age test of the next one
             my ($over_write_sso_by_age, $over_write_mspa_by_age,  %single_seq, $out_file_sso_gz_name,
                 $out_file_mspa_name, $out_file_gz_name, $existing_sso, $existing_mspa);
             my ($seq_name, $seq)= ($seq_names[$x], $seq_input{$seq_names[$x]});
             my $first_char= substr("\U$seq_name", 0, $sub_dir_size);
             mkdir ("$first_char", 0777) unless -d $first_char;
             # Without this check a failed chdir was followed by chdir('..') at the
             #  end of the loop, which moved the process out of the working dir
             unless(chdir("$first_char")){
                 print "\n# (E) do_sequence_search: can not chdir to sub dir \'$first_char\' for $seq_name ($!), skipping it\n";
                 next;
             }
             #print "\n# (i) do_sequence_search: You set \'d\' or \'D\' opt\n";
             #print "# (i) making subDIRs ($first_char) with $seq_name $sequence_DB to store MSP files\n";

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # Let's make each fasta file for each seq to be used in searching
             #_____________________________________________________________________
             my $temp_file_name="$seq_name.fasta";
             %single_seq=($seq_name, $seq_input{$seq_name});
             &write_fasta(\%single_seq, $temp_file_name ); ## e for writing each file

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # Making output file name according to the option given
             # NOTE(review): [fastassearch]+ is a character class, it matches any
             #  name containing one of these letters, not only fasta/ssearch.
             #_______________________________________________________________________
             if($machine_readable and $algorithm=~/[fastassearch]+/){  $out_file_sso_name="$seq_name\.msso";
             }else{ $out_file_sso_name="$seq_name\.sso";      }
             $out_file_sso_gz_name    ="$out_file_sso_name\.gz";
             $out_file_mspa_name       ="$seq_name\.mspa";
             $out_file_gz_name        ="$seq_name\.mspa\.gz";

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # Check if SSO/MSP file already there. The tests used to look at
             #  $out_sso_file, $out_sso_gz_name, $out_mspa_name and $out_gz_name,
             #  which are never set in this branch.
             #_______________________________________________________________________
             if(-s $out_file_sso_name){ $existing_sso=$out_file_sso_name }
             elsif(-s $out_file_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
             if(-s $out_file_mspa_name){ $existing_mspa=$out_file_mspa_name }
             elsif(-s $out_file_gz_name){ $existing_mspa=$out_file_gz_name }

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # If the dates of files created are long ago, overwrite to refresh.
             # -M gives the age of the file in days. The old code compared the
             #  day-of-month field of localtime(time - mtime) with the limit.
             #____________________________________________________________________
             if( defined($existing_sso) and -e $existing_sso
                 and (-M $existing_sso) > $age_in_days_of_out_file ){  $over_write_sso_by_age='o';  }
             if( defined($existing_mspa) and -e $existing_mspa
                 and (-M $existing_mspa) > $age_in_days_of_out_file ){  $over_write_mspa_by_age='o';  }

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             #  To check if the target seq DB is in ../
             #________________________________________________
             if(-s $sequence_DB){     #print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
             }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # [1.1] Searching: Making MSP files directly,  MSP file format is the major format used in prescop!, Default
             #       ($make_mspa_in_sub_dir_opt is only set by the D char option, so this
             #        branch is the one taken whenever this loop runs)
             #_____________________________________________________________________________________________________________
             if($char_opt =~/D/){ #### To make MSP file
                 print "\n [1.1] \$char_opt = D";
                 if( !(-s $out_file_gz_name or -s $out_file_mspa_name) or $over_write or $over_write_mspa_by_age){
                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      # (2.1) Running  run_fasta_sequence_search !!
                      #_______________________________________________________
                      print "\n  [1.1.1] Running  run_fasta_sequence_search, \$create_sso = $create_sso!!\n";
                      $gzipped_mspa_file=${&run_fasta_sequence_search(
                                                  "a=$algorithm",
                                                  "O=$out_file_mspa_name",
                                                  "File=$temp_file_name", "u=$upper_expect_limit", "E=$Evalue_thresh",
                                                  "DB=$sequence_DB", "k=$k_tuple", "$machine_readable", $create_sso,
                                                  $verbose)};

                      $gzipped_sso_file=${&compress_files_by_gzip($out_file_sso_name)};
                   }else{
                      print "\n#  [1.1.2] Line No. ", __LINE__,", $out_file_gz_name already exists or
                                     \$over_write is set or NOT older than $age_in_days_of_out_file\n";
                   }
             }
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # [1.2] To make gzipped SSO files and MSP files
             #_______________________________________________
             elsif($create_sso or $char_opt=~/m/){ ### To make gzipped SSO files
                  print "\n [1.2] \$char_opt = D";
                      if( !(-s $out_file_sso_name or -s $out_file_sso_gz_name ) or $over_write or $over_write_sso_by_age){
                          print "\n#  [1.2.1] Running  run_fasta_sequence_search with \"\$create_sso option\"!!\n\n";
                          $gzipped_mspa_file=${&run_fasta_sequence_search(
                                               "a=$algorithm",
                                               "O=$out_file_mspa_name", "$create_sso",
                                               "File=$temp_file_name", "u=$upper_expect_limit", "E=$Evalue_thresh",
                                               "DB=$sequence_DB", "k=$k_tuple", "$machine_readable", $create_sso,
                                               $verbose)};

                          $gzipped_sso_file=${&compress_files_by_gzip($out_file_sso_name)};
                      }else{
                          print "\n#  [1.2.2]Line No. ", __LINE__,", $out_file_gz_name already exists or
                                      \$over_write is set or NOT older than $age_in_days_of_out_file\n";
                      }
             }else{
                  if( !(-s $out_file_sso_name or -s $out_file_sso_gz_name ) or $over_write or $over_write_sso_by_age){
                          system(" $algorithm -m 10 -H  -E $upper_expect_limit $temp_file_name $sequence_DB $k_tuple > $out_file_sso_name");
                          system("gzip $out_file_sso_name");
                  }else{
                          print "\n#  Line No. ", __LINE__,", $out_file_gz_name already exists or
                                      \$over_write is set or NOT older than $age_in_days_of_out_file\n";
                  }
             }
             # The temporary query fasta file is not needed any more
             if(-s "$seq_name.fasta"){  unlink("$seq_name.fasta")
             }elsif(-s "$first_char/$seq_name.fasta"){ unlink("$first_char/$seq_name\.fasta") ; }
             #print "\n# Sub dir $first_char and $seq_name\.mspa has been made, finishing do_sequence_search\n";
             chdir ('..');
        }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # F I N I S H : nothing is returned in the sub dir mode
        #  (this was a 'goto EXIT' to an empty label at the end of the sub)
        #________________________________________
        return;
    } # if ($char_opt =~/[dD]/){  is finished


    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # (2) Writing on PWD. This is the big single MSP output
    #     Note that $over_write gets set whenever the single big
    #     MSP file is not already there.
    #____________________________________________________________
    $Single_mspa_out_file="$base_name\.mspa" if($single_big_mspa eq 's');
    if(-s $Single_mspa_out_file and !$over_write ){
         print "\n# (i) $Single_mspa_out_file exists, and no \$over_write is set, skipping \n";
         push(@final_out, $Single_mspa_out_file);
    }else{  $over_write  ='o';  }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check if it is necessary to write each sequences.fasta files
    #______________________________________________________
    if( $over_write ){  &write_fasta_seq_by_seq(\%seq_input, 'e'); } ## e for writing each seq file

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    #  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB etc
    #_______________________________________________________________________________________
    $defined_all_ok=&check_if_defined($query_File, $sequence_DB);
    if(!$defined_all_ok){ print "\n# (E) FATAL: do_sequence_search: You did not use \"DB=\$XXX\" format\n"; die   };

    print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta";
    print "\n# I am in do_sequence_search sub, Target database used :  $sequence_DB with seqs of \'@seq_names\'\n";


    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Main search with given @seq_names
    #______________________________________________________________
    for($j=0; $j< @seq_names; $j++){  # @seq_names has sequence names coming from  (@seq_names) = keys %seq_input;
         my ($over_write_sso_by_age, @temp, $existing_sso, $out_gz_name,
             $over_write_mspa_by_age, $existing_mspa, $out_mspa_file, $seq_name);
         $seq_name=$seq_names[$j];
         $each_seq_fasta="$seq_name\.fasta";
         $out_mspa_file="$seq_name\.mspa";
         $out_gz_name="$seq_name\.mspa\.gz";
         $out_msso_file="$seq_name\.msso";

         &die_if_file_not_present($each_seq_fasta);

         print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
         if($algorithm=~/fasta/){       $out_sso_file="$seq_name\.fsso";
         }elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
         # was built from the never defined $out_sso_name, which always gave ".gz"
         $out_sso_gz_name="$out_sso_file\.gz";

         if(-s $out_sso_file){ $existing_sso=$out_sso_file }
         elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
         if(-s $out_mspa_file){ $existing_mspa=$out_mspa_file }
         elsif(-s $out_gz_name){ $existing_mspa=$out_gz_name }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # Age test in days with -M (see the same test in the sub dir mode)
         #_________________________________________________________________
         if( defined($existing_sso) and -e $existing_sso
             and (-M $existing_sso) > $age_in_days_of_out_file ){
              $over_write_sso_by_age='o';
         }
         if( defined($existing_mspa) and -e $existing_mspa
             and (-M $existing_mspa) > $age_in_days_of_out_file ){
              $over_write_mspa_by_age='o';
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  To check if the target seq DB is in ../
         #________________________________________________
         if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
         }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
         }elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # If MSP file already exist
         #_____________________________________________________________
         if( -s $out_mspa_file and !$over_write_mspa_by_age and !$over_write ){
              print "\n# (i) File: $out_mspa_file exists, skipping, to overwrite use \'o\' opt or set days";
              push(@final_out, $out_mspa_file);
         }else{  ## -E is for e value cutoff. -b is for num of seq fetched
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~
              #  K-tuple is 1 by default. If xxxx.sso exsts, skip running fasta or ssearch
              #________________________________________________________________________________
              if(-s $out_sso_file and !$over_write ){ ## If SSO is already present, JUST READ IT!
                   # NOTE(review): the lines read into @temp are only counted here,
                   #  no MSP file is made from them in this branch - confirm if intended
                   print "\n# (i) Just opening existing $out_sso_file $out_sso_file $out_mspa_file $over_write_mspa_by_age $over_write\n";
                   open(SSO_ALREADY, "$out_sso_file") || warn "\nWarnning. Can not open $out_sso_file\n";
                   @temp=<SSO_ALREADY>;
                   print "\n# (i) \@temp has ", scalar(@temp), " lines\n";
                   close(SSO_ALREADY);
                   &compress_files_by_gzip($out_sso_file);
              }else{ ## Run FASTA HERE
                   print "\n# (i) Running \"run_fasta_sequence_search\" ";
                   $gzipped_mspa_file=${&run_fasta_sequence_search( "a=$algorithm",
                                                 "O=$out_mspa_file", "$create_sso",
                                                 "File=$each_seq_fasta", "E=$upper_expect_limit",
                                                 "DB=$sequence_DB", "k=$k_tuple", "$machine_readable")};
                   push(@final_out, $gzipped_mspa_file) if -s $gzipped_mspa_file ;
                   unlink($each_seq_fasta) if $Lean_output;
              }
         }
         if($machine_readable and $create_sso and -s $out_sso_file){ &cp($out_sso_file, $out_msso_file); }
    } # end of for($j=0; $j< @seq_names; $j++){
    return(\@final_out);

} # do_sequence_search



#__________________________________________________________________________
# Title     : do_hmm_sequence_search
# Usage     : &do_hmm_sequence_search(\@file, "method=$default_search_method",
#								$over_write, "DB=$pdbd40_seq_fasta");
#
# Function  : does hmm sequence search using Sean Eddy's HMMER (hmmls, hmmfs)
# Example   :
# Keywords  : do_seq_search_with_hmm, do_hmmt_sequence_search
# Options   :
#    "method=ls"  for turning hmmls search option on (default)
#    "method=fs"  for turning hmmfs search option on
#    method= by method=
#   o  for overwriting existint xxxxx.hmm files
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
#   t=20  for score thresh at the level of hmmls. Default of hmmls is 0. example showed has 15
#   $evalue_cutoff= by e=
#   $over_write = o by -o o
# Returns   :
# Argument  :
# Version   : 1.6
#----------------------------------------------------------------------------
sub do_hmm_sequence_search{
		# Purpose : runs an HMM search program once for every input file in @file
		#           against the target DB and saves the program output in
		#           "<base>.hmmls" (or "<base>.hmmfs" when hmmfs is used), where
		#           <base> is the E= name when given, else the base name of the file.
		# Options : DB=  target DB file, E= enquiry (output base) name,
		#           t= score threshold (default 5), e= evalue cutoff (default 3),
		#           method= ls|fs (only looked at when neither hmmsearch nor hmmls
		#           is found in the PATH), o char option for overwriting.
		# Returns : reference to an array of output file names when more than one
		#           was made, otherwise a reference to a scalar (the first name).
		# Dies    : when no search program can be chosen or no target DB exists.
		# Note    : $default_search_method, $target_DB, $enquiry_name, $base and
		#           $output_hmm_result are package globals (not declared with my).
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($version_number, @out_hmm_file_names, $evalue_cutoff);
		my $score_thresh=5; # default threshold

		$evalue_cutoff=3;
		$default_search_method='hmmsearch';
		$version_number=2; ## default, 2 means the "-T score -E evalue" command form is used

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# checking the existence of hmm search file
		#_____________________________________________________
		if(&check_file_exists_in_path("hmmsearch")){
				$default_search_method='hmmsearch';
				$version_number=2;
		}elsif(&check_file_exists_in_path("hmmls")){
				$default_search_method='hmmls';
				$version_number=1;
		}else{
				print "\n# (ERROR) $0 can not find hmmsearch or hmmls, Please put them in the PATH\n\n";
				# NOTE(review): with method=ls the program stays 'hmmsearch', and with
				#  method=fs hmmfs is run with $version_number still 2 (the -T/-E
				#  command form) - confirm that this is what is wanted.
				if($vars{'method'}=~/ls/){
				}elsif( $vars{'method'}=~/fs/){ $default_search_method='hmmfs';
				}else{ die; }
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
		# Checking the target DB file
		# NOTE(review): the message names PDB40D_FASTA while the variable
		#  read is PDBD40_SEQ_FASTA - confirm which ENV name is right.
		#______________________________________________________________
		if( $vars{'DB'} =~/\S/ and -s $vars{'DB'}){  $target_DB = $vars{'DB'}
		}else{  print "\n# (WARN) I need target DB to search for hmmls-fs. Use: DB=xxxx.fasta form";
				print "\n#     or Default PDB40D_FASTA ENV setting will be used for DB\n";
				$target_DB= $ENV{'PDBD40_SEQ_FASTA'};
				unless(-s $target_DB){
					 print "\n# (ERROR) Even the default DB setting $target_DB does not exist, check path/file\n\n";
					 die;
				}
		}
		if($vars{'E'}=~/\S/){ $enquiry_name        =$vars{'E'} }
		if($vars{'t'}=~/\S/){ $score_thresh        =$vars{'t'} }
		if($vars{'e'}=~/\S/){ $evalue_cutoff       =$vars{'e'} }

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# running hmm with @file (hmmb files) against target_DB
		#____________________________________________________________
		for($i=0; $i< @file; $i++){
				print "\n# Running do_hmm_sequence_search with $file[$i], options are: $char_opt\n";
				if($vars{'E'}=~/\S/){ $base=$enquiry_name;   # When $enquiry_name is given, it uses for output name
				}else{
						$base=${&get_base_names($file[$i])};
				}

				if($default_search_method=~/hmmfs/){
						$output_hmm_result = "$base\.hmmfs";
				}elsif($default_search_method=~/hmmsearch/ or $default_search_method=~/hmmls/){
						$output_hmm_result = "$base\.hmmls";
				}
				if($char_opt=~/o/ or !(-s $output_hmm_result) ){
						# The command line is built once so that what is printed is
						#  exactly what is run
						my $search_command;
						if($version_number==2){
								$search_command="$default_search_method -T $score_thresh -E $evalue_cutoff $file[$i] $target_DB > $output_hmm_result";
						}else{
								$search_command="$default_search_method -t $score_thresh $file[$i] $target_DB > $output_hmm_result";
						}
						print "Running: $search_command\n";
						# A failing search used to go unnoticed, now it is reported
						#  (the output name is still returned as before)
						if(system($search_command) != 0){
								print "\n# (WARN) do_hmm_sequence_search: \"$search_command\" failed, system returned $?\n";
						}
				}else{
						# This message used to interpolate the never set $out_hmm_file
						print "\n# The $output_hmm_result file already exists. To overwrite use -o opt\n";
				}
				push(@out_hmm_file_names, $output_hmm_result);
		}
		if(@out_hmm_file_names > 1){
			 return(\@out_hmm_file_names);
		}else{
			 return(\$out_hmm_file_names[0]);
		}
}

#_______________________________________________________________________
# Title     : divide_clusters
# Usage     : &divide_clusters(\@file);
# Function  : This is the main function for divclus.pl
#               divides complex single linkage cluster into smaller duplication
#               module level sub clusters.
# Example   : &divide_clusters(\@file, $verbose, $range, $merge, $sat_file,
# 	                $dindom, $indup, "T=$length_thresh", "E=$Evalue_thresh", $over_write,
#                   $optimize, "s=$score", "f=$factor");
#
# Keywords  : divicl, divclus, div_clus, divide clusters
# Options   : _  for debugging.
#   f=<digit>   for determining the factor in filtering out non-homologous
#                  regions, 7 = 70% now!!
#   l=<digit>   for seqlet(duplication module) length threshold
#   t=<digit>   for seqlet(duplication module) length threshold
#                  (same as l opt, confusing, huh? )
#   s=<digit>   for score threshold
#   E=<digit>   for evalue threshold
#   z           for activating remove_similar_sequences, rather than remove_dup....
#   o           for overwriting
#   v           for verbose printout (infor)
#   D           for dynamic factor
#   S  $short_region=  S by S -S  # taking shorter region overlap in removing similar reg
#   L  $large_region=  L by L -L  # taking larger  region overlap in removing similar reg
#   A  $average_region=A by A -A  # taking average region overlap in removing similar reg
#   o  for $over_write
#
# Version   : 3.3
#------------------------------------------------------------------------
sub divide_clusters{
    # Purpose : for every input file, build merged seqlet groups with
    #           merge_sequence_in_mspa_file(), repeatedly fuse any two groups
    #           that check_linkage_of_2_similar_seqlet_sets() reports as linked,
    #           hand the groups to show_subclusterings() and finally write
    #           'good_bad.list'.
    # Input   : arguments are sorted out by handle_arguments(); this sub reads
    #           the file list (@file), the single-letter options ($char_opt)
    #           and the key=value options (%vars) from its result.
    # Returns : reference to @sub_clustering_out_files.
    # Note    : the single-file branch calls exit (not return) when the output
    #           .clu file already exists and 'o' was not given.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    # NOTE(review): @sub_clustering_clu_files is declared here but never used;
    #   the results are collected in (and returned from) the undeclared package
    #   array @sub_clustering_out_files, which therefore is not emptied between
    #   calls -- confirm whether that is intended.
    my($merge, $verbose, $sat_file, $length_thresh, $factor, $indup, $indup_percent,
         $score, @temp_show_sub, $optimize, $file, $Evalue_thresh, $over_write, $din_dom,
         $sum_seq_num, $base_1, $output_clu_file, $short_region, $large_region,
         $average_region, $dynamic_factor, @sub_clustering_clu_files,
         @splited1, $link_or_not,  %duplicate);

    $Evalue_thresh=0.001; # the default
    $factor=7; # default factor is 7 for 70%
    $length_thresh=30;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Dealing with options
    #   Each test below is an independent 'if' (the '}if(' layout is NOT an
    #   elsif chain), so several options can be switched on at once.
    #   Dynamic factor is switched on by the letter 'y' here.
    #   When both are given, 'l' overrides 'T' and 'E' overrides 'e'.
    #_________________________________________
    if($char_opt=~/m/){        $merge='m';
    }if($char_opt=~/v/){       $verbose='v'; # for showing debugging information
    }if($char_opt=~/i/){       $indup='i';
    }if($char_opt=~/z/){       $optimize='z';
    }if($char_opt=~/o/){       $over_write='o';
    }if($char_opt=~/d/){       $din_dom='d';
    }if($char_opt=~/s/){       $sat_file='s';
    }if($char_opt=~/y/){       $dynamic_factor='y';
    }if($char_opt=~/S/){       $short_region  ='S';
    }if($char_opt=~/L/){       $large_region  ='L';
    }if($char_opt=~/A/){       $average_region='A';
    }if($vars{'T'}=~/\d+/){    $length_thresh= $vars{'T'};
    }if($vars{'l'}=~/\d+/){    $length_thresh= $vars{'l'}; ## synonym of 't'
    }if($vars{'f'}=~/\S+/){    $factor= $vars{'f'};
    }if($vars{'s'}=~/\d+/){    $score = $vars{'s'};
    }if($vars{'e'}=~/\d+/){    $Evalue_thresh= $vars{'e'}; # synonym of e
    }if($vars{'E'}=~/\d+/){    $Evalue_thresh= $vars{'E'}; # synonym of e
    }
    # $percent_fac is not declared with my(), so it is a package variable.
    $percent_fac=$factor*10; # <-- this is just to show the factor in %
    print "\n(i) Input to divide_clusters sub are: \"@file\"";
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # (0) When one file input was given (yes, divclus can handle multiple files, Sarah!)
    #________________________________________________________________________________
    if(@file == 1){  #<=== @file has xxxx.mspa, yyyy.mspa  zzzz.mspa ....,
		$file=$file[0];
		$base_1=${&get_base_names($file)};
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (2) Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
		#______________________________________________________________________________________________
		$output_clu_file="$base_1\_F${factor}\.clu";

		# Terminates the whole program (exit) when the output already exists.
		if( !$over_write and -s $output_clu_file){
			print "\n# $output_clu_file Already EXISTS, skipping. Use \'o\' opt to overwrite\n"; exit;
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (3) merge_sequence_in_mspa_file does not do much. Just filtering and producing
		#     sequences in ISPA_PBS_21-215 VPR_PBS_160-354 format from mspa format
		#
		# NOTE(review): $range is neither declared nor assigned in this sub (only
		#   @range is), so "$range" is an empty string unless a package variable
		#   $range is set elsewhere -- confirm. $optimize is passed twice here.
		#   @grouped_seq_names is a package variable in this branch.
		#________________________________________________________________________________
        print "\n(i) Running merge_sequence_in_mspa_file";
        @grouped_seq_names=@{&merge_sequence_in_mspa_file(\@file, "s=$score", $optimize, $din_dom, $sat_file,
							$optimize, "T=$length_thresh", "E=$Evalue_thresh", "f=$factor", "$range", "$merge", $verbose,
							$short_region, $large_region, $average_region, $over_write, $dynamic_factor)};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (4) This is critical seqlet merging step. Up to now, things are fine usually.!!!
		#     Every group $i is compared with every other group $j; when the two
		#     are linked, group $j is folded into group $i, removed from the list,
		#     and the outer loop is restarted on the same $i (next F1 after $i--).
		#________________________________________________________________________________
		unless(@grouped_seq_names == 1){  ##  if @grouped_seq_names has one string like 'FAM_8_7 FAM_8_4 FAM_8_3' skip
			F1: for($i=0; $i< @grouped_seq_names; $i++){
                @splited1=split(/\s+/, $grouped_seq_names[$i]);
				for($j=0; $j< @grouped_seq_names; $j++){
    				 if($grouped_seq_names[$i] eq $grouped_seq_names[$j]){ next  }
					 @splited2=split(/\s+/, $grouped_seq_names[$j]);
                     $link_or_not=${&check_linkage_of_2_similar_seqlet_sets(\@splited1,
                                                                           \@splited2,
                                                                           "f=$factor")};
					if($link_or_not){
                        # NOTE(review): $optimize is forced to 1 here, so the 'z'
                        #   option has no effect on this step and the else branch
                        #   below cannot be reached in the single-file case.
                        $optimize=1; ## This should be nearly always 1 !!!!!!!
                        if($optimize){ ##---- This will also remove similar seqlets, not only identical ones
                            $grouped_seq_names[$i]=join(' ', sort @{&remove_similar_seqlets( [@splited1, @splited2],
																		$short_region, $large_region, $average_region)} );
     				    }else{
							# NOTE(review): unlike the multi-file branch, %duplicate is
							#   not emptied after use here.
							$grouped_seq_names[$i]=join(' ', grep { ! $duplicate{$_}++ } (@splited1, @splited2) );
					    }
                        splice(@grouped_seq_names, $j,1);
						$j--; $i--; next F1;
					}
				}

             }
		}
		#~~~~~~~~~~~~~~ I used to use a sub, but to save time above is inserted ~~~~~~~~~~~~~
        #@grouped_seq_names=@{&cluster_merged_seqlet_sets(\@grouped_seq_names, $dynamic_factor,
	    #				 "f=$factor", $short_region, $large_region, $average_region, $optimize)};

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# (5) This is showing the result in clu file format
				#     show_subclusterings() result is used as:
				#       [0] good/bad flag, [1] indup count, [2] sequence number,
				#       [3] reference to a list of output file names.
				#
				# NOTE(review): $dindom is passed here, but the 'd' option is stored
				#   in $din_dom above; $dindom is not assigned anywhere in this sub,
				#   so this looks like a misspelling -- confirm.
				#________________________________________________________________________________
                @temp_show_sub=&show_subclusterings(\@grouped_seq_names, $file, $sat_file, $dindom, $indup,
						   "E=$Evalue_thresh", "p=$percent_fac", "f=$factor" );
				$good_bad       = $temp_show_sub[0];
				$indup_c        = $temp_show_sub[1];
				$sum_seq_num   += $temp_show_sub[2];
				push(@sub_clustering_out_files, @{$temp_show_sub[3]});

				# @good and @bad are package variables in this branch (no my()).
				if($good_bad==1){      push(@good, $file);
				}else{                 push(@bad, $file);       }

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# (6) Final write up stage (unnecessary)
				#_______________________________________________________________
          &write_good_bad_list_in_divide_clusters(\@good, \@bad);

	 }
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # when more than one single file input is given (Default usually)
	 #_________________________________________________________________
	 elsif(@file >1 ){
			 my (@good, @bad);
			 # NOTE(review): INDUP is opened here but is neither written to nor
			 #   closed anywhere in this sub; the open result is not checked.
			 if($indup =~/i/i){   open (INDUP, ">indup_stat\.txt");  } # this is not essential.

			 for($i=0; $i< @file; $i++){
						my (@grouped_seq_names, @temp_show_sub, $indup_c, $big_mspa_file);
						$indup_c=0;
						$big_mspa_file=$file[$i];
						# The '$' is escaped, so the message prints the literal text
						# '$big_mspa_file', not the file name.
						unless(-s $big_mspa_file){ print "\n# (E) \$big_mspa_file does not exist\n"; exit }

						$base_1=${&get_base_names($big_mspa_file)};
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# (1) Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
						#______________________________________________________________________________________________
						$output_clu_file="$base_1\_F${factor}\.clu";

						# Existing output: skip this file only (next), unlike the
						# single-file branch which exits. The message names a 'w'
						# option although the option tested above is 'o'.
						if( !$over_write and -s $output_clu_file){
							print "\n# $output_clu_file Already EXISTS, skipping. Use \'w\' opt to overwrite\n";
							next;  }

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						#  (2) If clu file(eg 2-1618_ss.clu ) is in pwd, tries to skip
						#      (any case that passes this test has already been
						#      skipped by the check just above)
						#____________________________________________________________
						if((-s $output_clu_file) > 10 and $over_write !~/o/){
							print "# $output_clu_file exists, skipping, use \"o\" option to overwrite\n";  next;
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# (3) merge_sequence_in_mspa_file does not do much. Just filtering and producing
						#     sequences in ISPA_PBS_21-215 VPR_PBS_160-354 format of STRING from mspa format
						#     $big_mspa_file is an MSPA file
						#
						# NOTE(review): "$range" -- see the remark in the single-file
						#   branch; $range is not set in this sub.
						#________________________________________________________________________________
                        @grouped_seq_names=@{&merge_sequence_in_mspa_file(\$big_mspa_file, "s=$score", $din_dom, $sat_file, $optimize,
																"T=$length_thresh", "E=$Evalue_thresh", "f=$factor", "$range", "$merge", $verbose, $over_write,
																 $short_region, $large_region, $average_region, $dynamic_factor )};
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						#  (4) Clustering the sets of merged seqlets => CORE algorithm
						#      Same pairwise merge as in the single-file branch, except
						#      that $optimize is honoured, groups are split on spaces
						#      only (/ +/) and %duplicate is emptied after every merge.
						#_____________________________________________________________________
						unless(@grouped_seq_names == 1){  ##  if @grouped_seq_names has one string like 'FAM_8_7 FAM_8_4 FAM_8_3' skip
								F2: for($g=0; $g< @grouped_seq_names; $g++){
										@splited1=split(/ +/, $grouped_seq_names[$g]);
										for($h=0; $h< @grouped_seq_names; $h++){
												if($grouped_seq_names[$g] eq $grouped_seq_names[$h]){ next  }
												@splited2=split(/ +/, $grouped_seq_names[$h]);
												$link_or_not=${&check_linkage_of_2_similar_seqlet_sets(\@splited1, \@splited2, "f=$factor")};
												if($link_or_not){
														if($optimize){ ##---- This will also remove similar seqlets, not only identical ones
															 $grouped_seq_names[$g]=join(' ', sort @{&remove_similar_seqlets( [@splited1, @splited2],
																													 $short_region, $large_region, $average_region)} );
														}else{
															 $grouped_seq_names[$g]=join(' ', grep { ! $duplicate{$_}++ } (@splited1, @splited2) );
														}
														splice(@grouped_seq_names, $h, 1); $h--; $g--; %duplicate=(); next F2;
												}
										}
								}
						}
						#~~~~~~~~~~~~~~ I used to use a sub, but to save time above is inserted ~~~~~~~~~~~~~
						#@grouped_seq_names=@{&cluster_merged_seqlet_sets(\@grouped_seq_names, "f=$factor", $optimize, $dynamic_factor,
						#			 $short_region, $large_region, $average_region)};
						# NOTE(review): $dindom vs $din_dom -- same remark as in the
						#   single-file branch.
						@temp_show_sub=&show_subclusterings(\@grouped_seq_names, $big_mspa_file, $sat_file, $dindom, $indup,
																										"E=$Evalue_thresh", "p=$percent_fac", "f=$factor");
												$good_bad       = $temp_show_sub[0];
												$indup_c        = $temp_show_sub[1];
												$sum_seq_num   += $temp_show_sub[2];
						push(@sub_clustering_out_files, @{$temp_show_sub[3]});

						if($good_bad==1){          push(@good, $big_mspa_file);
						}else{         push(@bad, $big_mspa_file);       }

					}
					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
					&write_good_bad_list_in_divide_clusters(\@good, \@bad);
					# Helper: writes the two lists to 'good_bad.list' in the current
					# directory. Takes references to the good and the bad file lists.
					# A named sub is installed in the package even though it is
					# written inside this elsif block, which is why the single-file
					# branch above can call it too.
					# If the open fails only a warning is issued and the prints
					# that follow go to an unopened handle.
					sub write_good_bad_list_in_divide_clusters{
							 my  (@good, $i, @bad); @good=@{$_[0]}; @bad=@{$_[1]};
                             open(GOODBAD, ">good_bad.list") || warn "\n Can not open good_bad.list \n\n";
							 print GOODBAD "GOOD: all link    : 000\n";
							 for($i=0; $i< @good; $i++){  print GOODBAD "$good[$i]\n";  }
							 print GOODBAD "BAD : Not all link: 000\n";
							 for($i=0; $i< @bad; $i++){   print GOODBAD "$bad[$i]\n";   }
							 close(GOODBAD);
					}
					#_______________________________________________________________

	 }
	 return(\@sub_clustering_out_files); # contains (xxxx.clu, yyy.clu,, )
}







#______________________________________________________________________________
# Title     : remove_file_extension
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_file_extension{
		# Purpose : renames every file given as argument to the name returned
		#           by get_base_names() for it.
		#           (presumably the name without its extension -- get_base_names
		#           is defined elsewhere, confirm there)
		# Input   : list of file names.
		# Returns : reference to an array holding the new names of the files
		#           that were actually renamed.
		my (@modified_files, $i, @files, $base);
		@files=@_;
		for($i=0; $i< @files; $i++){
				$base=${&get_base_names($files[$i])};
				unless(defined($base) and length($base)){
						warn "\n# (WARN) remove_file_extension: no base name for $files[$i], skipped\n";
						next;
				}
				# Only report the file as modified when rename() really succeeded.
				if(rename($files[$i], $base)){
						push(@modified_files, $base);
				}else{
						warn "\n# (WARN) remove_file_extension: can not rename $files[$i] to $base: $!\n";
				}
		}
		return(\@modified_files);
}




#______________________________________________________________________________
# Title     : remove_lines_in_text_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_lines_in_text_files{
    # Purpose : prints the given text files to the selected output handle
    #           (STDOUT by default), leaving out the lines that lie strictly
    #           between the 'from' and the 'to' line number (the two boundary
    #           lines themselves are printed).
    # Options : r=<from>-<to>   line range; an empty <from> means 0, an empty
    #                           <to> means 'until the end'
    #           f=<from> and t=<to> together override r=
    # Input   : file names and options, sorted out by handle_arguments().
    # Returns : reference to the string "<from>-<to>".
    # Note    : line numbers start at 1 and are counted separately for every file.
    #           When no range is given at all, every line is printed.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($line_counter, $line_num_from, $line_num_to, $file, $range_from, $range_to);

    if(defined($vars{'r'}) and $vars{'r'}=~/(\S*)\-(\S*)/){
       ($range_from, $range_to)=($1, $2);
       $line_num_from= $range_from ? $range_from : 0;
       $line_num_to  = $range_to   ? $range_to   : 10000000000000000;
    }
    if(defined($vars{'f'}) and defined($vars{'t'})
       and $vars{'f'}=~/\S/ and $vars{'t'}=~/\S/){
       $line_num_from=$vars{'f'};
       $line_num_to=$vars{'t'}
    }

    # The file names delivered by handle_arguments() are in @file.
    for($i=0; $i< @file; $i++){
       $file=$file[$i];
       $line_counter=0;   # restart the line numbering for every file
       print "\n Opening $file with From: $line_num_from TO: $line_num_to\n\n";
       open(my $in_fh, '<', $file) || die "\n Can not open $file \n\n";
       while(<$in_fh>){
           $line_counter++;
           # Without any range option nothing is removed.
           if(!defined($line_num_to)
              or $line_counter <= $line_num_from or $line_counter >= $line_num_to){
               print;
           }
       }
       close($in_fh);
    }
    return(\"$line_num_from\-$line_num_to");
}





#______________________________________________________________________________
# Title     : remove_entry_line_in_fasta_file
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_entry_line_in_fasta_file{
    # Purpose : prints a FASTA file to the selected output handle (STDOUT by
    #           default) without its entry lines, i.e. without every line
    #           that contains a '>' character.
    # Input   : the file name, either as a plain string or as a reference
    #           to a string.
    # Returns : reference to the number of lines left out (0 when none).
    # Dies    : when the file can not be opened.
    my($fasta_in_file, $remove_count);
    $fasta_in_file= ref($_[0]) ? ${$_[0]} : $_[0];
    $remove_count=0;
    open(my $fasta_fh, '<', $fasta_in_file) || die "\n cannot open $fasta_in_file\n";
    while(<$fasta_fh>){
       if(/\>/){     # '>' anywhere in the line marks an entry line
          $remove_count++;
          next;
       }
       print;
    }
    close($fasta_fh);
    return(\$remove_count);
}




#______________________________________________________________________________
# Title     : remove_small_files
# Usage     : @files_removed=@{&remove_small_files(@ARGV)};
# Function  :
# Example   :
# Keywords  : delete_small_files
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub remove_small_files{
    # Purpose : deletes every file whose size in bytes lies between a lower
    #           and an upper limit (both inclusive).
    # Input   : [upper_limit [, lower_limit]], file names ...
    #           The first argument is taken as the upper limit and the second
    #           as the lower limit when they do NOT name a non-empty file.
    #           When no file name is left, the names returned by
    #           read_file_names_only('.') are used.
    #           A missing limit counts as 0.
    # Returns : reference to an array with the names of the removed files.
    # Warning : this really unlinks files.
    my($file_size_cut_line, $file_size_cut_line_bigger_than, $creation_time,
       $size, @files_removed, $i, @files, @args);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The limits are recognised by their position in the ORIGINAL argument
    # list, so a lower limit in 2nd position is not mistaken for a new
    # upper limit after the first one has been taken out.
    #______________________________________________________________________
    @args=@_;
    for($i=0; $i< @args; $i++){
        if($i == 0 and !(-s $args[$i])){
           $file_size_cut_line=$args[$i];
        }elsif($i == 1 and !(-s $args[$i])){
           $file_size_cut_line_bigger_than=$args[$i];
        }else{
           push(@files, $args[$i]);
        }
    }
    $file_size_cut_line=0             unless defined($file_size_cut_line);
    $file_size_cut_line_bigger_than=0 unless defined($file_size_cut_line_bigger_than);

    if(@files < 1){
        @files=@{&read_file_names_only('.')};
    }

    for($i=0; $i< @files; $i++){
        $size= (-s $files[$i]) || 0;
        if($size <= $file_size_cut_line and $size >= $file_size_cut_line_bigger_than){
            # Report a file as removed only when unlink() succeeded.
            $creation_time= localtime( (stat($files[$i]))[9] );
            if(unlink($files[$i])){
                push(@files_removed, $files[$i]);
                print "\n# (i) $files[$i] is removed , size= $size byte, $creation_time";
            }else{
                warn "\n# (WARN) remove_small_files: can not remove $files[$i]: $!\n";
            }
        }
    }
    print "\n$0 finished. You might have killed some useful files :-)  \n\n\n\n";
    return(\@files_removed);
}



#______________________________________________________________________________
# Title     : remove_mail_header_in_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub remove_mail_header_in_files{
		# Purpose : strips mail header lines from the given files, in place.
		#           Every file is first copied to '<file>.bak' with cp(), then
		#           rewritten from that copy. Only the first 33 lines are
		#           checked against the header patterns; later lines are
		#           copied unchanged.
		# Input   : list of file names.
		# Returns : reference to an array with the names of the files whose
		#           new size is larger than (original size - 400 bytes).
		#           For those files the '.bak' copy is removed; for the others
		#           it is left in place.
		# Note    : a file is not touched when its backup can not be read.
		my (@modified_files, $i, @files);
		@files=@_;
		for($i=0; $i< @files; $i++){
				my ($line_count, $backup_file, $original_file_size, $new_file_size,
				    $bak_fh, $out_fh);
				$backup_file="$files[$i]\.bak";
				&cp("$files[$i]", $backup_file);

				# Open the backup BEFORE truncating the original, so that a
				# failed copy can not destroy the only version of the file.
				unless(open($bak_fh, '<', $backup_file)){
						warn "\n# (WARN) can not read $backup_file, leaving $files[$i] untouched: $!\n";
						next;
				}
				$original_file_size= (-s $backup_file) || 0;
				unless(open($out_fh, '>', $files[$i])){
						warn "\n# (WARN) can not write $files[$i]: $!\n";
						close($bak_fh);
						next;
				}
				while(<$bak_fh>){
					 $line_count++;
					 if($line_count > 33){
							 print $out_fh $_; next;
					 } # No point in looking very far down !
					 next if /^From\s+\S/;
					 next if /^Received\:\s+/;
					 next if /^Message\-Id\:\s+/;
					 next if /id \S+; \S+ \d+\s+\S+\s+\d{4} \d+\:\d+:\d+/;
					 next if /by \w+\.\w+\.\w+\.+\w+ \(+\d/;
					 next if /for\s+\<\w+\@\w+\.\w+\S*\>\; \S+\, \d+ \S+\s+\d+/;
					 next if /^Date\:\s+/;
					 next if /^From:\s+/;
					 next if /^To:\s+/;
					 next if /^Subject\:\s+/;
					 next if /^Status\:\s+/;
					 next if /^X\-Mozilla\-/;
					 next if /^X\-UIDL\:\s+/;
					 print $out_fh $_;
				}
				close($bak_fh);
				close($out_fh) || warn "\n# (WARN) problem while closing $files[$i]: $!\n";

				$new_file_size= (-s $files[$i]) || 0;
				if($new_file_size > ( $original_file_size - 400) ){
						push(@modified_files,$files[$i]);
						print "\n# (i) $files[$i] has real size, I am removing $files[$i]\.bak\n";
						unlink($backup_file) || warn "\n# (WARN) can not remove $backup_file: $!\n";
				}else{
						print "\n# (i) The file size of new $files[$i] is a bit small, I am leaving $files[$i]\.bak\n";
				}
		}
		return(\@modified_files);
}

#________________________________________________________________________
# Title     : subtract_similar_seq_elements
# Usage     : @subs = @{&subtract_similar_seq_elements(\@match_seqs1, \@match_seqs2, "Percent_similarity=80")};
# Function  : removes any occurrences of certain elem. of the first
#             input array with second input array.
# Example   :
#
#     @match_seqs1=('xxxx_1-30', 'YYYYY_30-44', 'ZZZZ_1-4000');
#     @match_seqs2=('xxxx_4-32', 'YYYYY_25-41', 'ZZZZ_2000-3000');
#
# Keywords  : remove_similar_seq_elements, subtract_similar_sequence_elements
#              remove_similar_sequence_elements
# Options   :
# Returns   :
# Argument  :
#     $Percent_similarity= by Percent_similarity=
#
# Authors   : jong@biosophy.org
# Version   : 1.0
#--------------------------------------------------------------------
sub subtract_similar_seq_elements{
		# Purpose : compares the sequence regions (names like 'xxxx_1-30') of a
		#           first list with those of a second list.
		# Input   : $_[0]  reference to the first list
		#           $_[1]  reference to the second list
		#           $_[2]  optional "Percent_similarity=<number>" (default 80)
		# Returns : reference to an array of elements of the first list which
		#           (a) do not occur literally in the second list and
		#           (b) overlap a region of the SAME sequence in the second list
		#               by at least Percent_similarity % of the shorter region.
		#           An element is pushed once per overlapping partner, so it can
		#           occur more than once. Regions shorter than 10 are ignored.
		#
		# NOTE(review): the sub name and the name of the returned array suggest
		#   that the strongly overlapping (similar) elements should be taken
		#   OUT, while the test below keeps exactly those. The original
		#   selection is preserved here; confirm the intended direction with
		#   the callers before changing it.
		my($i, $j, @seq_match_array1, @seq_match_array2, $Percent_similarity, %counter,
		   @diff_elements, $seq1, $start1, $end1, $tail1,
		   $seq2, $start2, $end2, $tail2,
		   $leng1, $leng2, $smaller_leng, @truly_diff_elements,
		   $leng_thresh, $overlapping_seq_match_size, $finally_adjusted_seq_leng);
		$Percent_similarity=80; # 80% similarity is accepted
		$leng_thresh=10;

		@seq_match_array1=@{$_[0]};
		@seq_match_array2=@{$_[1]};
		if(defined($_[2]) and $_[2]=~/Percent_similarity=(\S+)/){
				$Percent_similarity=$1;
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Elements of the first list which are not literally in the second
		#________________________________________________________________
		foreach (@seq_match_array2){ $counter{$_}++ }
		@diff_elements= grep(!$counter{$_}, @seq_match_array1);

		for($i=0; $i< @diff_elements; $i++){  ## @diff_elements is from  @seq_match_array1
			 unless($diff_elements[$i]=~/^(\S+)_(\d+)\-(\d+)(\S*)/){ next }
			 ($seq1, $start1, $end1, $tail1)=($1, $2, $3, $4);
			 for($j=0; $j< @seq_match_array2; $j++){
					unless($seq_match_array2[$j]=~/^(\S+)_(\d+)\-(\d+)(\S*)/){ next }
					($seq2, $start2, $end2, $tail2)=($1, $2, $3, $4);

					# Only regions of one and the same sequence are compared
					unless($seq1 eq $seq2){ next }

					$leng1=$end1-$start1;
					$leng2=$end2-$start2;
					$smaller_leng= ($leng1 >= $leng2) ? $leng2 : $leng1;

					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					# Checking the minimal seq region leng here
					#______________________________________________________
					if($smaller_leng < $leng_thresh){ next }
					$overlapping_seq_match_size=${&get_overlapping_seq_match_size($start1, $end1, $start2, $end2)};
					$finally_adjusted_seq_leng=$smaller_leng*($Percent_similarity/100);

					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					# Now let's check if we regard them homologous or not
					#______________________________________________________
					if( $overlapping_seq_match_size >=  $finally_adjusted_seq_leng){
							push(@truly_diff_elements, $diff_elements[$i]);
					}
			 }
		} # END of for($i=0; $i< @diff_elements; $i++){
		return(\@truly_diff_elements);
}



#_____________________________________________________________________________
# Title     : remove_similar_seqlets
# Usage     : @seqlets=@{&remove_similar_seqlets(\@split)};
# Function  : merges(gets average starts and ends ) of similar
#             seqlets to reduce them into smaller numbers. This can also handle
#              names like XLBGLO2R_8-119_d1hlm__.
#
# Example   : @seqlets=@{&remove_similar_seqlets(\@mrg1, $mrg2, \@mrg3)};
#               while @mrg1=qw(M_2-100 M_2-110 M_8-105 M_4-108 N_10-110 N_12-115);
#                     $mrg2='Z_3-400 Z_2-420';
#                     @mrg3=('X_2-300 X_3-300', 'X_2-300', 'X_5-300 X_2-301' );
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : merge_sequence_names, merge_seq_names, merge_sequence_ranges
#             merge_seq_ranges
# Options   : _  for debugging.
#             #  for debugging.
#             f= for factor
#             S  for shorter region matched is used
#             A  for average region matched is used
#             L  for larger region matched is used
#
# Version   : 2.1
#-------------------------------------------------------------------------------
sub remove_similar_seqlets{
	 my ($i, $j, $seq1, $smaller_leng, $leng1, $leng2, $start1, $end1, $seq2, $start2,
	   $av_diff, $num_of_seq, $av_end, $av_start, $end2, @seqlets,
	   @array_input, @seqlet, $tail1, $tail2, $shorter_region, $larger_region,
	   $average_region, $factor);
	 $factor=7;  ## !!! This var makes big difference in the final clustering
	 $average_region = 'A'; ## default is to get the average of comparing regions

	 for($i=0; $i< @_; $i++){
	    if(ref($_[$i]) eq 'ARRAY'){
		     @array_input=@{$_[$i]};
		     for($j=0; $j<@array_input; $j++){
			      @seqlet=split(/\s+/, $array_input[$j]);
					  push(@seqlets, @seqlet);
		     }
		     #if($verbose){
				 #   print "\n# remove_similar_seqlets: ARRAY ref is given as input\n";
				 #   print "#  They are: @seqlets\n";
				 #}
	    }elsif($_[$i]=~/f=(\S+)/){   $factor=$1
	    }elsif($_[$i]=~/^(S)\s*$/){   $shorter_region=$1 ; $average_region=0;
	    }elsif($_[$i]=~/^(L)\s*$/){   $larger_region =$1 ; $average_region=0;
	    }elsif($_[$i]=~/^(A)\s*$/){   $average_region=$1 ; $shorter_region=$larger_region=0;
	    }elsif($_[$i]=~/\S+\_\d+\-\d+/){
		     push(@seqlets, split(/\s+/, $_[$i]) );
	    }elsif(ref($_[$i]) eq 'SCALAR' and ${$_[$i]}=~/\S+\_\d+\-\d+/){
	       push(@seqlets, split(/\s+/, ${$_[$i]}) );
	    }
	 }
	 #print "\n# remove_similar_seqlets : I am using \$factor : $factor\n" if $verbose;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Sorting is necessary as I am not doing the real thorough comparison
	 #______________________________________________________________________
	 $num_of_seq=@seqlets=sort @seqlets;

	 my ($short_start, $large_start, $short_end, $large_end);

	 for($i=0; $i< @seqlets; $i++){
			if($seqlets[$i]=~/^\s*(\S+)_(\d+)\-(\d+)(\S*)/){  ## last (\S*) is necessary for XLBGLO2R_8-119_d1hlm__
				 my($seq1, $start1, $end1, $tail1)=($1, $2, $3, $4);
				 if($seqlets[$i+1]=~/^(\S+)_(\d+)\-(\d+)(\S*)/){
						 ($seq2, $start2, $end2, $tail2)=($1, $2, $3, $4);
						 if($seq1 eq $seq2){
								$diff_start=abs($start1 - $start2);
								$diff_end  =abs($end1   - $end2  );
								$leng1=$end1-$start1;
								$leng2=$end2-$start2;

								if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }
								if( ($diff_start+$diff_end)/2 <= $smaller_leng*($factor/10) ){

										if($average_region){
											 $av_start=int(($start1+$start2) / 2);
											 $av_end  =int(($end1 + $end2) / 2);
															 $seqlets[$i]="$seq1\_$av_start\-${av_end}$tail1";  # $tail1 is for names like XLBGLO2R_8-119_d1hlm__
															 # print "\n# new seqlet : $seqlets[$i]\n" if $verbose;
											 splice(@seqlets, $i+1, 1);
											 $i--;
										}else{
											 if($start1 < $start2){
											 $short_start=$start2; $large_start=$start1;  ## note that short start should be $start2 if $start2 is bigger
											 }else{
													$short_start=$start1; $large_start=$start2;
											 }
											 if($end1 < $end2){
													$short_end=$end1;  $large_end=$end2;
											 }else{
													$short_end=$end2;  $large_end=$end1;
											 }
											 if($shorter_region){
													 $seqlets[$i]="$seq1\_$short_start\-${short_end}$tail1";
											 }elsif($larger_region){
													 $seqlets[$i]="$seq1\_$large_start\-${large_end}$tail1";
											 }

											 splice(@seqlets, $i+1, 1);
											 $i--;
										}
								}
						 }
		     }
	    }
	 }
	 #print "\n# (3) remove_similar_seqlets: The final out are: @seqlets\n" if $verbose;
	 return(\@seqlets);
}


#______________________________________________________________________________
# Title     : show_FDAT_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_FDAT_hash{
    # Purpose : prints the content of a 2-level hash of arrays
    #           ( $hash{structure}{subdomain} = [details ...] ), one line
    #           per subdomain, to the selected output handle.
    # Input   : $_[0]  reference to the hash. The hash handed over is the
    #                  one displayed.
    # Output  : lines of the form "\n<structure> <subdomain> : <details> \n",
    #           structures and subdomains in string sort order.
    my($FDAT_ref, @structure, $i, @subdomains, $structure, $j, @details);
    $FDAT_ref=$_[0];
    unless(ref($FDAT_ref) eq 'HASH'){ return }
    @structure=sort keys %{$FDAT_ref};

    for($i=0; $i< @structure; $i++){
      $structure=$structure[$i];
      @subdomains=sort keys %{$FDAT_ref->{$structure}};
      for($j=0; $j< @subdomains; $j++){
         @details=@{$FDAT_ref->{$structure}{$subdomains[$j]}};
         print "\n$structure $subdomains[$j] : @details \n";
      }
    }
}


#______________________________________________________________________________
# Title     : show_codon_lookup_table_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_codon_lookup_table_hash{
    # Purpose : prints a hash of arrays as one "<key> : <values>" line per
    #           key, keys in string sort order.
    # Input   : $_[0]  reference to the hash; each value is a reference to
    #                  an array.
    # Returns : reference to an array holding the printed lines (without the
    #           newline) of THIS call only.
    my(%amino_acid_codon_lookup_table, $i, $amino_acid, @amino_acid_residues,
       @codon_table);
    %amino_acid_codon_lookup_table=%{$_[0]};
    @amino_acid_residues=sort keys %amino_acid_codon_lookup_table;
    for($i=0; $i< @amino_acid_residues; $i++){
        $amino_acid=$amino_acid_residues[$i];
        print "$amino_acid : @{$amino_acid_codon_lookup_table{$amino_acid}}\n";
        push(@codon_table, "$amino_acid : @{$amino_acid_codon_lookup_table{$amino_acid}}");
    }
    return(\@codon_table);
}



#______________________________________________________________________________
# Title     : show_alignment_for_herta
# Usage     :
# Function  :
# Example   :
# Keywords  : show_seqlet_alignment
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.3
#------------------------------------------------------------------------------
sub show_alignment_for_herta{
    # Purpose : scores a 'herta' alignment in forward direction and against
    #           the reversed second sequence, and prints both scores together
    #           with their difference.
    # Input   : each argument may be a plain string or a reference to one
    #           $_[0]  herta result file; the reverse run is read from the
    #                  name produced by attach_suffix_in_file_name(.., 'rv', 'e')
    #           $_[1]  FASTA file of sequence 1
    #           $_[2]  FASTA file of sequence 2
    #           $_[3]  query sequence name  (handed on to the scoring sub)
    #           $_[4]  DB sequence name     (handed on; '_rv' is appended for
    #                                        the reverse run)
    # Output  : one '# ForBackScore: ...' line on the selected output handle.
    # Note    : @keys1, @keys2, $seq_name1, $seq_name2, @residues1_rv,
    #           @residues2_rv, $herta_align_score, $herta_align_score_rv,
    #           $seq1_len, $seq2_len and $forward_minus_backward_score are not
    #           declared with my() and therefore are package variables.
    #           They are left that way as the sub has no return statement and
    #           other code may read them after the call (not visible from
    #           here -- confirm before localising them).
    my($herta_input_file, $sequence_1_file, $sequence_2_file,
       %align_position_seq1_seq2_rv,
       %align_position_seq1_seq2, $herta_input_file_rv, %sequence_1, %sequence_2,
       %sequence_rv, %sequence, @positions1_rv, @positions2_rv,
       @positions1, @positions2, @residues1, @residues2,
       $query_seq, $DB_seq);

    $herta_input_file=${$_[0]} || $_[0];
    $herta_input_file_rv=${&attach_suffix_in_file_name($herta_input_file, 'rv', 'e')};

    $sequence_1_file=${$_[1]} || $_[1];
    $sequence_2_file=${$_[2]} || $_[2];
    $query_seq      =${$_[3]} || $_[3];
    $DB_seq         =${$_[4]} || $_[4];
    #print "\n $sequence_1_file $sequence_2_file\n\n";

    %sequence_1=%{&open_fasta_files(\$sequence_1_file)};
    %sequence_2=%{&open_fasta_files(\$sequence_2_file)};
    @keys1=keys %sequence_1;
    @keys2=keys %sequence_2;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # In herta, there is no seq name, so I note them as 1 and 2
    # For the reverse run only sequence 2 is reversed.
    #______________________________________________________________
    $sequence{1}=$sequence_1{$keys1[0]}; # the first key is for seq (only 1 key in fact)
    $sequence{2}=$sequence_2{$keys2[0]};
    $sequence_rv{1}=$sequence_1{$keys1[0]}; #!!!!!!!!!!!!!!!! <<<<<<<<<<<<
    $sequence_rv{2}=reverse $sequence_2{$keys2[0]};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  $herta_align_hash{$seq_name}{$seq_pos}=$align_score;
    #_______________________________________________________________
    %align_position_seq1_seq2   =%{&open_herta_files(\$herta_input_file)};
    %align_position_seq1_seq2_rv=%{&open_herta_files(\$herta_input_file_rv)};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # temporarily assign 1, 2 as herta can not distinguish them
    #______________________________________________________________
    ($seq_name1, $seq_name2)=(1,2);

    @residues1   =split(//, $sequence{$seq_name1});
    @residues2   =split(//, $sequence{$seq_name2});
    @residues1_rv=split(//, $sequence_rv{$seq_name1});
    @residues2_rv=split(//, $sequence_rv{$seq_name2});
    @positions1   =sort {$a <=> $b} keys %{$align_position_seq1_seq2{$seq_name1}};
    @positions2   =sort {$a <=> $b} keys %{$align_position_seq1_seq2{$seq_name2}};
    @positions1_rv=sort {$a <=> $b} keys %{$align_position_seq1_seq2_rv{$seq_name1}};
    @positions2_rv=sort {$a <=> $b} keys %{$align_position_seq1_seq2_rv{$seq_name2}};

    $herta_align_score   =${&align_herta_inputs_with_adjustments(\@residues1, \@residues2,
                                                              \@positions1, \@positions2,
                                                              \%align_position_seq1_seq2,
                                                              $query_seq,
                                                              $DB_seq)};

    $herta_align_score_rv=${&align_herta_inputs_with_adjustments(\@residues1_rv, \@residues2_rv,
                                                              \@positions1_rv, \@positions2_rv,
                                                              \%align_position_seq1_seq2_rv,
                                                              $query_seq,
                                                              "$DB_seq\_rv")};

    # scalar() is needed: a plain list assignment would store the first two
    # residues of sequence 1 instead of the two sequence lengths.
    ($seq1_len, $seq2_len)=(scalar(@residues1), scalar(@residues2));
    $forward_minus_backward_score = $herta_align_score - $herta_align_score_rv;
    print "# ForBackScore: $forward_minus_backward_score, ForScore: $herta_align_score, BackScore: $herta_align_score_rv\n\n";
}





#______________________________________________________________________________
# Title     : show_ENV_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_ENV_hash{
    # Prints every environment variable to STDOUT in sorted key order.
    # Each entry is written as "\n <name>  <value>", i.e. the newline
    # comes BEFORE the entry and nothing terminates the last line.
    foreach my $env_name (sort keys %ENV){
       print "\n $env_name  $ENV{$env_name}";
    }
}




#______________________________________________________________________________
# Title     : show_DDDF_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_DDDF_hash{
    # Prints a hash of array refs, one "<key> <elem> <elem> ...\n" line per
    # key, in sorted key order.
    #   $_[0] : reference to a hash whose values are array references.
    my %DDDF_hash = %{$_[0]};
    foreach my $DDD_ID (sort keys %DDDF_hash){
       my @members = @{$DDDF_hash{$DDD_ID}};
       print "$DDD_ID @members\n";
    }
}




#______________________________________________________________________________
# Title     : show_FSSP_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub show_FSSP_hash{
    # Prints a three-level residue map to STDOUT:
    #   representative name -> homolog name -> representative position -> value
    #
    #   $_[0] : reference to that nested hash.
    #
    # Representative and homolog names are printed in sorted order so the
    # output is reproducible; positions are sorted numerically.
    # All working variables are lexical, so the caller's package-level
    # $j, $k, @rep_names, @homol_names and @rep_posi are left untouched.
    my %FSSP_alignment_1to1_residue_map=%{$_[0]};
    for my $rep_name (sort keys %FSSP_alignment_1to1_residue_map){
       print ">>$rep_name\n";
       my $homologs=$FSSP_alignment_1to1_residue_map{$rep_name};
       for my $homol_name (sort keys %{$homologs}){
          my $residue_map=$homologs->{$homol_name};
          print "  >$rep_name <=> $homol_name\n";
          for my $rep_posi (sort {$a<=>$b} keys %{$residue_map}){
              print "    R: $rep_posi - H: $residue_map->{$rep_posi}\n";
          }
       }
    }
    print "\n Finished running  show_FSSP_hash\n";
}


#______________________________________________________________________________
# Title     : show_DALI_domain_table_hash
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_DALI_domain_table_hash{
    # Prints a two-level table to STDOUT, one line per (structure, node):
    #   "<structure ID> <node ID> <elem> <elem> ...\n"
    # followed by a single blank line.
    #
    #   $_[0] : reference to a hash of hashes whose leaves are array refs.
    #
    # Structure and node IDs are printed in sorted order so the output is
    # reproducible.  Loop state is lexical, so the caller's package-level
    # $i, $j, @DALI_str_ID and @DALI_node_ID are left untouched.
    my %DOM_table_hash=%{$_[0]};
    for my $str_ID (sort keys %DOM_table_hash){
       my $nodes=$DOM_table_hash{$str_ID};
       for my $node_ID (sort keys %{$nodes}){
          print "$str_ID $node_ID @{$nodes->{$node_ID}}\n";
       }
    }
    print "\n";

}



#______________________________________________________________________________
# Title     : show_information_on_perd
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  : Tk
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_information_on_perd{
    # Pops up an "about" window with animated ticker-tape labels.
    #   $t : widget used as parent; it must support ->Toplevel and ->after
    #        (presumably a Tk MainWindow or other Tk widget -- confirm with caller).
    # No explicit return value.
    # Apart from $t, $count_time, $perd_version, @colors_array and $d, every
    # variable below ($bg_color, $message, $frame1, $m1, ...) is a package global.
    my ($t) = @_;
    my ($count_time);
    my ($perd_version)='0.1';

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Prepare for choosing bg color randomly
    #_________________________________________
    my(@colors_array)=qw(cyan bisque steelblue pink yellow white red blue green
                         black violet darkgreen wheat grey20 green4);
    # Keep drawing four colours until the three pairs tested below differ.
    # NOTE(review): the colour variables are globals, so if a previous call
    # left acceptable values behind, this loop body never runs.
    until($bg_color ne $fg_color and $fg2 ne $bg2 and $fg_color ne $bg2){
       srand(time()|$$);
       ($bg_color, $fg_color, $fg2, $bg2)=@{&pick_randomly(\@colors_array, 4)};
    }

    $message="  PERD version $perd_version Jong Park.  ";
    # The toplevel is also stored on the parent under the key
    # 'show_information_on_perd'.
    my $d=$t->{'show_information_on_perd'}=$t->Toplevel(-popover => 'cursor', -popanchor => 'nw');
    # Four stacked frames; the -relief values are barewords (no strict subs).
    $frame1=$d->Frame(-label=>'1st Frame -relief ridge Button', -relief=>ridge,  -borderwidth=>3)->pack;
    $frame2=$d->Frame(-label=>'2nd Frame -relief groove Button', -relief=>groove, -borderwidth=>4)->pack;
    $frame3=$d->Frame(-label=>'3rd Frame -relief raised Containing Button', -relief=>raised, -borderwidth=>5)->pack;
    $frame4=$d->Frame(-label=>'4th Frame -relief sunken Containing Button', -relief=>sunken, -borderwidth=>3)->pack;

    $label1=$frame1->Label(-fg=>$fg_color, -bg=>$bg2,
                      -text=>'I am PERD',
                      -font=>'12x24');

    $label1->pack( -fill => 'both', -expand => 1 );
    # $m1 shows the horizontally scrolling banner; $m2..$m7 are one-character
    # wide labels bound to $message2..$message7, which scroll_it updates.
    $m1 = $frame2->Label(-fg=>$fg_color, -bg=>$bg_color,
                    -textvariable => \$message, -width => 30, -font=>'10x20' );
    $m2 = $frame4->Label(-wrap=>1, -fg=>$fg_color, -bg=>$bg_color,
                    -textvariable => \$message2, -width=>1, -height=>3 );
    $m3 = $frame4->Label(-wrap=>1, -fg=>$bg2, -bg=>$fg_color,
                    -textvariable => \$message3, -width=>1, -height=>3 );
    $m4 = $frame4->Label(-wrap=>1, -fg=>$fg_color, -bg=>$bg_color,
                    -textvariable => \$message4, -width=>1, -height=>3 );
    $m5 = $frame4->Label(-wrap=>1, -fg=>$bg_color, -bg=>$fg2,
                    -textvariable => \$message5, -width=>1, -height=>3 );
    $m6 = $frame4->Label(-wrap=>1, -fg=>$fg2, -bg=>$bg_color,
                    -textvariable => \$message6, -width=>1, -height=>3 );
    $m7 = $frame4->Label(-wrap=>1, -fg=>$bg_color, -bg=>$fg_color,
                    -textvariable => \$message7, -width=>1, -height=>3 );
    $m1->pack();
    $m2->pack(-anchor=>'w', -side=>left);
    $m3->pack(-anchor=>'w', -side=>left);
    $m4->pack(-anchor=>'w', -side=>left);
    $m5->pack(-anchor=>'e', -side=>right);
    $m6->pack(-anchor=>'e', -side=>right);
    $m7->pack(-anchor=>'e', -side=>right);

    # The button hides the toplevel and resets the tick counter.
    $CLOSE_button=$frame3->Button( -text    =>'O.K., Enough..', -bg=>$fg_color, -borderwidth=>4,
                               -command => sub{ $d->withdraw; $count_time=0; }
                              )->pack;

    # NOTE(review): this is a plain function call, not $t->after(...); it only
    # works if an `after` function is in scope at run time -- verify.
    after(2, \&scroll_it);
    # Draws a fresh colour set for the globals; the widgets above were already
    # created with the previous values.
    ($bg_color, $fg_color, $fg2, $bg2)=@{&pick_randomly(\@colors_array, 4)};
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # scroll_it, from terry@encompass.is.net (Contrib/TickerTape)
    #_______________________________________________________
    # Each tick: rotate $message left by one character, mirror it into
    # $message4, store the reversed text in $message2/5/7 and a scrambled copy
    # in $message3/6, then re-arm itself on $t after 250 (Tk time units).
    # After more than 50 ticks the toplevel is withdrawn and the counter reset.
    # NOTE(review): a named sub nested in another sub is compiled once and
    # binds to the FIRST call's $d, $t and $count_time ("Variable will not stay
    # shared" in perldiag); later calls of show_information_on_perd presumably
    # animate against stale widgets -- confirm, an anonymous sub would avoid it.
    sub scroll_it {
        $count_time++;
        if($count_time > 50){ $d-> withdraw; $count_time=0  }
        $message =~ /(.)(.*)/;
        $message=$message4="$2$1";
        $message2=$message5=$message7=reverse($message);
        $message3=$message6=${&scramble_string(\$message)};
        $t->after(250, \&scroll_it);
    }

}

#______________________________________________________________________________
# Title     : show_written_html_file_content
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub show_written_html_file_content{
		# Echoes the content of a file to STDOUT wrapped in a small HTML
		# fragment: a heading, then every line of the file followed by "<br>",
		# then the closing "</pre></font>".
		#
		#   $_[0]   : path of the file to show.
		#   Returns : reference to a scalar holding that path.
		#   Dies    : when the file cannot be opened (message includes $!).
		#
		# The file is opened read-only with three-argument open, so characters
		# such as '>' or '|' in the name are never interpreted as an open mode,
		# it is streamed line by line instead of being slurped, and the handle
		# is closed before returning.
		my($html_file_input)=$_[0];
		open(my $html_fh, '<', $html_file_input)
			or die "Couldn't open $html_file_input: $!";
		print "\n<br># (i) Following lines were written<br><br><pre><B><font COLOR=\"#DC143C\">\n";
		while(my $line=<$html_fh>){
			 print $line, "<br>";
		}
		close($html_fh);
		print "</pre></font>";
		return(\$html_file_input);
}


#__________________________________________________________________________
# Title     : show_subclusterings
# Usage     : &show_subclusterings(\@out);
# Function  : This is the very final sub of divclus.pl
# Example   : @temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup);
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : print_subclusterings, sum_subclusterings, write_subclustering
#             show_clusterings, display_subclusterings
# Options   :
#             f  for file output, eg: xxxxxxx.sat
# Category  :
# Version   : 2.9
#-------------------------------------------------------------------------
sub show_subclusterings{
	# Writes a summary of sub-clusters to a '.clu' file and to STDOUT.
	#
	# Arguments are routed through handle_arguments(); this sub then uses
	#   $array[0] : first array ref = list of sub-cluster strings, each a
	#               whitespace-separated list of 'NAME_start-end' seqlets,
	#   $file[0]  : input file name; cluster size/number are parsed out of it,
	#   $char_opt : 'd' turns on domain-inside-domain reporting,
	#               'i' turns on internal-duplication reporting,
	#   %vars     : f= (factor), p= (percentage), s= (score), e= (E-value).
	#
	# Returns the list ($good_bad, $indup_count, $ori_cluster_size,
	#                   \@sub_clustering_out_files).
	#
	# $dindom, $factor, $score, $evalue and $base are package globals here.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my ($max_size, $sat_file_name, $clu_file_name,
	$ori_cluster_size, $ori_cluster_num, $good_bad, @keys, $percentage_fac,
	$indup, @sizes, $sum_seq_num, $indup_percent, $indup_count, %tem4,
	@sub_clustering_out_files);  # clusall_1e-5_clu_14-324_ss.sat
	my @out=@{$array[0]};
	$indup_count=0;

	if($char_opt=~/d/){	    $dindom=1;	}
	if($char_opt=~/i/){		$indup=1;	}
	if($vars{'f'}=~/\S+/){     $factor= $vars{'f'}; }
	if($vars{'p'}=~/\d+/){ $percentage_fac= int($vars{'p'}); }
	if($vars{'s'}=~/\d+/){	   $score = $vars{'s'};	}
	if($vars{'e'}=~/\d+/){	   $evalue= $vars{'e'};	}

	print "\n# (1) show_subclusterings : \@file has : @file";
	# File names like '13-234.mspa' or '8-420_cluster.sat' give
	# <size>-<number>; the part before the extension becomes $base.
    if( $file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.mspa/  or
		$file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.sat/   ){
         $ori_cluster_size=$2;
         $ori_cluster_num =$3;
         $base=$1;
		 $sat_file_name="$base\.sat";
         $clu_file_name="$base\.clu";
	}else{
         $ori_cluster_size="0000";
	     $ori_cluster_num ="0000";
	     $base=${&get_base_names($file[0])};
	     $clu_file_name="$base\.clu";
		 # NOTE(review): the message says "dying" but execution continues
		 # with the placeholder size/number set above.
		 warn "\n# (2) LINE:",__LINE__," WARN: the \@file input to show_subclusterings is not the right format, dying\n";
		 warn "     Sarah!, right format looks like: 13-234.mspa or 8-420_cluster.mspa \n";
	}

	open(CLU, ">$clu_file_name") || die "\n# (ERROR) show_subclusterings failed miserably to CREATE \"$clu_file_name\" \n";
	push(@sub_clustering_out_files, $clu_file_name);


	@out=@{&sort_string_by_length(\@out)};

	for($i=0; $i< @out; $i++){ # @out has ( 'YAL054C_98-695 YBR041W_90-617', 'YBR115C_230-842 YBR222C_16-537 YER015W_121-686', etc)
	   # A fresh lexical each iteration, so this is effectively $count = $i+1.
	   my $count+=$i+1;
	   my ( $int_dup_number, $sub_clu_size, $seq_with_range, @sp, $new_clus_NAME,
	        %tem, %tem2, %tem3, $j, @keys, $num_seq);
	   if($out[$i]=~/^\s*$/){ next }
	   @sp=sort split(/\s+/, $out[$i]);

	   # Per sequence name: %tem  = number of seqlets,
	   #                    %tem2 = padded list of its 'NAME_start-end' seqlets,
	   #                    %tem3 = last 'start-end' seen,
	   #                    %tem4 = end-start of the last seqlet seen.
	   # %tem4 is declared outside the loop, so it accumulates across clusters.
	   for($j=0; $j < @sp; $j++){
		  $seq_with_range=$sp[$j];
		  if($seq_with_range=~/^((\S+)_((\d+)\-(\d+)))/){
			 $tem{$2}++;
			 $tem2{$2}.=sprintf("%-15s ", $1);
			 $tem3{$2} =$3;
			 $tem4{$2} =$5-$4;
		  }
	   }

	   @keys=sort keys %tem;
	   $num_seq=$sub_clu_size=@keys;

	   if($max_size < $sub_clu_size){
		  $max_size=$sub_clu_size; ## This is to collect the sizes of clusters to see if it is good.
	   }
	   # NOTE(review): $indup_count is overwritten (not added to) on every
	   # iteration, so the value returned at the end reflects only the last
	   # non-blank sub-cluster -- confirm whether a running total was intended.
	   # $clu_file_name is passed as the 7th argument but the helper never
	   # reads $_[6]; it prints to the CLU handle directly.
	   $indup_count= &print_summary_for_divclus(
		         $count, \%tem2, \%tem,
		         $ori_cluster_num,
		         $ori_cluster_size,
		         $dindom,
		         $clu_file_name,
								 \%tem3, \%tem4,
								 $indup, );

					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 # Local subroutine
					 # (A named sub is package-scoped in Perl even though it is
					 #  written inside the loop; it receives everything via @_.)
					 #_______________________________________________________________
	   sub print_summary_for_divclus{ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							 # NOTE(review): this lexical $percentage_fac is never
							 # assigned, so the "P:" field below is always empty;
							 # $evalue and $factor are read as package globals.
							 my(@keys, $indup_count, $x, $m, $percentage_fac);
							 my $count=$_[0]; # count of cluster
	       my %tem2=%{$_[1]};	my $num_seq=@keys=sort keys %tem2;
	       my %tem=%{$_[2]};	my $ori_cluster_num=$_[3];
	       # New cluster name = <original number> 0 <sub-cluster count> 0 <size>.
	       my $new_clus_NAME=$ori_cluster_num.'0'.$count.'0'.$num_seq;
	       my $ori_cluster_size=$_[4];
	       my $dindom=$_[5];	my %tem3=%{$_[7]};
	       my $indup=$_[9];	my (%internal_dup);
	       my %tem4=%{$_[8]};
							 #~~~~~~~~~~ Domain Inside Domain ~~~~~~~~~~~~~~~~~
	       if($dindom){
	          for($x=0; $x <@keys; $x++){
											 @domain_inside_domain=@{&get_domain_inside_domain($tem2{$keys[$x]})};
											 @domain_inside_domain=@{&remove_dup_in_array(\@domain_inside_domain)};
											 for($m=0; $m< @domain_inside_domain; $m++){ print "  # Dindom: $m : $domain_inside_domain[$m]\n";   }
											 print "\n";
		  }
							 }
							 #==========================================================================================

	       #~~~~~~~~~~ Internal duplication  ~~~~~~~~~~~~~~
	       if($indup==1){
		   # @keys is the same as sub cluster size,
		   for($x=0; $x < @keys; $x++){
														 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
														 # Checks each sequence for duplication
														 #___________________________________________________
														 my %internal_dup=%{&get_internal_dup_in_a_cluster( $tem2{$keys[$x]} )};
														 my @dup_keys=keys %internal_dup;
														 if(@dup_keys > 0){
																		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
																		 #  Counts sequences that have at least one duplicated region
																		 #  and prints each 'start-end => duplicate regions' entry.
																		 #______________________________________________________________________________
																		 $indup_count++;
																		 printf ("%-14s %-12s %-4s", $keys[$x], $new_clus_NAME, $num_seq);
																		 for($m=0; $m< @dup_keys; $m++){
																						 printf ("%-19s=> %s\n", $dup_keys[$m], $internal_dup{ $dup_keys[$m] } );
																		 }
														 }
										}
								 }

								#~~~~~~~~~~ Summary ~~~~~~~~~~~~~~~~~~~~~~~~~~~
								# The same header and per-sequence lines go both to
								# the CLU file handle and to STDOUT.
								print  CLU  "Cluster size $num_seq\n";
																				printf CLU ("Cluster number %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
																					$new_clus_NAME, $evalue, $factor, $percentage_fac,
																					$ori_cluster_size, $num_seq, $ori_cluster_num);
								print       "Cluster size $num_seq\n";
								printf     ("Cluster number %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
															$new_clus_NAME, $evalue, $factor, $percentage_fac,
															$ori_cluster_size, $num_seq, $ori_cluster_num);
								for($x=0; $x <@keys; $x++){
									 printf CLU ("   %-4s %-5s %-17s %-10s %-3s leng: %-s\n",
															 $num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]}, $tem4{$keys[$x]});
									 printf ("   %-4s %-5s %-17s %-10s %-3s leng: %-s\n",
													$num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]}, $tem4{$keys[$x]});
								}
								return($indup_count);
	   }
	}
		close(CLU); ## this is a bug fix

	# "Good" (1) means the largest sub-cluster has as many sequences as the
	# original cluster size parsed from the file name.
	if($max_size == $ori_cluster_size){   $good_bad=1;
	}else{	                              $good_bad=0;	}

    # NOTE(review): $verbose is not declared or set in this sub; it is read as
    # a package global.
    print "\n# Sarah, Do you think the subclusterings are O.K.?" if $verbose;
    print "\n#   Tell me, if you feel suspicious, jong\@salts.med.harvard.edu\n\n" if $verbose;
    return($good_bad, $indup_count, $ori_cluster_size, \@sub_clustering_out_files);
}






#__________________________________________________________________________
# Title     : exchange_query_with_match_in_mspa
# Usage     : @exchanged_mspa=@{&exchange_query_with_match_in_mspa(\@file)};
# Function  :
# Example   :
# Keywords  : swap_query_with_match_in_mspa, invert_query_with_match_in_mspa,
#             swap_query_seq_with_match_seq_in_mspa,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub exchange_query_with_match_in_mspa{
		 # Reads MSPA file(s) through open_mspa_files() with its 'x' option and
		 # returns the resulting lines sorted (string order) by the 5th
		 # whitespace-separated column.
		 #
		 #   Arguments : routed through handle_arguments(); @file is forwarded
		 #               to open_mspa_files, char option 'n' sets $names_only.
		 #   Returns   : reference to the sorted array of lines.
		 #
		 # Lines that do not look like 'num  token  num  num  name ...' are
		 # skipped.  (Previously the inner map produced a false scalar for such
		 # lines, which was then dereferenced as an array ref and ended up as an
		 # undef entry in the result.)
		 #
		 # NOTE(review): $open_mspa_files_x_opt and $names_only are package
		 # globals; once a call sets $names_only='n' it stays set for later
		 # calls -- confirm whether that is relied upon before making it lexical.
		 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		 if($debug==1){print "\n\t\@hash=\"@hash\"
		 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 my(%exchanged_mspa, @sorted_by_query_seq_names, @new_mspa_lines);
		 $open_mspa_files_x_opt = 'x';
		 if($char_opt=~/n/){ $names_only='n' }
		 %exchanged_mspa=%{&open_mspa_files(@file, $open_mspa_files_x_opt, $names_only )};

		 @new_mspa_lines=values %exchanged_mspa;
		 # Schwartzian transform: pair each line with its sort key, sort on the
		 # key, then unwrap.  Returning () drops lines without a key.
		 @sorted_by_query_seq_names=
				map  { $_->[0] }
				sort { $a->[1] cmp $b->[1] }
				map  { /^\d+\s+\S+\s+\d+\s+\d+\s+(\S+)/ ? [$_, $1] : () } @new_mspa_lines;
		 return(\@sorted_by_query_seq_names);
}

#______________________________________________________________________________
# Title     : swap_array_elements
# Usage     :
# Function  :
# Example   :
# Keywords  : swap_array_elem_positions, swap_positions_in_array
# Options   :
# Author    : jong@biosophy.org,
# Category  : Array-handling, Action=Swap
# Version   : 1.0
#------------------------------------------------------------------------------
sub swap_array_elements{
		# Swaps two elements of an array in place.
		#
		#   $pos1, $pos2 : indices of the elements to exchange.  An index at or
		#                  beyond the array size is clamped to the last index
		#                  (a notice is printed only when $pos2 is clamped).
		#   $array_ref   : reference to the array to modify.
		#   Returns      : the same array reference.
		#
		# An empty array is returned untouched: clamping would otherwise give
		# index -1, and assigning to a negative index of an empty array dies
		# with "Modification of non-creatable array value attempted".
		my($pos1, $pos2, $array_ref)=@_;
		my $size=@{$array_ref};
		if($size == 0){ return($array_ref); }

		if($pos1 >= $size){ $pos1=$size-1; }
		if($pos2 >= $size){ $pos2=$size-1;
			 print "\n# (i) \$pos2 is larger than last elem position";
		}

		@{$array_ref}[$pos2, $pos1]=@{$array_ref}[$pos1, $pos2];
		return($array_ref);
}


#______________________________________________________________
# Title     : get_internal_dup_in_a_cluster
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_internal_dup_in_a_cluster{
	# Finds regions of one sequence that do NOT overlap each other, i.e.
	# candidate internal duplications.
	#
	#   $_[0]   : string of whitespace-separated 'NAME_start-end' seqlets, or a
	#             reference to such a string.
	#   Returns : reference to a hash  'start1-end1' => 'start2-end2 start3-end3 '
	#             (each value is a space-terminated list of regions).
	#
	# Changes from the earlier version:
	#  - a scalar reference is really dereferenced (`$_[0] || ${$_[0]}` kept the
	#    reference itself, because a reference is always true);
	#  - tokens that do not look like NAME_start-end are dropped up front
	#    instead of turning into undef entries that reused stale coordinates;
	#  - all working variables are lexical, nothing leaks into package globals.
	my $cluster_line=(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
	$cluster_line='' unless defined $cluster_line;
	my %out;

	# Sort seqlets by sequence name, then by start position.
	my @seq= map  { $_->[0] }
	         sort { $a->[1] cmp $b->[1] or $a->[2] <=> $b->[2] }
	         map  { /^((\S+)_(\d+)\-(\d+)\s*.*)$/ ? [$1, $2, $3] : () }
	         split(/\s+/, $cluster_line);

	for(my $i=0; $i< @seq; $i++){
	   my $seq1=$seq[$i];
	   my ($start1, $end1)= $seq1=~/^\S+_(\d+)\-(\d+)/;

	   # NOTE(review): the inner scan starts at index 1, not $i+1, and removes
	   # entries from @seq while both loops run; kept as is because the result
	   # depends on this exact order.
	   for(my $j=1; $j< @seq; $j++){
		  my $seq2=$seq[$j];
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  my ($start2, $end2)= $seq2=~/^\S+_(\d+)\-(\d+)/;
		  my $margin=($end2-$start2)/12;   ## about 8% overlap is regarded as not overlapping

		  # True when the two regions are disjoint (allowing $margin of slack):
		  # record region 2 as a duplicate of region 1 and consume it.
		  if(( ($start1+$margin) > $end2)||
		     ( ($start2+$margin) > $end1)){
			 $out{"$start1\-$end1"}.="$start2\-$end2 ";
			 splice(@seq, $j, 1);
			 $j--;
		  }
	   }
	}
	return(\%out);
}

#______________________________________________________________
# Title     : get_domain_inside_domain
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : find_dindoms, domain_inside_domain, domain_in_domain
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_domain_inside_domain{
	# Reports seqlets that lie strictly inside another seqlet of the same
	# cluster line ("domain inside domain").
	#
	#   $_[0]   : string of whitespace-separated 'NAME_start-end' seqlets, or a
	#             reference to such a string.
	#   Returns : reference to an array of 'OUTER(INNER)' strings.  Every
	#             ordered pair is examined, so a nested pair is reported twice;
	#             callers de-duplicate if they need to.
	#
	# A seqlet counts as an inner domain when it is strictly contained in the
	# other one, is at most HALF its length, and is longer than
	# $min_inside_dom_size residues.
	#
	# Changes from the earlier version:
	#  - a scalar reference is really dereferenced (`$_[0] || ${$_[0]}` kept the
	#    reference itself, because a reference is always true);
	#  - tokens that do not parse are skipped instead of silently reusing the
	#    coordinates of a previous token;
	#  - all working variables are lexical, nothing leaks into package globals.
	my $cluster_line=(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
	$cluster_line='' unless defined $cluster_line;
	my $min_inside_dom_size=40;
	my @out;
	my @seq=split(/\s+/, $cluster_line);

	for my $seq1 (@seq){
	   next unless $seq1=~/^\S+_(\d+)\-(\d+)/;
	   my ($start1, $end1)=($1, $2);

	   for my $seq2 (@seq){
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  next unless $seq2=~/^\S+_(\d+)\-(\d+)/;
		  my ($start2, $end2)=($1, $2);

		  if(($start1 > $end2)||($start2 > $end1)){ # skips non overlapping seqlets
			 next;
		  }
		  my $leng_seq1=$end1-$start1;
		  my $leng_seq2=$end2-$start2;

		  if(($start1 > $start2)&&($end1 < $end2)){      #   -----      seq1
			 if(( ($leng_seq2/2) >= $leng_seq1 )&&       # ----------   seq2
			    ($leng_seq1 > $min_inside_dom_size) ){   # seq1 is at most half of seq2: hidden domain
				push(@out, "$seq2\($seq1\)");
			 }
		  }elsif(($start1 < $start2)&&($end1 > $end2)){  # -----------  seq1
			 if(( ($leng_seq1/2) >= $leng_seq2)&&        #   ------     seq2
			    ($leng_seq2 > $min_inside_dom_size) ){   # seq2 is at most half of seq1: hidden domain
				push(@out, "$seq1\($seq2\)");
			 }
		  }
	   }
	}
	return(\@out);
}



#______________________________________________________________
# Title     : scale_for_horizontal_histogram
# Usage     :
# Function  : used to make things like:
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub scale_for_horizontal_histogram{
	# Chooses how strongly a horizontal histogram must be condensed for a
	# query of the given length.
	#
	#   $_[0]   : reference to an array; only its element count is used.
	#   Returns : reference to the package variable $condense_factor, which is
	#             set to the chosen factor (2 .. 20).
	my $query_size=scalar @{$_[0]};

	# [ size that must be exceeded, factor ], checked from largest to smallest.
	my @scale_steps=(
		[2400, 20], [2200, 18], [1900, 16], [1600, 15],
		[1400, 14], [1200, 12], [1000, 10], [ 800,  9],
		[ 630,  8], [ 440,  6], [ 220,  4], [ 120,  3],
	);

	$condense_factor=2;   # default for queries of 120 elements or fewer
	foreach my $step (@scale_steps){
		my ($size_limit, $factor_for_limit)=@{$step};
		if($query_size > $size_limit){
			$condense_factor=$factor_for_limit;
			last;
		}
	}
	return(\$condense_factor);
}



#______________________________________________________________
# Title     : get_added_matched_regions_in_mspa
# Usage     :
# Function  : This reads MSP file regions matched for a target seq
#             and adds things up to plot horizontally.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_added_matched_regions_in_mspa{
	 # Builds a per-position hit count for a query sequence from MSP lines.
	 #
	 #   $_[0]   : reference to an array of MSP text lines.
	 #   Returns : reference to the package array @query, where $query[$pos]
	 #             is the number of matches covering position $pos.
	 #
	 # Everything except @lines is a package global (no `my`): @query,
	 # @matched_members, $query_name, $query_leng, $query_start, $query_end,
	 # $query_seq, $match_start, $match_end, $match_name, $desc, $i, $j, $k.
	 # NOTE(review): @query and @matched_members are never emptied here, so
	 # counts and names from earlier calls persist unless a self-hit line
	 # re-zeroes the leading part of @query -- confirm callers expect this.
	 my @lines=@{$_[0]};
	 for($i=0; $i< @lines; $i++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  if($lines[$i]=~/^\s*(\d+)\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s*(.*)/){
		 if($4 eq $7){
			# Self hit (query name equals match name): take its end column as
			# the query length and reset that many counters to zero.
			$query_name=$4;
			$query_leng=$3;
			for($j=0; $j<$query_leng; $j++){ $query[$j]=0; }
			next;
		 }else{
			# Record the match name when it differs from the previous line's.
			if($match_name ne $7){ push(@matched_members, $7); }
			$query_start=$2;
			$query_end  =$3;
			$query_seq  =$4;
			$match_start=$5;
			$match_end  =$6;
			$desc       =$8;
			$match_name =$7;
			# Count this match on every position from $query_start up to, but
			# not including, $query_end (positions are used directly as indices).
			for($k= $query_start; $k<$query_end; $k++){
			   $query[$k]++;
			}
		 }
	  }
	 }
	 return(\@query);
}




#______________________________________________________________
# Title     : cluster_merged_seqlet_sets
# Usage     : @out=@{&cluster_merged_seqlet_sets(\@lines)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#  $short_region=  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region=  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region=A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 1.8
#--------------------------------------------------------------
sub cluster_merged_seqlet_sets{
	 # Merges entries of a list of seqlet sets whenever two entries are linked.
	 #
	 # Arguments are routed through handle_arguments(); this sub then uses
	 #   $array[0] : first array ref; each element is one set, written as a
	 #               whitespace-separated string of seqlet names,
	 #   %vars     : f=<factor> forwarded to the linkage check (default 7),
	 #   $char_opt : o = optimize (merge with remove_similar_seqlets instead of
	 #                   remove_dup_in_array), S / L / A = region choice passed
	 #                   to remove_similar_seqlets, v = verbose.
	 #
	 # Returns a reference to the (shortened) list of merged sets.
	 #
	 # NOTE(review): $short_region, $large_region and $average_region are not
	 # declared with `my`, so they are package globals: a value set by one call
	 # (or by other code) is still in effect on later calls -- confirm whether
	 # that is intentional.
	 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	 if($debug==1){print "\n\t\@hash=\"@hash\"
	 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my ($optimize, @splited1, @splited2, $verbose, $link_or_not);
	 my @seq_names_in_clu=@{$array[0]};
	 $link_or_not=0;
	 my $factor=7; # 7 means 70% now

	 if($vars{'f'}=~/(\S+)$/){ $factor=$1 }
	 if($char_opt=~/o/){ $optimize=1 }
	 if($char_opt=~/S/){ $short_region='S'; }
	 if($char_opt=~/L/){ $large_region='L';   }
	 if($char_opt=~/A/){ $average_region='A'; }
	 if($char_opt=~/v/){ $verbose=1 }

	 if($verbose){ print "\n# (1) cluster_merged_seqlet_sets: Checking linkage and merging <<<<<>>>>>\n@seq_names_in_clu\n";   }

	 # Compare every set with every other set.  On the first linked pair the
	 # two are merged into entry $i, entry $j is removed, and the outer loop
	 # is restarted for the same index ($i-- followed by F1's own $i++).
	 F1: for($i=0; $i< @seq_names_in_clu; $i++){
			@splited1=split(/\s+/, $seq_names_in_clu[$i]);
			for($j=0; $j< @seq_names_in_clu; $j++){
				# Skips the entry itself, and also any other entry with identical text.
				if($seq_names_in_clu[$i] eq $seq_names_in_clu[$j]){ next  }
				@splited2=split(/\s+/, $seq_names_in_clu[$j]);

				$link_or_not=${&check_linkage_of_2_similar_seqlet_sets(\@splited1, \@splited2, "f=$factor")};
				print "\n +++++ \$link_or_not is  $link_or_not +++" if $verbose;
				if($link_or_not==1){
						 if($verbose){
								 print "\n# (2) cluster_merged_seqlet_sets: \n $seq_names_in_clu[$i]  \n and $seq_names_in_clu[$j] \n are linked \n";
						 }

						 if($optimize){ ##---- This will also remove similar seqlets, not only identical ones
								$seq_names_in_clu[$i]=join(' ', sort @{&remove_similar_seqlets( [@splited1, @splited2],
																						$short_region, $large_region, $average_region)} );
						 }else{
								$seq_names_in_clu[$i]=join(' ', sort @{&remove_dup_in_array( [@splited1, @splited2])} );
						 }
						 splice(@seq_names_in_clu, $j,1);
						 # $j-- has no effect: `next F1` restarts the inner loop at 0.
						 # NOTE(review): when $j < $i the splice moves the merged
						 # entry down to index $i-1, so the restart resumes on the
						 # entry AFTER the merged one -- confirm this is intended.
						 $j--; $i--;
						 next F1;
		 }
	  }
	 }
	 return(\@seq_names_in_clu);
}

#______________________________________________________________________________
# Title     : check_mailcap_file_in_user_home_dir
# Usage     :
# Function  : checks rasmol setting in .mailcap file
# Example   :
# Keywords  : check_rasmol_setting mailcap
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub check_mailcap_file_in_user_home_dir{
    # Checks whether $HOME/.mailcap mentions 'rasmol'.
    #
    #   Arguments : none (reads $ENV{HOME}).
    #   Returns   : reference to a scalar that is 1 when a line containing
    #               'rasmol' was found, undef otherwise.
    #   Dies      : when the file is missing or empty (after printing a hint
    #               to STDOUT), or when it cannot be opened.
    #
    # The file is opened read-only with three-argument open on a lexical
    # handle, the scan stops at the first hit, and the caller's $_ is left
    # untouched.
    my $mailcap_file_correctly_set;
    my $mailcap_file="$ENV{HOME}/.mailcap";

    unless(-s $mailcap_file){
       print "\n$0:  To view molecules with Rasmol, you have to set $mailcap_file file correctly\n";
       die;
    }

    open(my $mailcap_fh, '<', $mailcap_file)
        or die "$0: can not open $mailcap_file file: $!\n";
    while(my $line=<$mailcap_fh>){
       if($line=~/rasmol/){
           $mailcap_file_correctly_set=1;
           last;
       }
    }
    close($mailcap_fh);
    return(\$mailcap_file_correctly_set);
}




#______________________________________________________________________________
# Title     : check_homology_of_seq_pair
# Usage     :
# Function  :
# Example   : $homology_info=${&check_homology_of_seq_pair(\$pairs[$i], \%pdbg_hash_table)};
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub check_homology_of_seq_pair{
		# Decides whether two sequences carry the same classification.
		#
		# Arguments may come in any order and are recognised by type:
		#   scalar ref : 'name1 name2' (two whitespace-separated names),
		#   array ref  : [name1, name2] (exactly two elements),
		#   hash ref   : either the classification table (name => 'n.n.n...'),
		#                or the pair itself given as {name1 => name2} or
		#                {'name1 name2' => ...}.  Only one (arbitrary) key of
		#                the hash is inspected to tell these cases apart.
		#
		# Returns a reference to the string 'Homolog' when both names have a
		# classification and the leading 'n.n.n' parts are equal, otherwise a
		# reference to 'Nomolog'.  Progress and warnings go to STDOUT.
		#
		# The "NOT FOUND" warnings now name the sequence that is missing from
		# the table; they used to interpolate the (always empty) table value.
		my(%pdbg_hash_table, $seq_name1, $seq_name2,
			 $classification1, $classification2);
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Argument handling
		#___________________________________
		for my $arg (@_){
			 my $arg_type=ref($arg);
			 if($arg_type eq 'SCALAR'){
					 if(${$arg}=~/^(\S+)\s+(\S+)$/){ ($seq_name1, $seq_name2)=($1,$2); }
			 }elsif($arg_type eq 'ARRAY'){
					 if(@{$arg} == 2){ ($seq_name1, $seq_name2)=@{$arg}; }
			 }elsif($arg_type eq 'HASH'){
					 my %input_hash=%{$arg};
					 my @keys=keys %input_hash;
					 if($keys[0]=~/\S+/ and $input_hash{$keys[0]}=~/(\d+\.\d+\.\d+)/){
							 %pdbg_hash_table=%input_hash;
					 }
					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 # When seq pair name is given in HASH=(seq1, seq2)
					 #___________________________________________________
					 elsif($keys[0]=~/^\S+$/ and $input_hash{$keys[0]}=~/^\S+$/){
							 ($seq_name1, $seq_name2)= ($keys[0], $input_hash{$keys[0]});
					 }elsif($keys[0]=~/^(\S+)\s(\S+)$/){
							 ($seq_name1, $seq_name2)= ($1, $2);
					 }
			 }
		}

		if($pdbg_hash_table{$seq_name1}=~/^(\d+\.\d+\.\d+)/){
			 $classification1=$1;
		}else{
			 print "\n\t# (W) \'$seq_name1\' is NOT FOUND in \%pdbg_hash_table, One member group??";
		}
		if($pdbg_hash_table{$seq_name2}=~/^(\d+\.\d+\.\d+)/){
			 $classification2=$1;
		}else{
			 print "\n\t# (W) \'$seq_name2\' is NOT FOUND in \%pdbg_hash_table, One member group??";
		}
		if($classification1 and $classification1 eq $classification2){
			 print "\n\t# (i) $seq_name1 $classification1 == $seq_name2 $classification2";
			 return(\'Homolog');
		}else{
			 print "\n\t# (i) $seq_name1 $classification1 =X= $seq_name2 $classification2";
			 return(\'Nomolog');
		}
}



#______________________________________________________________
# Title     : check_linkage_of_2_similar_seqlet_sets
# Usage     :
# Function  : connects two clusters of seqlets if they share
#              identical or near identical seqlets
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#  $factor = by f=  # eg)  "f=$factor" in the higher level sub
#
# Returns   :
# Argument  :
# Version   : 2.0
#--------------------------------------------------------------
sub check_linkage_of_2_similar_seqlet_sets{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Decides whether two seqlet sets are linked.
    # Input   : $_[0], $_[1]  refs of arrays of seqlets, each either
    #                         'NAME_start-end' or a plain name
    #           $_[2]         optional 'f=<number>' (default 7)
    # Returns : ref of 1 when linked, ref of 0 otherwise.
    # Linked means one of:
    #   - a seqlet string of set 1 is identical to one of set 2
    #   - two seqlets of the same NAME whose mean boundary shift
    #      (|start1-start2| + |end1-end2|)/2 is not larger than
    #      $smaller_leng - $smaller_leng*($factor/10)
    # Output  : one diagnostic line is printed per same-name comparison.
    # Note    : all working variables are lexical; nothing is left behind
    #           in package globals.
    #_________________________________________________________________________
    my @splited1 = @{$_[0]};
    my @splited2 = @{$_[1]};
    my $link_or_not = 0;
    my $factor = 7;  # this means 70% sequence region overlap of the intermediate is chosen

    if(defined($_[2]) and $_[2] =~ /f=(\S+)/i){ $factor = $1; }

    F1: for my $item1 (@splited1){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Checks if the input has :  XXXXX_10-400 format or simple name like XXXXXX
        #______________________________________________________________________________
        my ($seq1, $name1, $start1, $end1);
        if($item1 =~ /^ *((\S+)_(\d+)\-(\d+))/){
            ($seq1, $name1, $start1, $end1) = ($1, $2, $3, $4);
        }else{
            $seq1  = $item1;
            $name1 = $start1 = $end1 = '';
        }

        F2: for my $item2 (@splited2){
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # If the first seqlet has XXXX_10-100 format, then compare regions
            #_______________________________________________________________________
            if($name1 and $item2 =~ /^ *((\S+)_(\d+)\-(\d+))/){
                my ($seq2, $name2, $start2, $end2) = ($1, $2, $3, $4);
                if($seq1 eq $seq2){ $link_or_not = 1; return(\$link_or_not); }
                next F2 if $name1 ne $name2;

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # The most important part: the tolerated mean boundary shift
                #  is the part of the shorter region NOT covered by $factor.
                #_______________________________________________________________
                my $leng1        = $end1 - $start1;
                my $leng2        = $end2 - $start2;
                my $smaller_leng = $leng1 >= $leng2 ? $leng2 : $leng1;
                my $diff_start   = abs($start1 - $start2);
                my $diff_end     = abs($end1   - $end2);
                my $final_factor = $smaller_leng - $smaller_leng*($factor/10);
                my $final_diff   = ($diff_start + $diff_end)/2;
                if($final_diff <= $final_factor){
                    $|=1;
                    print "\n$seq1 $seq2: $final_diff ($diff_start, $diff_end): $smaller_leng, $final_factor, ";
                    $link_or_not = 1;
                    return(\$link_or_not);
                }else{
                    print "\n$seq1 $seq2: $final_diff ($diff_start, $diff_end): $smaller_leng, $final_factor 0\n";
                }
            }else{
                # Plain names (or a malformed second seqlet): only an exact
                #  string match links the two sets.
                if($seq1 eq $item2){ $link_or_not = 1; }
            }
        }
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # If $link_or_not has become 1 in any of the above part, 1 is returned
    #________________________________________________________________________
    return(\$link_or_not);
}



#__________________________________________________________________________
# Title     : merge_arrays_by_common_elements
# Usage     :  @out=@{&merge_arrays_by_common_elements(\@ref_of_arrays)}
# Function  : merges arrays if there are common array elements.
#             if @A has (1,2,3) and @B has (2, 4, 5), they share 2, so
#             they are merged to be (1,2,3,4,5)
# Example   :
# Keywords  : cluster_arrays_by_common_elements, merge_arrays_if_common_elements
#             merge_array_if_common_elements, merge_arrays_when_common_elements_occur
#             merge_arrays
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub merge_arrays_by_common_elements{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Merges NEIGHBOURING arrays which share at least one element.
    #           A merged array is compared again with its new neighbour, so
    #           chains of overlapping neighbours collapse into one array.
    # Input   : one ref of an array of array refs, or several array refs.
    # Returns : ref of an array of array refs. Untouched arrays keep their
    #           original ref; a merged array is a new ref holding each
    #           element once, in order of first appearance.
    # Note    : elements are compared as strings (hash keys). A value that
    #           is merely repeated inside ONE array does not count as a
    #           common element.
    #_________________________________________________________________________
    my @mother_array;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Handling input array
    #______________________________________
    if( @_==1 and ref($_[0]) eq 'ARRAY'){
        @mother_array = @{$_[0]};
    }elsif(@_ > 1){
        @mother_array = @_;
    }else{
        print "\n# The input for merge_arrays_by_common_elements needs one ref of array or multiple refs of array\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Main algo
    #______________________________________
    my $i = 0;
    while($i < $#mother_array){          # the last array has no right neighbour
        my ($left, $right) = @mother_array[$i, $i+1];
        unless(ref($left) eq 'ARRAY' and ref($right) eq 'ARRAY'){
            $i++;
            next;
        }

        my %in_left = map { $_ => 1 } @{$left};
        if(grep { $in_left{$_} } @{$right}){
            my %seen;
            $mother_array[$i] = [ grep { !$seen{$_}++ } (@{$left}, @{$right}) ];
            splice(@mother_array, $i+1, 1);
            # $i is kept, so that the merged array meets its new neighbour
        }else{
            $i++;
        }
    }
    return(\@mother_array);
}


#________________________________________________________________________________
# Title     : check_parf_files
# Usage     : $number_of_parf=${&check_parf_files(@input)};
# Function  : checks if given file(s) is a parf file and returns the number of
#              identified parf file. If you check 2 files and both are parf, you
#               will get (\$num_of_parf_file) value of 2.
# Example   :
#    PARF file looks like this>
#   d1nsca_   d3nn9__   Homolog -664.92 2.43.1.1.3  2.43.1.1.2
#   d1dppa_   d2olba_   Homolog -617.41 3.68.1.1.6  3.68.1.1.1
#   d2ach.1a1 d9api.1a1 Homolog -556.38 5.2.1.1.3   5.2.1.1.4
# Keywords  :
# Options   :
# Author    :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub check_parf_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Counts how many of the given files look like PARF files.
    # Input   : file names, refs of file names and/or refs of arrays of
    #           file names. Anything which is not an existing plain file
    #           is ignored.
    # Returns : ref of the number of PARF files found (0 when none).
    # Method  : a file is a PARF file when one of its first 101 lines has
    #            'name name Homolog|Nomolog field field field'.
    #           The line counter starts from 0 for EVERY file.
    # Globals : $verbose (read only) switches the progress messages on.
    #_________________________________________________________________________
    my @parf_file;
    my $num_of_parf_file = 0;

    for my $arg (@_){
        if(ref($arg) eq 'SCALAR'){
            push(@parf_file, ${$arg}) if defined(${$arg}) and -f ${$arg};
        }elsif(ref($arg) eq 'ARRAY'){
            push(@parf_file, grep { defined($_) and -f $_ } @{$arg});
        }elsif(defined($arg) and !ref($arg) and -f $arg){
            push(@parf_file, $arg);
        }
    }
    print "\n# There were: @parf_file\n" if $verbose;

    FILE: for my $file (@parf_file){ ## usually @parf_file has only one element!
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Check if it is parf file inside the file
        #__________________________________________________________
        my $input_file;
        unless(open($input_file, '<', $file)){
            print "\n# (W) check_parf_files: can not open $file: $!\n" if $verbose;
            next FILE;
        }
        my $lines_read = 0;
        while(my $line = <$input_file>){
            if($line =~ /^\s*\S+\s+\S+\s+[HN]omolog\s+\S+\s+\S+\s+\S+/){
                $num_of_parf_file++;
                print "\n# $file is a PARF file\n" if $verbose;
                last;
            }
            if(++$lines_read > 100){  ## giving up, it is not PARF file!
                print "\n# $0 needs to have PARF files, others are ignored" if $verbose;
                last;
            }
        }
        close($input_file);
    }
    return(\$num_of_parf_file);
}



#__________________________________________________________________________
# Title     : check_common_elements_in_array
# Usage     : &check_common_elements_in_array($mother_array[$i], $mother_array[$i+1]));
# Function  : accepts 1 or 2 refs of arrays and checks if there is any
#             common(repeating) elements between the two (or inside one)
#             The result is either ref of 1, or 0
# Example   :
# Keywords  : is_there_common_element, if_common_elements
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub check_common_elements_in_array{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Takes one or two array refs, pools their elements and stops at the
    #  first element which shows up a second time.
    # Returns ref of 1 (after printing the repeated element) when there
    #  is a repeat, ref of 0 when every pooled element is unique.
    #____________________________________________________________________
    my @pool = @_ > 1 ? (@{$_[0]}, @{$_[1]}) : (@{$_[0]});
    my %times_seen;

    foreach my $element (@pool){
        my $count = ++$times_seen{$element};
        next if $count <= 1;
        print "\n# $count     $element common!\n";
        return(\1);
    }
    return(\0);
}

#______________________________________________________________________________
# Title     : list_small_files
# Usage     : @files_listed=@{&list_small_files(@ARGV)};
# Function  :
# Example   :
# Keywords  : show_small_files
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub list_small_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Lists the files of the current directory whose size lies
    #           between a lower and an upper limit (both inclusive).
    # Input   : $_[0]  upper size limit in bytes (value or ref of value)
    #           $_[1]  optional lower size limit (value or ref of value).
    #                  When it is not given, $ARGV[1] is used as before,
    #                  so &list_small_files(@ARGV) works unchanged.
    # Returns : ref of an array of the matching file names.
    # Output  : one line per listed file, with size and the time taken
    #           from element 9 of stat() (the modification time).
    # Needs   : &read_file_names_only
    #_________________________________________________________________________
    my $file_size_cut_line = ref($_[0]) eq 'SCALAR' ? ${$_[0]} : $_[0];
    my $file_size_cut_line_bigger_than;
    if(defined($_[1])){
        $file_size_cut_line_bigger_than = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : $_[1];
    }else{
        $file_size_cut_line_bigger_than = $ARGV[1];
    }

    my @files = @{&read_file_names_only('.')};
    my @files_listed;

    for my $file (@files){
        my $size          = -s $file;
        my $creation_time = localtime( (stat($file))[9] );
        if($size <= $file_size_cut_line and $size >= $file_size_cut_line_bigger_than){
            push(@files_listed, $file);
            print "\n# (i) $file is smaller than  $file_size_cut_line, size= $size byte, $creation_time";
        }
    }
    return(\@files_listed);
}


#______________________________________________________________________________
# Title     : link_prot_superfam_class_to_SCOP_web_site
# Usage     :
# Function  :
# Example   :
# Keywords  : attach_scop_URL_to_superfamily_classification
#             link_protein_superfam_class_to_SCOP_web_site
#             link_SCOP, link_scop, attach_URL_, attach_url
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub link_prot_superfam_class_to_SCOP_web_site{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Wraps every 'class.fold.superfamily' number triple of a
    #           line into an HTML link to the matching SCOP page.
    # Input   : $_[0]  the line, or a ref of the line
    # Returns : ref of the line with the links put in. A line which has
    #           no triple, or which already carries such a link, comes
    #           back unchanged.
    # Note    : the text around the triples (leading AND trailing) is kept
    #           exactly; each triple gets exactly one link.
    #_________________________________________________________________________
    my $line = ref($_[0]) eq 'SCALAR' ? ${$_[0]} : $_[0];
    return(\$line) unless defined($line);

    # Already linked lines are left alone
    if($line =~ /\/scop.\d+.\d+.\d+.\d+\.\d+\.\d+\.html\"\>\d+\.\d+\.\d+/){
        return(\$line);
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # To create: http://scop.mrc-lmb.cam.ac.uk:80/scop/data/scop.1.000.000.000.000.html
    #  each number is left-padded with 0 up to 3 characters (longer ones are kept)
    #______________________________________________________________________________________
    my $pad_to_3 = sub {
        my $number = shift;
        return length($number) < 3 ? ('0' x (3 - length($number))) . $number : $number;
    };
    my $make_link = sub {
        my ($class, $fold, $superfam) = @_;
        my $class_URL = join('.', map { $pad_to_3->($_) } ($class, $fold, $superfam));
        return "<A href=\"http://scop.mrc-lmb.cam.ac.uk/scop/data/scop.1.$class_URL.000.000.html\">"
             . "$class.$fold.$superfam</A>";
    };

    $line =~ s/(\d+)\.(\d+)\.(\d+)/$make_link->($1, $2, $3)/ge;
    return(\$line);
}

#__________________________________________________________________________
# Title     : link_ranges
# Usage     : @all_ranges = @{&link_ranges(\@all_ranges)};  # takes a REF of the array
# Function  : merges ranges(10-20, 11-21 etc) when there is any overlap
#              is present
#             If you put a reverse range like '2000-20', it will
#              complain and reverse the order and do the job after correction.
#
# Example   : INPUT:
#
#   @input=( '1-30 1-40 1-50',
#            '2-49 4-40 2-99'....)
#
# Keywords  : connect_ranges, link_overlapping_ranges, connect_overlapping_ranges
# Options   : _  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------
sub link_ranges{
	 my (@all_ranges, $new_start, $new_end, @output, $i, $seq1, $start1,
	   $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2);
	 my $leng_thresh=30;
	 my $optimize=0;
	 for($i=0; $i< @_; $i++){
	  if(ref($_[$i]) eq 'ARRAY'){
		  @all_ranges=@{$_[$i]};
			}
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  Sorting the ranges by the starting range number.(essential)
	 #______________________________________________________________
	 @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			   map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  iterating merger
	 #_________________________________________________________
	 for($i=0; $i< @all_ranges; $i++){
	   if($all_ranges[$i] =~/(\d+)\-(\d+)/){
		   ($start1, $end1)=($1, $2);
		   if($start1 > $end1){
			  print "\n# link_ranges: Error, \$start1 :$start1 is larger than \$end1: $end1\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }
	   }
	   if($all_ranges[$i+1] =~/(\d+)\-(\d+)/){
		   ($start2, $end2)=($1, $2);
		   if($start2 > $end2){
			  print "\n# link_ranges: Error, \$start2 :$start2 is larger than \$end2: $end2\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i+1]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }

		   if($start1 <= $start2      and $end1 >= $end2){    ## -----------------
			   $new_start=$start1;                             #      --------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if( $start2 <= $start1     and $end2 >= $end1){      ##    -------
			   $new_start=$start2;                              #  ----------------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start1 <= $start2      and $start2 <= $end1){     #  -----------
			   $new_start=$start1;                               #     -----------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start2 <= $start1      and $start1 <= $end2){    #       -----------
			   $new_start=$start2;                              #  ---------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
	   }
	 }
	 return(\@all_ranges);
}



#__________________________________________________________________________
# Title     : merge_similar_ranges
# Usage     : @all_ranges = @{&merge_similar_ranges(\@all_ranges, 'f=0.7')};  # REF of the array, 'f=' is optional
# Function  : merges ranges(10-20, 11-21 etc) when there is any overlap
#              is present (resulting in average start and end at each level)
#             If you put a reverse range like '2000-20', it will
#              complain and reverse the order and do the job after correction.
#
# Example   : INPUT:
#
#   @input=( '1-30 1-40 1-50',
#            '2-49 4-40 2-99'....)
#
# Keywords  : merge_similar_regions, merge_ranges, merge_regions,
#              merge_sequence_ranges, merge_overlap_ranges, connect_ranges
#              connect_overlapping_ranges, connect_similar_ranges,
#              remove_similar_ranges
# Options   : f=   for setting factor (0.7 for 70% overlap minimum)
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------------
sub merge_similar_ranges{
	 my (@all_ranges, $new_start, $new_end, @output, $i, $seq1, $start1,
	   $end1, $seq2, $average_leng,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2);
	 my $factor=0.9;     #  0.8 means 80% overlap
	 my $leng_thresh=30;
	 my $optimize=0;
	 for($i=0; $i< @_; $i++){
	  if(ref($_[$i]) eq 'ARRAY'){
		  @all_ranges=@{$_[$i]};
	  }elsif($_[$i]=~/f=(\S+)/){
					$factor=$factor_ori=$1;
	      #print "\n# merge_similar_ranges: Factor used will be $factor\n\n";
	  }elsif($_[$i]=~/z/i){
	      $optimize=1 }
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  Sorting the ranges by the starting range number.(essential)
	 #______________________________________________________________
	 @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			   map { $_=~/(\d+)\-\d+\s*$/ and [$_, $1] } @all_ranges;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  iterating merger
	 #_________________________________________________________
	 for($i=0; $i< @all_ranges; $i++){
			 $factor=$factor_ori;

	   if($all_ranges[$i] =~/(\d+)\-(\d+)\s*$/){  ## ranges are at the end
		   ($start1, $end1)=($1, $2);
		   if($start1 > $end1){
			  print "\n# merge_similar_ranges: Error, \$start1 :$start1 is larger than \$end1: $end1\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-\d+\s*$/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }
	   }
			 if($all_ranges[$i+1] =~/(\d+)\-(\d+)\s*$/){ ## ranges are at the end
		   ($start2, $end2)=($1, $2);
		   if($start2 > $end2){
			  print "\n# merge_similar_ranges: Error, \$start2 :$start2 is larger than \$end2: $end2\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i+1]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-\d+\s*$/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }

					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 $average_leng=(($end2+$end1)/2)-(($start1+$start2)/2);   ## this is protein seq ranges
					 $factor-=$average_leng/5000; ## adjusting $factor according to the size of range.
					 #_____________________________________________________________

					 if($start1 <= $start2      and $end1 >= $end2){        ## -----------------

							 if(($end1-$start1)*$factor  <= ($end2-$start2) ){    #      --------
									$new_start=(($start1+$start2)/2);
			      $new_end  =(($end1+$end2)/2);
			      splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			      $i--;			      next;
			   }
		   }
		   if( $start2 <= $start1     and $end2 >= $end1){       ##    -------
							 if(($end2-$start2)*$factor  <= ($end1-$start1) ){  #  ----------------
									 $new_start=(($start2+$start1)/2);
									 $new_end  =(($end1+$end2)/2);
									 splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									 $i--;              next;
							 }
		   }
		   if($start1 <= $start2      and $start2 <= $end1){     #  -----------
							 if(($end1-$start2) >=  ($end2-$start1)*$factor ){ #      -----------
									$new_start=(($start2+$start1)/2);
									$new_end  =(($end1+$end2)/2);
									splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									$i--;               next;
							 }
		   }
		   if($start2 <= $start1      and $start1 <= $end2){     #       -----------
							 if(($end2-$start1) >=  ($end1-$start2)*$factor ){ #    -----------
									 $new_start=(($start2+$start1)/2);
									 $new_end  =(($end1+$end2)/2);
									 splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									 $i--;              next;
							 }
		   }
	   }
	 }
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  making everything integer at the last minute to save CPU
	 #___________________________________________________________
	 @all_ranges = map{ /(\S+)\-(\S+)\s*$/ and int($1).'-'.int($2)  } @all_ranges;
	 return(\@all_ranges);
}



#_________________________________________________________________________
# Title     : merge_similar_seqlets
# Usage     : @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets)};  # takes a REF of the array
# Function  : merges seqlet sets which have identical
#             sequences and share similar regions by connection factor of 30%
#             This means, if any two seqlets from the same sequences which
#             share more than 70% seqlet regions overlapping are merged
#             This only sees the very first sequence in the seqlets line!!!
#             (so, PARTIAL MERGE !!)
# Example   : INPUT:
#
#   @input=( 'seq1_1-30 seq2_1-40 seq3_1-50',
#            'seq1_2-49 seq3_4-40 seq4_2-99'....)
#
#   @output=('seq1_1-30 seq2_1-45 seq3_2-45 seq4_2-99');
#
# Keywords  : merge_similar_sequences, merge_sequence_names, merge_sequences,
#              merge_sequence_ranges, merge_similar_sequences_with_ranges,
#              merge_seqlets, merge_duplication_modules
# Options   :
#
#   f=<digit>   for determing the factor in filtering out non-homologous
#                  regions, 7 = 70% now!!
#   l=<digit>   for seqlet(duplication module) length threshold
#   z           for activating remove_similar_sequences, rather than remove_dup....
#   S  $short_region=  S by S -S  # taking shorter region overlap in removing similar reg
#   L  $large_region=  L by L -L  # taking larger  region overlap in removing similar reg
#   A  $average_region=A by A -A  # taking average region overlap in removing similar reg
#
# Version   : 2.2
#-------------------------------------------------------------------------------
sub merge_similar_seqlets{
	 my (@all_seqlets, @result_all_seqlets, $i, $j, $k, $seq1, $start1, $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2, $factor, $leng_thresh, $optimize,
			 $short_region, $large_region, $average_region, $overlapping_seq_match_size);
	 $factor=7;     #  30% sequence mismatch region is allowed(3)
	 $leng_thresh=30;
	 $optimize=1;
	 $average_region='A'; # default

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	 # Sorting (parsing) input to get options and input array
	 #_________________________________________________________
	 for($i=0; $i< @_; $i++){
	     if(ref($_[$i]) eq 'ARRAY'){
             @all_seqlets=@{$_[$i]}; #<------------ @all_seqlets is a very very big array with all the mspa chunks altogether
			 }elsif($_[$i]=~/f=(\S+)/){  $factor=$1;
			 }elsif($_[$i]=~/z/i){       $optimize=1;
			 }elsif($_[$i]=~/l=(\d+)/i){ $leng_thresh=$1;
			 }elsif($_[$i]=~/^S/){       $short_region='S';   $large_region=$average_region='';
			 }elsif($_[$i]=~/^L/){       $large_region='L';   $short_region=$average_region='';
			 }elsif($_[$i]=~/^A/){       $average_region='A'; $short_region=$large_region  =''; }
	 }
	 if(@all_seqlets==1){
         return(\@all_seqlets);
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # This is to remove which are identical in @all_seqlets;
	 #_________________________________________________________
	 F1: for($i=0; $i< @all_seqlets; $i++){
		my $merged_two_seqlet_lines;

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
        # The following is correct. Don't touch again
        #__________________________________________________
        if($all_seqlets[$i] eq $all_seqlets[$i+1]){
			splice(@all_seqlets, $i+1, 1);
			$i-- if $i >0;    next F1;
	    }else{
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # @split1 and 2 are arrays from different string entry in @all_seqlets
            #______________________________________________________________________
            @split1=sort split(/\s+/, $all_seqlets[$i]);
            @split2=sort split(/\s+/, $all_seqlets[$i+1]);
		}

	    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    #   (3) If the first elements of @split1 and 2 are identical, lets merge the two arrays. For example,
	    #    aa_EC1427_1-390 aa_EC388_1-374 ap_EC143_23-399 dr_6457710_11-405 ec_1787201_9-360 mj_MJ1649_5-387 mj_MJ1653_4-383 pa_5459109_1-394 ph_PH1915_1-394 tm_4982274_20-385
        #    aa_EC1427_1-390 aa_EC388_1-372 ap_EC143_40-399 dr_6457710_11-407 dr_6459463_3-373 ec_1787201_4-367 mj_MJ1649_5-385 mj_MJ1653_39-382 pa_5459109_21-392 ph_PH1915_21-392 tm_4982274_12-382
	    #__________________________________________________________________________________________________
		if($split1[0] eq $split2[0] or $split1[0] eq $split2[1] or $split1[1] eq $split2[0]){
              @split=(@split1, @split2);
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # This step is proven to be fine. optimize option removes similar seqlets
              #___________________________________________________
              if(1){
                 $all_seqlets[$i]= join(' ', sort @{&remove_similar_seqlets(\@split,
		                              $short_region, $large_region, $average_region)} );
		      }else{
				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 # Only removes exactly identical ones
				 #__________________________________________________________
				 $all_seqlets[$i]=  join(' ', @{&remove_dup_in_array(\@split, 's')} );
		      }
		      splice(@all_seqlets, $i+1, 1);     $i-- if $i >0;     next F1;
	    }

	    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (4) If the first elements of @split1 and 2 are NOT identical, lets check the sequence ranges
	    #_____________________________________________________________________________________________
		F2: for($j=0; $j < @split1; $j++){
			if($split1[$j] =~/^\s*(\S+)_(\d+)\-(\d+)/){
				 my ($seq1, $start1, $end1)=($1, $2, $3);

				 F3: for($k=0; $k<@split2; $k++){
					 if($split2[$k] =~/(\S+)_(\d+)\-(\d+)/){
						 my($seq2, $start2, $end2)=($1, $2, $3);

						 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
						 # Check if the seqs are identicl (from the two arrays), no point to merge which are not identical from the first
						 #__________________________________________________________________________________________
						 if($seq1 eq $seq2){
                             $diff_start=abs($start1-$start2); $diff_end  =abs($end1  -$end2  );
                             $leng1=$end1-$start1; $leng2=$end2-$start2;
                             if($leng1 >= $leng2){  $smaller_leng=$leng2; $larger_leng =$leng1
                             }else{  $smaller_leng=$leng1;  $larger_leng =$leng2       }

                             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                             # Checking the minimal seq region leng here
                             #______________________________________________________
                             if($smaller_leng < $leng_thresh){ next }

                             $overlapping_seq_match_size=${&get_overlapping_seq_match_size($start1, $end1, $start2, $end2)};
                             $averge_seq_leng_of_2_seqs=($leng1+$leng2)/2;

                             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                             # This is the critically important part
                             #_______________________________________________________________
                             if($average_region){      $finally_adjusted_seq_leng=$averge_seq_leng_of_2_seqs*($factor/10);
                             }elsif($short_region){    $finally_adjusted_seq_leng=$smaller_leng*($factor/10);
                             }elsif($large_region){    $finally_adjusted_seq_leng=$larger_leng*($factor/10);     }

                             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                             # Now let's check if we regard them homologous or not\
                             #_______________________________________________________
                             if( $overlapping_seq_match_size >=  $finally_adjusted_seq_leng){
                                 @split= (@split1, @split2);
                                 if($optimize){ #~~~~~ $optimize option removes similar seqlets
                                     $all_seqlets[$i]= join(' ', sort @{&remove_similar_seqlets(\@split,
                                                         $short_region, $large_region, $average_region)} );
                                 }else{
                                       $all_seqlets[$i]= join(' ', @{&remove_dup_in_array(\@split, 's')} );
                                 }
                                 $merged_two_seqlet_lines=1;
                                 splice(@all_seqlets, $i+1, 1);
                                 $i-- if $i >0;  next F1;
                             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                             # We believe they are not homologous
                             #____________________________________________
                             }else{  next F3;  }
                          }
                       }
                       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                       # If there is no range (region) in seq naem, let's skip, as there is no way to check
                       #__________________________________________________________________________________
                       else{ # when split2 does not match xxx_10-20 format
                               next;
                       }
                  }
            }else{  next; } # when split1 does not match xxx_10-20 format
        }
        unless($merged_two_seqlet_lines){   }
	 }
	 return(\@all_seqlets);
}






#______________________________________________________________
# Title     : sort_by_digits_in_string
# Usage     :
# Function  : sorts arrays of strings like
#
#   MJ0228_314-573 MJ1197_348-601
#   MJ0228_451-576 sll0078_502-594 sll1425_489-611
#   MJ0228_479-572 sll0078_502-594
#
#   According to the digits after seq names _314-, _451-, _479-
#    in the above
#   This only looks at the very first sequence in the string
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#--------------------------------------------------------------
sub sort_by_digits_in_string{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose : Orders lines like 'MJ0228_314-573 MJ1197_348-601' by the
    #           name of their FIRST seqlet and, for lines with the same
    #           name, by the start number of that seqlet (numerically).
    # Input   : $_[0]  ref of the array of lines
    # Returns : ref of the array of ordered lines. Lines whose first word
    #           is not of the 'name_start-end' form are left out.
    # Method  : the lines are sorted as strings, which brings equal names
    #           together; each run of one name is then sorted by start.
    #           Lines with equal starts keep their string-sorted order.
    # Note    : the run is sorted by a closure made in every call. (A named
    #           sub nested in here would go on using the variables of the
    #           very first call only.)
    #_________________________________________________________________________
    my @array_of_string = sort @{$_[0]};
    my (@out, @run, $old);

    # Appends the collected run to @out in the order of the start numbers
    my $sort_and_put_run_to_out = sub {
        push(@out, map  { $_->[0] }
                   sort { $a->[1] <=> $b->[1] or $a->[2] <=> $b->[2] } @run);
        @run = ();
    };

    for my $string (@array_of_string){
        if(defined($string) and $string =~ /^((\S+)_(\d+)\-(\d+)\s*.*)$/){
            my ($line, $name, $start) = ($1, $2, $3);
            if(defined($old) and $name ne $old){
                $sort_and_put_run_to_out->();   # a new name begins
            }
            $old = $name;
            push(@run, [$line, $start, scalar(@run)]);
        }else{
            $sort_and_put_run_to_out->();       # unusable line: closes the run
        }
    }
    $sort_and_put_run_to_out->();               ## for the very last sort
    return(\@out);
}



#______________________________________________________________
# Title     : sort_words_in_string
# Usage     :
# Function  : sort words in strings separated by ' ' or "\n"
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : sort_words_in_sequences, sort_sequences_in_string,
#             sort_strings_in_string, sort_string_by_words, sort_elements_in_string
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub sort_words_in_string{
		# Purpose : sorts (as text) the whitespace separated words inside each
		#           input string and joins them again with single blanks.
		# Argument: a flat list of strings, and/or references to arrays of
		#           strings. An array reference used to be treated as a word
		#           itself ("ARRAY(0x...)"); its elements are now processed.
		# Returns : reference to an array with one sorted string per input
		#           string, in input order.
		my @OUT;
		for my $arg (@_){
				my @strings=(ref($arg) eq 'ARRAY') ? @{$arg} : ($arg);
				for my $string (@strings){
						my @words=split(/\s+|\n/, $string);
						push(@OUT, join(' ', sort @words));
				}
		}
		return(\@OUT);
}



#__________________________________________________________________________
# Title     : convert_hmmls_to_mspa_files
# Usage     : @out=@{&convert_hmmls_to_mspa_files(\@file)};
# Function  :
# Example   :
# Keywords  : convert_hmmls_to_mspa
# Options   :
#   S=$single_out_file_name   for producing single mspa file with all the hmmls contents
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
#   $bit_score_threshold= by t=
# Returns   :
# Argument  :
# Version   : 1.4
#----------------------------------------------------------------------------
sub convert_hmmls_to_mspa_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Converts every hmmls file found in the arguments to MSPA output.
	#   S=name : everything is collected and written once, to 'name'
	#   E=name : enquiry name; it is also used as the base of the output
	#            name(s). Without E= the enquiry name is 'HMM'.
	#   t=num  : bit score threshold handed to open_hmmls_files (default 10)
	# Returns a reference to the array of written files when more than one
	# file was written, otherwise a reference to the single file name.
	my (@all_the_files_written, $written_file, $base,
	    $bit_score_threshold, $out_mspa_file_name, $input_hmmls_file, %out);

	my $single_mode  =($vars{'S'}=~/\S/) ? 1 : 0;
	my $enquiry_given=($vars{'E'}=~/\S/) ? 1 : 0;

	$bit_score_threshold=($vars{'t'}=~/\S/) ? $vars{'t'} : 10;
	# $single_out_file_name and $enquiry_name are package variables here
	# (never declared with my); that is kept as it was.
	$single_out_file_name=$vars{'S'} if $single_mode;
	$enquiry_name=$enquiry_given ? $vars{'E'} : 'HMM';

	for my $hmmls_file (@file){
		# with E= the enquiry name is the output base, else the input file base
		$base=$enquiry_given ? $enquiry_name : ${&get_base_names($hmmls_file)};
		$out_mspa_file_name="$base\.mspa";
		$input_hmmls_file=$hmmls_file;

		unless($single_mode){
			# one mspa file per input file ('m' asks for mspa out)
			%out=%{&open_hmmls_files($input_hmmls_file,
			                         "t=$bit_score_threshold", 'm')};
			$written_file=${&write_mspa_files(\%out, $out_mspa_file_name)};
			push(@all_the_files_written, $written_file);
			next;
		}
		# single file mode: only accumulate, writing happens after the loop
		%out=(%out, %{&open_hmmls_files($input_hmmls_file,
		                                'm', "E=$enquiry_name",
		                                "t=$bit_score_threshold")} );
	}

	if($single_mode){
		$written_file=${&write_mspa_files(\%out, $single_out_file_name)};
		push(@all_the_files_written, $written_file);
	}

	return(\@all_the_files_written) if @all_the_files_written > 1;
	return(\$all_the_files_written[0]);
}


#______________________________________________________________
# Title     : convert_mmp_to_mrg
# Usage     :
# Function  :
# Example   :
#  Example OUT as string
#
#   slr1950 sll1920 sll0672 sll1076 sll1614 slr0797 slr0798 slr0822 slr1729
#   slr1729 sll1076 sll0672 sll1614 sll1920 slr0797 slr0798 slr0822 slr1950
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub convert_mmp_to_mrg{
	 # Turns MMP lines into MRG strings: 'leading_seq member member ...'.
	 # Argument: $_[0] = reference to an array of MMP lines. The 5th column
	 #           is the leading sequence, the 8th (last) column the comma
	 #           separated member list.
	 # Returns : reference to an array of strings. Lines that do not have
	 #           the MMP layout, and lines whose 8th column equals the
	 #           leading sequence, produce nothing.
	 my @lines=@{$_[0]};
	 my @merged;
	 for my $line (@lines){
		 next unless $line=~/^\s*\d+\s+\d+\.?[e\-\d]*\s+\d+\s+\d+\s+(\S+)\s+\d+\s+\d+\s+(\S+)\s*$/;
		 my ($leader, $members)=($1, $2);
		 next if $leader eq $members;     ## self match only
		 $members=~tr/,/ /;
		 push(@merged, "$leader $members");
	 }
	 return(\@merged);
}


#______________________________________________________________________________
# Title     : add_revcomp_sequences
# Usage     : %out=%{&add_revcomp_sequences(\@input_string_or_seq)};
#              or %out=%{&add_revcomp_sequences(\%input_string_or_seq)};
# Function  :
# Example   :
# Keywords  : add_revcomp_string, add_revcomp_string_array, insert_revcomp_string_array
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub add_revcomp_sequences{
		# Adds the reverse complement of every input sequence/pattern.
		# Argument: $_[0] = reference to an ARRAY of sequences (they get the
		#           names PAT_0, PAT_1, ...) or to a HASH name => sequence.
		# Returns : reference to a hash with the input entries plus, for
		#           every name, a 'name_rc' entry with the reverse complement.
		#           IUPAC codes of both cases are complemented; '[' and ']'
		#           are swapped so that they read correctly after reversal.
		# Dies (after printing an error line) for any other kind of argument.
		my $input=$_[0];
		my %patterns;

		if(ref($input) eq 'ARRAY'){
				# arbitrary names for unnamed sequences
				for my $index (0 .. $#{$input}){
						$patterns{"PAT\_$index"}=$input->[$index];
				}
		}elsif(ref($input) eq 'HASH'){
				%patterns=%{$input};
		}else{
				print "\n# (Error) add_revcomp_sequences get either HASH or ARRAY only \n";
				die;
		}

		# the name list is fixed before any '_rc' entry is added
		for my $name (sort keys %patterns){
				my $complement=$patterns{$name};
				$complement =~ tr/\[\]acgtrymkswhbvdnACGTRYMKSWHBVDN/\]\[tgcayrkmswdvbhnTGCAYRKMSWDVBHN/;
				$patterns{"$name\_rc"}=scalar reverse($complement);
		}
		return(\%patterns);
}



#_______________________________________________________________________________
# Title     : add_ranges_in_mspa_line
# Usage     :
# Function  : this adds ranges to the seqnames of mspa files
#             mmp line is mspa line with additional sequences at the end
# Example   :
# Keywords  : convert_mspa_to_mmp, convert_mspa, convert_mspa_2_mmp
#             change_mspa_to_mmp, add_range_in_mspa, convert_mspa_line_to_mmp_line
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub add_ranges_in_mspa_line{
	 # Purpose : rewrites one MSPA line so that both sequence names carry the
	 #           range given by the start/end columns of that line
	 #           (NAME becomes NAME_start-end). When a name already holds a
	 #           _digits-digits range, the text from that range onward is
	 #           replaced rather than appended to.
	 # Argument: $_[0] = the MSPA line, as a plain string or as a reference
	 #           to a scalar.
	 # Returns : reference to the re-formatted line; reference to undef when
	 #           the input does not have the MSPA layout.
	 #
	 # The argument is dereferenced only when it really is a SCALAR
	 # reference. The former '${$_[0]} || $_[0]' used a plain string as a
	 # symbolic reference (a package variable named after the whole line),
	 # which is fatal under 'use strict' and could pick up unrelated data.
	 my $input_mspa=(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
	 my $new;

	 if(defined($input_mspa) and
	    $input_mspa=~/^\s*(\d+)\s+(\S+)\s*\S*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
	  # columns: score evalue start1 end1 seq1 start2 end2 seq2
	  my ($score, $evalue, $start1, $end1, $seq1, $start2, $end2, $seq2)
	     =($1, $2, $3, $4, $5, $6, $7, $8);
	  my $stem1=($seq1=~/(\S+)\_\d+\-\d+/) ? $1 : $seq1;
	  my $stem2=($seq2=~/(\S+)\_\d+\-\d+/) ? $1 : $seq2;
	  my $new_seq1="$stem1\_$start1\-$end1";
	  my $new_seq2="$stem2\_$start2\-$end2";
	  $new=sprintf("%-6s %-9s %-5s %-5s %-32s %-5s %-5s %-32s",
			$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
	 }
	 return(\$new);
}



#______________________________________________________________________________
# Title     : align_herta_inputs_with_adjustments
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.6
#------------------------------------------------------------------------------
sub align_herta_inputs_with_adjustments{
    # Purpose : builds two gapped ('.' filled) residue arrays plus a score
    #           array from the 'ALIGNMENT' list found in the hash given as
    #           5th argument, and packs them into one result hash.
    # Argument: $_[0], $_[1] = refs to the residue arrays of seq1 and seq2
    #           $_[2], $_[3] = refs to the position arrays of seq1 and seq2
    #                          (used as 1-based indices into the residues)
    #           $_[4]        = ref to a hash with the keys 'ALIGNMENT'
    #                          (array ref) and 'RAW_ALIGN_SCORE'
    #           $_[5], $_[6] = query and DB sequence names (scalar or ref)
    #           $_[7]        = true value suppresses the screen print
    # Returns : ref to a hash with the keys "<query>_<from>-<to>",
    #           "<DB>_<from>-<to>" (or 'SELF' when both names are equal),
    #           'RAW_ALIGN_SCORE', 'SUM_HERTA_SCORE', 'SEQ_IDENTITY' and
    #           'ALIGNMENT_LENG'.
    # NOTE(review): @align_info is listed twice in the my() below.
    #           $posi1, $posi2, $score, $seq_name2, $residues1_arr_size,
    #           $residues2_arr_size and $suffix_size_diff are not declared
    #           here, so they are package variables. $seq_name2 is never
    #           assigned in this sub - presumably set by the caller; verify.
    my($i, %align_position_seq1_seq2, %MRC_search_alignment,
       @scores, @align_info, $herta_align_score, $min_score,
       @align_info, @seq1_aligned, @seq2_aligned, @residues1, @residues2,
       @positions1, @positions2, $align_info, $REF_seq1_start_final, $query_seq, $DB_seq,
       $REF_seq2_start_final, $REF_scores_gap, @seq1_start_final, @seq2_start_final, @scores_gap,
       $seq1_len, $seq2_len, @herta_align_score, $last_residue_posi_1, $last_residue_posi_2,
       @residues1_suffix, @residues2_suffix, @complete_seq1_aligned, @complete_seq2_aligned,
       $NO_screen_print, $Mutual_Seq_Id_aligned, $Mutual_Seq_Id, $non_idential,
       $ALIGNMENT_LENG, $query_seq_align_range, $DB_seq_align_range);
    @residues1       =@{$_[0]};
    @residues2       =@{$_[1]};
    @positions1      =@{$_[2]};
    @positions2      =@{$_[3]};
    %align_position_seq1_seq2=%{$_[4]};
    $query_seq       =${$_[5]} || $_[5];
    $DB_seq          =${$_[6]} || $_[6];
    $NO_screen_print =${$_[7]} || $_[7];

    $seq1_len=@residues1;
    $seq2_len=@residues2;

    # first-last position of each sequence, taken before @positions1/2 are
    # consumed by shift() in the main loop
    $query_seq_align_range="$positions1[0]-$positions1[$#positions1]";
    $DB_seq_align_range   ="$positions2[0]-$positions2[$#positions2]";

    $query_seq="$query_seq\_$query_seq_align_range";
    $DB_seq="$DB_seq\_$DB_seq_align_range";

    # three array refs come back: the leading parts that are later put in
    # front of seq1 / seq2, and the scores that are put in front of @scores
    ($REF_seq1_start_final, $REF_seq2_start_final, $REF_scores_gap)
               =&get_prefix_arrays_for_seqlet_alignment(\@residues1, \@residues2,
                                                        \@positions1, \@positions2);

    @seq1_start_final=@{$REF_seq1_start_final};
    @seq2_start_final=@{$REF_seq2_start_final};
    @scores_gap      =@{$REF_scores_gap};

    # ALIGNMENT 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, -8, 0, 0, 0, 0, 0, 0, 10, 0,
    @align_info      =@{$align_position_seq1_seq2{'ALIGNMENT'}};
    #print "\n", @align_info, "\n";
    $herta_align_score=$align_position_seq1_seq2{'RAW_ALIGN_SCORE'};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # min_score from herta (around -5 to +7 range). -3 is reasonable
    #________________________________________________
    $min_score=-3.0;  # arbitrary

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # This is affected when the first residue was indel (such as -5, or 5)
    #________________________________________________________________________
    if(abs($align_info[0])){  #<-- Yet another adjustment
        # the first entry is shifted by the number of prefix elements and
        # one -3 score is added per prefix element
        $align_info[0]= ($align_info[0] + @seq1_start_final);
        for($i=0; $i< @seq1_start_final; $i++){
           push(@scores, -3);
        }
    }
    #print "\n@positions2\n";
    #print "\n@seq1_start_final\n@seq2_start_final\n@align_info\n";
    # One pass over the ALIGNMENT entries:
    #   negative n : n residues of seq1 (from index $i) face n '.' in seq2
    #   positive n : n residues of seq2 (from index $i) face n '.' in seq1
    #   zero       : the next position of each sequence forms an aligned pair
    for($i=0; $i < @align_info; $i++){
       $align_info=$align_info[$i];
       if($align_info < 0){
           $align_info=abs($align_info);
           $last_residue_posi_1=$i+$align_info-1;
           push(@seq2_aligned, split(//, "."x$align_info));
           push(@seq1_aligned, @residues1[$i .. $last_residue_posi_1]);
           for(1..$align_info){  push(@scores, $min_score);  }
       }elsif($align_info > 0){
           $last_residue_posi_2=$i+$align_info-1;
           push(@seq1_aligned, split(//, "."x$align_info));
           push(@seq2_aligned, @residues2[$i .. $last_residue_posi_2]);
           for(1..$align_info){  push(@scores, $min_score); }
       }elsif($align_info == 0){
           $posi1=shift(@positions1);
           $posi2=shift(@positions2);
           #print "\n$posi1";
           # NOTE(review): $seq_name2 is not set anywhere in this sub
           $score=$align_position_seq1_seq2{$seq_name2}{$posi2};
           push(@scores, $score);
           # the gap score used above follows the lowest pair score seen so far
           if($score < $min_score){ $min_score=$score }
           push(@seq1_aligned, $residues1[$posi1-1]);
           push(@seq2_aligned, $residues2[$posi2-1]);
           $last_residue_posi_1=$posi1-1;
           $last_residue_posi_2=$posi2-1;
           #print "\t $residues1[$posi1-1] $last_residue_posi_1 $last_residue_posi_2";
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Get mutual seq. identity of aligned region
           #_________________________________________________________
           if($residues1[$last_residue_posi_1] eq $residues2[$last_residue_posi_2]){
               $Mutual_Seq_Id++;
           }else{
               $non_idential++;
           }

       }
    }
    # identity = identical pairs / all aligned pairs (0.000 without pairs)
    $ALIGNMENT_LENG       =$Mutual_Seq_Id + $non_idential;
    if($ALIGNMENT_LENG < 1){ $Mutual_Seq_Id_aligned=0.000;
    }else{ $Mutual_Seq_Id_aligned=$Mutual_Seq_Id/$ALIGNMENT_LENG; }

    $Mutual_Seq_Id_aligned  = sprintf("%-.3f", $Mutual_Seq_Id_aligned);
    unshift(@seq1_aligned, @seq1_start_final) if @seq1_start_final;
    unshift(@seq2_aligned, @seq2_start_final) if @seq2_start_final;
    @scores=(@scores_gap, @{&normalize_numbers_in_array(\@scores, '-3 5')});
    #print "SEQ1\n", @seq1_aligned, "\nSEQ2\n", @seq2_aligned, "\nSCO\n", @scores, "\n\n";
    $herta_align_score  = sprintf("%-.4f", $herta_align_score);
    @herta_align_score=($herta_align_score);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # handling the residue suffixes (what follows the last used residue)
    #_________________________________________________________________
    $last_residue_posi_1++;  ## need to increment by 1 as I want SUFFIX, not the last residue
    $last_residue_posi_2++;
    @residues1_suffix=@residues1[$last_residue_posi_1 .. $#residues1];
    @residues2_suffix=@residues2[$last_residue_posi_2 .. $#residues2];

    # one '.' score per suffix residue of either sequence
    push(@scores, split(//, "."x(@residues1_suffix + @residues2_suffix)) );
    # The two suffixes are laid out one after the other: the longer one gets
    # leading '.' for the shorter one, the shorter one gets trailing '.' for
    # the longer one, so both end up with the same length.
    if(@residues1_suffix > @residues2_suffix){
         $residues1_arr_size=@residues1_suffix;
         unshift(@residues1_suffix, split(//, "."x@residues2_suffix));
         push(@residues2_suffix, split(//, "."x$residues1_arr_size));
    }else{
         $residues2_arr_size=@residues2_suffix;
         unshift(@residues2_suffix, split(//, "."x@residues1_suffix));
         push(@residues1_suffix, split(//, "."x$residues2_arr_size));
    }
    # computed but not used further in this sub
    $suffix_size_diff=abs(@residues1_suffix-@residues2_suffix);

    @complete_seq1_aligned=(@seq1_aligned, @residues1_suffix);
    @complete_seq2_aligned=(@seq2_aligned, @residues2_suffix);
    if($query_seq eq $DB_seq){ $DB_seq='SELF'; }
    %MRC_search_alignment=("$query_seq", [@complete_seq1_aligned],
                              "$DB_seq", [@complete_seq2_aligned],
                      'RAW_ALIGN_SCORE', [@scores],
                      'SUM_HERTA_SCORE', \@herta_align_score,
                      'SEQ_IDENTITY'   , \$Mutual_Seq_Id_aligned,
                      'ALIGNMENT_LENG' , \$ALIGNMENT_LENG);

    unless($NO_screen_print){
       print "\n# Seqlet Matrix Profile search result. Using Forward-Backward Herta algorithm\n";
       print "# SEQ1: $query_seq ($seq1_len), SEQ2: $DB_seq($seq2_len)\n";
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # returning the reference to the result hash built above
    #________________________________________________________________
    return(\%MRC_search_alignment);

}



#______________________________________________________________
# Title     : convert_mspa_line_to_mmp_line
# Usage     :
# Function  : this adds ranges to the seqnames of mspa files
#             mmp line is mspa line with additional sequences at the end
# Example   :
# Keywords  : convert_mspa_to_mmp, convert_mspa, convert_mspa_2_mmp
#             change_mspa_to_mmp, add_range_in_mspa
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#--------------------------------------------------------------
sub convert_mspa_line_to_mmp_line{
	 # Purpose : rewrites one MSPA line so that both sequence names carry the
	 #           range given by the start/end columns of that line
	 #           (NAME becomes NAME_start-end). When a name already holds a
	 #           _digits-digits range, the text from that range onward is
	 #           replaced rather than appended to.
	 # Argument: $_[0] = the MSPA line, as a plain string or as a reference
	 #           to a scalar.
	 # Returns : reference to the re-formatted line; reference to undef when
	 #           the input does not have the MSPA layout.
	 #
	 # The argument is dereferenced only when it really is a SCALAR
	 # reference. The former '${$_[0]} || $_[0]' used a plain string as a
	 # symbolic reference (a package variable named after the whole line),
	 # which is fatal under 'use strict' and could pick up unrelated data.
	 my $input_mspa=(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
	 my $new;

	 if(defined($input_mspa) and
	    $input_mspa=~/^\s*(\d+)\s+(\S+)\s*\S*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
	  # columns: score evalue start1 end1 seq1 start2 end2 seq2
	  my ($score, $evalue, $start1, $end1, $seq1, $start2, $end2, $seq2)
	     =($1, $2, $3, $4, $5, $6, $7, $8);
	  my $stem1=($seq1=~/(\S+)\_\d+\-\d+/) ? $1 : $seq1;
	  my $stem2=($seq2=~/(\S+)\_\d+\-\d+/) ? $1 : $seq2;
	  my $new_seq1="$stem1\_$start1\-$end1";
	  my $new_seq2="$stem2\_$start2\-$end2";
	  $new=sprintf("%-6s %-9s %-5s %-5s %-32s %-5s %-5s %-32s",
			$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
	 }
	 return(\$new);
}



#______________________________________________________________________________
# Title     : combine_forward_backward_sec_str_prediction
# Usage     :%av_of_forw_backw_pred=%{&combine_forward_backward_sec_str_prediction(
#                                  \$output_pred_file_name, \$output_pred_file_name_rv)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub combine_forward_backward_sec_str_prediction{
		# Purpose : reads a forward and a reverse predator output file and
		#           returns what get_averaged_prediction makes of the two.
		# Argument: $_[0] = forward prediction file name (string or scalar ref)
		#           $_[1] = reverse prediction file name (string or scalar ref)
		#           $_[2] = optional, a string containing 'i'
		# Returns : the result of get_averaged_prediction copied into a new
		#           hash, as a hash reference.
		#
		# Fixed: with plain string arguments the reverse name used to be
		# stored in the FORWARD variable, so both files read were the reverse
		# one; and a second 'my' of the reverse variable threw away the
		# dereferenced name when a reference was given.
		#
		# $weight_factor and $give_weight_with_good_match are not set in this
		# sub; they are read as package variables, as before.
		my($input_predator_file_name, $input_predator_file_name_rv,
			 $reverse_order_of_one_hash, $ignore_non_matching_residues);

		$input_predator_file_name   =(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
		$input_predator_file_name_rv=(ref($_[1]) eq 'SCALAR') ? ${$_[1]} : $_[1];

		# NOTE(review): the 'i' option is recognised but, as before, not
		# handed on to get_averaged_prediction - confirm whether it should be.
		if(defined($_[2]) and $_[2]=~/i/){    $ignore_non_matching_residues='i' }
		$reverse_order_of_one_hash='r';

		print "\n# (i) $input_predator_file_name \n";
		my %sec1=%{&open_predator_files($input_predator_file_name)};
		my %sec1_rv=%{&open_predator_files($input_predator_file_name_rv)};
		my %av_of_forw_backw_pred=%{&get_averaged_prediction(\%sec1, \%sec1_rv,
							 "w=$weight_factor",
							 $give_weight_with_good_match,
							 "$reverse_order_of_one_hash")};
		return(\%av_of_forw_backw_pred);
}



#________________________________________________________________________________
# Title     : merge_sequence_alignments
# Usage     : &merge_sequence_alignments(@seq);  while @seq has
#              @seq=(\%hash1, \%hash2);  while %hash1 and %hash2 have
#    %hash1=qw(seq1 ANN-NTMQQRRQQQRKRRRQQQSSSSTTST seq2 --NNN--QQ--QQQ--RRRR--SSSS--);
#    %hash2=qw(seq2 NN-QQQQQ--RRRR----SS--SS---    seq3 -NNXQQQXQRTRRRXTTSTSSMMSSTTT);
#
# Function  :
# Example   :
# Keywords  : combine_sequence_alignment, merge_sequence_alignment_pairs
#             merge_seq_alignment, make_interm_alignment, make_3_way_alignment
#             merge_alignment, combine_alignment
# Options   :
#    l=  for sequence block length by print_seq_in_block subroutine
#    t=  for specifying the length of seq names shown.
#    t   for truncating the seq names in printing out.
#    s   for sorting the final output lines (default anyway for print_seq_in_block)
#
# Category  :
# Version   : 1.5
#--------------------------------------------------------------------------------
sub merge_sequence_alignments{
		# Purpose : takes the hash references found in the arguments two at a
		#           time (two pairwise alignments that share one sequence),
		#           has them aligned by align_intermediate_alignments, makes
		#           the gaps of the common sequence agree in both, builds two
		#           homology lines and prints the five resulting lines with
		#           print_seq_in_block (file option "f=defaul_result.issa").
		# Options : l= block length (default 60), t= name length, t truncate
		#           names, s sort names, r=from-to block range.
		# Returns : nothing explicit; the output is what print_seq_in_block
		#           produces.
		# NOTE(review): %out_hash_issa, @homology_line1 and @homology_line2
		#           are declared outside the pair loop and are not emptied
		#           between pairs - confirm this is intended when more than
		#           one pair is given.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my (@splited1_common, @splited2_common, @splited1_non_common, @splited2_non_common,
			 @gap_pos1, @gap_pos2, $block_length, $sort_seq_names,
			 $truncate_name_to_10_char, $trunc_name_to, $block_range,
			 $common_seq_entry, $non_common_seq_entry1, $non_common_seq_entry2,
			 %temp1, %temp2, %out_hash_issa, @homology_line2, @homology_line1,
			 @temp );
		$block_length=60;
		$sort_seq_names='s'; ## this is for &print_seq_in_block

		if($vars{'l'}=~/\d+/){  $block_length=$vars{'l'}; }
		if($char_opt=~/t/){    $truncate_name_to_10_char='t' }
		if($vars{'t'}=~/\d+/){ $trunc_name_to=$vars{'t'}; }
		if($char_opt=~/s/){    $sort_seq_names='s' }
		if($vars{'r'}=~/(\d+\-\d+)/){ $block_range= $1 };

		# the input hashes are consumed in pairs: ($hash[0],$hash[1]), ($hash[2],$hash[3]) ...
		for($i=0; $i< @hash; $i+=2){
				 my(%hash1, %hash2, @aligned_alignments);

 		     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		     # Running the MAIN intermediate alignments
		     #__________________________________________________________________________

				 @aligned_alignments=@{&align_intermediate_alignments($hash[$i], $hash[$i+1])};

				 # elements 0,1 are used as hash refs (the two alignments),
				 # elements 2,3,4 as scalar refs (names of the common and of
				 # the two non-common sequences)
				 %hash1=%{$aligned_alignments[0]};
				 %hash2=%{$aligned_alignments[1]};

				 $common_seq_entry     =${$aligned_alignments[2]};
				 $non_common_seq_entry1=${$aligned_alignments[3]};
				 $non_common_seq_entry2=${$aligned_alignments[4]};

				 #~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				 # Aligning the start of common sequences
				 #  i.e. putting pre gap part to a sequence which does not have it.
				 #
				 #  NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
				 #  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
				 #  becomes->
				 #
				 #  --------------NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
				 #  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
				 #_______________________________________________________________________________________________________________

				 #@gap_pos1=@{&get_gap_positions(\$hash1{$common_seq_entry}, 'p' )}; # p means all positive positions wanted
				 #@gap_pos2=@{&get_gap_positions(\$hash2{$common_seq_entry}, 'p' )};

				 @splited1_common    =split(//, $hash1{$common_seq_entry} );
				 @splited2_common    =split(//, $hash2{$common_seq_entry} );
				 # when the first non-common name is not in %hash1, the roles of
				 # the two alignments are exchanged (the common arrays are swapped too)
				 if( $hash1{$non_common_seq_entry1} ){
						@splited1_non_common=split(//, $hash1{$non_common_seq_entry1} );
						@splited2_non_common=split(//, $hash2{$non_common_seq_entry2} );
				 }else{
						@splited1_non_common=split(//, $hash2{$non_common_seq_entry1} );
						@splited2_non_common=split(//, $hash1{$non_common_seq_entry2} );
						@temp=@splited1_common; @splited1_common=@splited2_common; @splited2_common=@temp;
				 }

				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 #  Core algorithm (For MSF files)
				 #__________________________________________
				 # Where the two copies of the common sequence differ at $j and one
				 # of them has a non-word character (a gap) there, that character is
				 # inserted at $j into the other copy and into its non-common partner.
				 # The arrays grow while the loop runs; the loop bound is re-read
				 # on every pass.
				 for($j=0; $j< @splited1_common; $j++){
						 if($splited1_common[$j] ne $splited2_common[$j]){
								 if($splited1_common[$j]=~/(\W)/){
										 splice(@splited2_common, $j, 0, $1);
										 splice(@splited2_non_common, $j, 0, $1); # unless($splited2_non_common[j]=~/\W/);
								 }
								 elsif($splited2_common[$j]=~/(\W)/){
										 splice(@splited1_common, $j, 0, $1);
										 splice(@splited1_non_common, $j, 0, $1);# unless($splited1_non_common[j]=~/\W/);
										 if($splited1_common[$j] eq $splited1_non_common[$j]){
												 $homology_line2[$j]=':';
										 }else{
												 $homology_line2[$j]=' ';
										 }
								 }
						 }
				 }

				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 # Making Homology line
				 #_____________________________________________
				 # Per column: the residue itself when the non-common residue equals
				 # the common one, ':' when only the two non-common residues are
				 # equal, '.' when amino_acid_homology_matrix returns a true value
				 # for them, ' ' otherwise and for gap columns.
				 for($j=0; $j< @splited1_non_common; $j++){
							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							# The first SEQ1  with Interm HOMOLOGY
							#______________________________________________________________
							if($splited1_non_common[$j]=~/\W/){ $homology_line1[$j]=' '; goto HOMOL_2 }

							if($splited1_non_common[$j] eq $splited1_common[$j] and $splited1_non_common[$j] eq $splited2_non_common[$j] ){
									$homology_line1[$j]= $splited1_non_common[$j];
							}elsif($splited1_non_common[$j] eq $splited1_common[$j] ){
									$homology_line1[$j]=$splited1_non_common[$j] ;
							}elsif($splited2_non_common[$j] eq $splited1_non_common[$j]){
									if($splited2_non_common[$j]=~/\W/){ $homology_line1[$j]=' '
							    }else{  $homology_line1[$j]=':' }
							}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$j], $splited1_non_common[$j])} ){
									$homology_line1[$j]='.';
							}
							else{ $homology_line1[$j]=' '; }

							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							# The Second SEQ2  with Interm HOMOLOGY
							#______________________________________________________________
							HOMOL_2:
							if($splited2_non_common[$j]=~/\W/){ $homology_line2[$j]=' '; next }

							if($splited2_non_common[$j] eq $splited2_common[$j] and $splited2_non_common[$j] eq $splited1_non_common[$j] ){
									$homology_line2[$j]=$splited2_non_common[$j] ;
							}elsif($splited2_non_common[$j] eq $splited2_common[$j]){
									$homology_line2[$j]=$splited2_non_common[$j] ;
							}elsif($splited2_non_common[$j] eq $splited1_non_common[$j]){
									if($splited2_non_common[$j]=~/\W/){ $homology_line2[$j]=' ';
									}else{  $homology_line2[$j]=':'; }
							}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$j], $splited1_non_common[$j])} ){
									$homology_line2[$j]='.';
							}else{ $homology_line2[$j]=' '; }

				 }
				 # the leading '1 ' .. '5 ' in the keys fixes the line order when
				 # print_seq_in_block sorts the names
				 $out_hash_issa{"1 $non_common_seq_entry1 "}=join('', @splited1_non_common);
				 $out_hash_issa{"3 $common_seq_entry"}=join('', @splited2_common);
				 $out_hash_issa{'2 homol_line1'}=join('', @homology_line1);
				 $out_hash_issa{'4 homol_line2'}=join('', @homology_line2);
				 $out_hash_issa{"5 $non_common_seq_entry2"}=join('', @splited2_non_common);
				 &print_seq_in_block(\%out_hash_issa, $sort_seq_names,  "t=$trunc_name_to", "f=defaul_result\.issa",
															 $truncate_name_to_10_char, "l=$block_length", "r=$block_range");
		}
}


#________________________________________________________________________________________
# Title     : merge_sequence_in_mspa_file
# Usage     :
# Function  :
# Example   : INPUT: (MSPA file) ===>
#  59     2.6        47    64     d2pia_3        10    30     d1erd___10-30
#  161    1.1e-07    24    91     d2pia_3        16    85     d1frd___16-85
#
#  722    0          1     106    d1put__        1     106    d1put___1-106
#  66     4.9        2     68     d1put__        43    106    d2lbp___43-106
#  69     1.3        12    49     d1put__        81    120    d1cgo___81-120
#
#  60     3.3        13    38     d1frd__        32    57     d1orda1_32-57
#  65     1.7        21    58     d1frd__        40    69     d2mtac__40-69
#
#   ==== OUTPUT ===>
#    d1frd___1-98 d1frd___1-98_1-98 d1frd___16-85 d2pia_3_24-91_24-91
#    d1frd___16-85_16-85 d2pia_3_24-91
#    d1put___1-106 d1put___1-106_1-106
#    d2pia_3_1-98 d2pia_3_1-98_1-98
#
# Keywords  : mergr_seq_in_mspa_file, merge_sequence_in_mspa, merge_sequences_in_mspa_file
# Options   :
#  $dynamic_factor =  y by y -y   # adjusting factor value dynamically(more seq higher factor)
#  $short_region   =  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region   =  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region =  A by A -A # taking average region overlapped in removing similar regions
#
# Thanks    : Alexey Eroshkin <alexey@axyspharm.com>
# Version   : 3.5
#----------------------------------------------------------------------------------------
sub merge_sequence_in_mspa_file{
		# Purpose : reads one or more MSPA files, keeps the lines that pass the
		#           score and E-value thresholds, groups them by query
		#           sequence, lets merge_sequence_in_mspa_chunk work on each
		#           group, and then merges/cleans the collected seqlet strings.
		# Options read below:
		#           z (or Z) optimize, m merge, y dynamic factor, r ranges in
		#           names (also sets $verbose), v verbose, S/L/A short/large/
		#           average region, T= length threshold, f= factor,
		#           s= minimum score, e= or E= maximum E-value.
		# Returns : reference to the final array of seqlet strings.
		# Dies    : when no file is given or a file can not be opened.
		# NOTE(review): $default_factor, $range, $verbose, @mspa1, $new_seq1
		#           and $new_seq2 are not declared here (package variables);
		#           '$ragne' in the my() list looks like a typo for $range.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($mspa_value, @all_seqlets, %temp_hash, @mspa_chunks, $clu_out, $size_of_all_seqlets,
		    $ragne, $base, $optimize, $mrg_out, @arr, $sat_out, %final_hash_out, @final_pre_hash,
				$length_thresh, $merge, $factor, $Evalue_thresh, $score, $dynamic_factor, $score_match,
				$eval_match, $query_seq, $query_start, $query_stop, $match_seq, $match_start,
				$short_region, $large_region, $average_region, $original_clu_size, $match_stop,
				$total_mspa_line_count);
		$factor=$default_factor=7; #~~~~ default connection factor U, 7 means 70% now!
		$length_thresh=30;
		$Evalue_thresh=1;
		$score =75;
		$range='r';
		if(@file < 1){ print "\n# (E) merge_sequence_in_mspa_file needs at least 1 MSPA file\n"; die }

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Following changes the defaults with given parameters
		# (a row of independent ifs; each '}if(' starts a new test)
		#_____________________________________________________________
		if($char_opt=~/z/i){       $optimize='z';    ## This will cause using remove_similar_seqlets than remove_dup_in_array !
		}if($char_opt=~/m/){       $merge='m';
		}if($char_opt=~/y/){       $dynamic_factor='y';
        }if($char_opt=~/r/){       $verbose='r';
        }if($char_opt=~/v/){       $verbose='v';
		}if($char_opt=~/S/){       $short_region='S';
		}if($char_opt=~/L/){       $large_region='L';
		}if($char_opt=~/A/){       $average_region='A';
		}if($vars{'T'}=~/\d+/){    $length_thresh=$vars{'T'};
		}if($vars{'f'}=~/\S+/){    $factor=$vars{'f'};  ## Here I give a generous $factor !
		}if($vars{'s'}=~/\d+/){    $score = $vars{'s'};
        }if($vars{'e'}=~/\S+/){    $Evalue_thresh= $vars{'e'};
        }if($vars{'E'}=~/\S+/){    $Evalue_thresh= $vars{'E'}; }

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		#  Just to inform what parameters have been chosen
		#_____________________________________________________________
        print "\n# (1) merge_sequence_in_mspa_file : default \$score      : $score";
        print "\n#                                 : default \$Evalue_thresh     : $Evalue_thresh";
        print "\n#                                 : used \$length_thresh : $length_thresh";
        print "\n#                                 : default \$factor     : $default_factor";
        print "\n#                                 : used    \$factor     : $factor";
        print "\n#                                 : \$dynamic_factor     : $dynamic_factor\n";

		# Pass 1: read every file and collect the accepted lines per query
		# sequence in %temp_hash (lines of all files go into the same hash).
		for($c=0; $c< @file; $c++){
             open(MSPA, "$file[$c]") || die "Can not open $file[$c] \n";
			 $base=${&get_base_names($file[$c])};
			 # $clu_out and $sat_out are only assigned here; nothing in this
			 # sub writes to them
			 $clu_out="$base\_F${factor}.clu"; # <-- This is the most important output. Sarah's program will process this
			 $sat_out="$base\_F${factor}.sat";
             my $total_mspa_lines=@mspa1=<MSPA>;
             print "\n $file[$c] is opened successfully \$total_mspa_lines : $total_mspa_lines\n";

			 for($i=0; $i< @mspa1; $i++){
					#~~~~~~~~~~ Include range or NOT in the seq name ~~~~~~~~~~~~~~~~~~~~~~~~~~`
					# %temp_hash is just to get the chunk of MSPA block. As mspa file uses empty line as a delimiter
					#____________________________________________________________________________
					if($char_opt=~/r/){
						 if($mspa1[$i]=~/^\s*(\S+)\s+(\S+)\s*\S*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
                              $total_mspa_line_count++;
									$score_match=$1;	$eval_match=$2;
                                    $query_seq=$5;      $query_start=$3;
									$query_stop=$4;		$match_seq=$8;
									$match_start=$6;	$match_stop=$7;
                                    # lines below the score or above the E-value threshold are skipped
                                    if($score_match < $score or $eval_match > $Evalue_thresh){next};
									# a name that already ends in _digits-digits is kept, else the range is appended
									if($query_seq=~/\S+_\d+\-\d+$/){ $new_seq1=$query_seq }else{ $new_seq1="$query_seq\_$query_start\-$query_stop"; }
									if($match_seq=~/\S+_\d+\-\d+$/){ $new_seq2=$match_seq }else{ $new_seq2="$match_seq\_$match_start\-$match_stop"; }

									if($new_seq1 eq $new_seq2){ next};

									#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									# Modifying $mspa1[$i] line !!!
									#______________________________
									$mspa1[$i]=sprintf("%s %-3s %s %s %s %s %s %s",
													$score_match, $eval_match, $query_start,
													$query_stop, $new_seq1, $match_start,
													$match_stop, $new_seq2);
									$temp_hash{$query_seq}.="$mspa1[$i]\n";
						 }
					}else{
						 # NOTE(review): '[_\d+\-\d+]?' is a character class (one
						 # optional character), not an optional '_from-to' group -
						 # confirm what was meant. $total_mspa_line_count is only
						 # counted in the 'r' branch above.
						 if($mspa1[$i]=~/^\s*(\S+)\s+(\S+)\s*\S*\s+\d+\s+\d+\s+(\S+)[_\d+\-\d+]?\s+\d+\s+\d+\s+\S+/){
									if($1 < $score or $2 > $Evalue_thresh){	next };
									$temp_hash{$3}.="$mspa1[$i]\n";
						 }
					}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			}
            close(MSPA);
		}
        $original_clu_size=@mspa_chunks= values(%temp_hash); ## Using temp hash is more than 2 times faster than push

        print "\n The total seq to divclus is : $original_clu_size \$total_mspa_line_count: $total_mspa_line_count\n";
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Determining the dynamic factor here (when 'y' opt is set)
		#____________________________________________________________
		if($dynamic_factor){
				#--> 100 => 10, 1000 => 15, 10000 => 20
				print "\n# ### \$factor: $factor\n";
				$factor += (log($original_clu_size)*5)/10 - 1; ## This is a simplistic.
				if($factor > 9.5){ $factor=9.5 } # this is the very upper limit for any factor.
				print "\n# ### \$factor: $factor\n";
		}

		# Pass 2: every chunk (all accepted lines of one query sequence) is
		# handed to merge_sequence_in_mspa_chunk; the results are pooled.
		for($i=0; $i< @mspa_chunks; $i++){
            @arr=@{&merge_sequence_in_mspa_chunk($mspa_chunks[$i], $verbose, $optimize,
								"$merge", "E=$Evalue_thresh", "s=$score",
								"f=$factor", "T=$length_thresh",
								$short_region, $large_region, $average_region)};
			push(@all_seqlets,  @arr);
		}

		#~~~~~~~~~ sorting inner sequences in strings ~~~~~~~~~
		#______________________________________________________
		@all_seqlets=@{&sort_words_in_string(@all_seqlets)}; ## This speeds up about 2 times !!!

		#~~~~~~~ Sort by the _digit-  in seqlet names ~~~~~~~~~
		# (name as text first, then start digit as number)
		# NOTE(review): for a string that does not match, the inner map yields
		# a false value instead of an array ref - verify that all seqlet
		# strings carry NAME_from-to.
		@all_seqlets= map{$_->[0]} sort{$a->[1] cmp $b->[1] or $a->[2] <=> $b->[2]  }
									map {/^\s*((\S+)_(\d+)\-(\d+).*)/ && [$1, $2, $3, $4]} @all_seqlets;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# merge sequences in a simple way until there is no change in the array
		#  This is an incomplete merge(merges first seqlets of string ...
		#______________________________________________________________________
		# The body either jumps back to the label (the array got smaller) or
		# leaves the loop with 'last', so the for() header only decides whether
		# the body is entered at all.
		for($i=0; $i< @mspa_chunks; $i ++){
             ITERATION_RETURN_POINT:
             $size_of_all_seqlets=@all_seqlets;
             @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
                                                      $short_region, $large_region,
                                                      $average_region, "f=$factor")};
             if($size_of_all_seqlets > @all_seqlets){
                 @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
                                                $short_region, $large_region, $average_region, "f=$factor")};
                 print "\n $size_of_all_seqlets Iterating merge_similar_seqlets \n";
                 goto ITERATION_RETURN_POINT;
             }else{
                 last;
             }
		}

		# with z: remove_similar_seqlets, otherwise plain duplicate removal
		if($optimize){
             @all_seqlets=@{&remove_similar_seqlets(\@all_seqlets,
                                             $short_region, $large_region, $average_region)};
             #@all_seqlets=@{&remove_dup_in_array(\@all_seqlets)};

		}else{
             @all_seqlets=@{&remove_dup_in_array(\@all_seqlets)};
		}
		return(\@all_seqlets);
}




#__________________________________________________________________________
# Title     : merge_sequence_in_mspa_chunk
# Usage     :
# Function  : merges sequences which are linked by common regions
#             This filters the sequences by evalue and ssearch score
#             This is the main algorithm of merging similar sequences.
#             MSPA lines become pairs of seq_regions
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : connect_sequence_in_mspa, link_sequence_in_mspa_chunk
#             connect_sequence_in_mspa_chunk, link_sequence_in_mspa
#             merge_sequence, link_sequence, connect_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             m  for merge file output format (.mrg)
#             t= for threshold of seqlet length eg)  "t=30"
#             f= for overlap factor (usually between 2 to 7 )
#                 2 means, if the two regions are not overlapped
#                  by more than HALF of of the smaller region
#                  it will not regard as common seqlet block
#             s= for ssearch score minimum
#             e= for ssearch e value maximum
#             S  for S -S  # taking shorter region overlapped in removing similar regions
#             L  for L -L  # taking larger  region overlapped in removing similar regions
#             A  for A -A # taking average region overlapped in removing similar regions
#
# Returns   :
# Argument  :
# Thanks    : Alexey Eroshkin <alexey@axyspharm.com>
# Version   : 2.9
#--------------------------------------------------------------
sub merge_sequence_in_mspa_chunk{
	 # Purpose : merge MSPA lines (given as one newline-separated string, or a
	 #           reference to such a string, in the first argument) whose matched
	 #           regions have similar lengths, joining the matched sequence names
	 #           with '|-|'.
	 # Returns : reference to @Final_out (sorted). @Final_out is only filled when
	 #           the 'm' character option is given; otherwise the referenced
	 #           array is empty.
	 # NOTE(review): most working variables below ($long_match1, $enq_seq1,
	 #           $R_start1, $Region_leng1, $smaller_leng, $Start_diff,
	 #           $extended_name, $leading_seq, $long, ...) are not declared with
	 #           my() and are therefore package globals.
	 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	 if($debug==1){print "\n\t\@hash=\"@hash\"
	 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my ($ssearch_score2, $evalue_found2, $evalue_found1, $ssearch_score1, $optimize );
	 my ($L, %out_hash, @out, $LL, @Final_out, $verbose, $final_factor, $R_diff, @seqlets,
			 $short_region, $large_region, $average_region, $factor, $score, $evalue, $length_thresh);
	 $factor =7; # default factor for around 30% sequence mis-overlap is the threshold for common block
	 #~~~~~~~~~~~~~~ The lower the factor the larger clustering will occur ~~~~~~~~~~~~
	 $score  =75; # default ssearch score. seq below this will be chucked out
	 $evalue =10; # default maximum e value used. Seq higher than this will be thrown out
	 $length_thresh =30; # sequence length threshold. overlap less than this will be ignored

	 # Single-character options. These are five independent 'if' statements
	 # (each '}if(' starts a new test), not an if/elsif chain.
	 if($char_opt=~/v/){     $verbose = 'v'
	 }if($char_opt=~/z/){    $optimize = 'z'
	 }if($char_opt=~/S/){    $short_region='S';
	 }if($char_opt=~/L/){	   $large_region='L';
	 }if($char_opt=~/A/){	   $average_region='A'; }

	 # "name=value" options override the defaults above.
	 # NOTE(review): the length threshold is read from 'T' (upper case), while
	 # the header comment of this sub documents it as 't=' -- confirm which
	 # spelling callers are expected to use.
	 if($vars{'T'}=~/\d+/){   $length_thresh=$vars{'T'};
	 }if($vars{'f'}=~/\S+/){  $factor=$vars{'f'};
	 }if($vars{'s'}=~/\d+/){  $score = $vars{'s'};
     }if($vars{'e'}=~/\d+/){  $evalue= $vars{'e'};
     }if($vars{'E'}=~/\d+/){  $evalue= $vars{'E'};
	 }

     # One array element per input line; the first argument may be a scalar
     # reference or a plain string.
     @seqlets=split(/\n+/, (${$_[0]} || $_[0]) );

	 # Outer loop: take each line in turn as the reference line.
	 F1: for($i=0; $i < @seqlets; $i ++){
			# Captures: $1 = first eight columns, $2 = column 1 (used as score),
			# $3 = column 2 (used as e-value), $4/$5 = start/end of the first
			# region, $6 = first sequence name, $9 = second sequence name.
			if($seqlets[$i]=~/^\s*((\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+))\s+(\S+)\s*(.*)/){
		     # A line matching a sequence against itself is dropped.
		     if($6 eq $9){ splice(@seqlets, $i, 1); $i--; next };
				 ($long_match1, $enq_seq1, $mat_seq1, $R_start1, $R_end1 )=($1, $6, $9, $4, $5);
                 $Region_leng1=$R_end1-$R_start1;  $ssearch_score1= $2;  $evalue_found1 = $3;
	       }
	       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	       # The two filters below are ACTIVE for the reference line: lines that are too
	       # short, score too low, or have too large an e-value are removed.
	       # NOTE(review): when the regex above does not match, these tests use the
	       # values left over from an earlier line.
		   #________________________________________________________________________________________________
	       if( ($Region_leng1 < $length_thresh) || ($ssearch_score1 < $score) ){ splice(@seqlets, $i, 1); $i--; next; }
	       if( $evalue_found1 > $evalue){ splice(@seqlets, $i, 1); $i--; next; }

		   # Inner loop: compare the reference line with every other line.
		   F2: for($j=0; $j < @seqlets; $j ++){
		     # Textually identical lines (including the line itself) are skipped.
		     if($seqlets[$i] eq $seqlets[$j]){ next };
		     if($seqlets[$j]=~/^\s*((\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+))\s+(\S+)\s*(.*)/){
			      ($long_match2, $enq_seq2, $mat_seq2, $R_start2, $R_end2)=($1, $6, $9, $4, $5);
			      $Region_leng2=$R_end2-$R_start2;	$ssearch_score2=$2;	$evalue_found2= $3;
	         }

			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # Following lines are disabled as I believe seqlets have been checked in previous sub
			 #________________________________________________________________________________________________
		     #if( ($Region_leng2 < $length_thresh)||($ssearch_score2 < $score) ){ splice(@seqlets, $j, 1); $j--; next; }
		     #if( $evalue_found2 > $evalue){ splice(@seqlets, $j, 1); $j--; next; }

             # Absolute difference of the two region lengths (no division by 2 is done here).
             $R_diff=abs($Region_leng1-$Region_leng2);

		     if($Region_leng2 < $Region_leng1){ $smaller_leng=$Region_leng2; }else{ $smaller_leng=$Region_leng1; }

             # NOTE(review): $Start_diff is computed but not used anywhere else in this sub.
             $Start_diff=abs($R_start1-$R_start2);
             # Allowed length difference: the smaller length reduced by factor/10 of itself
             # (with the default factor 7 this is 30% of the smaller length).
             $final_factor=$smaller_leng - $smaller_leng*($factor/10);

			 #~~~~~~~~~~ If the length difference is within the allowed amount
			 #~~~~~~~~~~ the two lines are regarded as the same seqlet and merged
             if( $R_diff <= $final_factor ){  ### if diff is less than around 30% of the smaller length
					  if($Region_leng2 >= $Region_leng1){
							 #~~~~~ the merged name list is built by joining the two names with '|-|'
                             $extended_name="$mat_seq2|-|$mat_seq1";
							 $L=length($extended_name);
							 $LL=length($long_match2)+2;
							 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							 # This makes "368   2.3e-06  0.352  4    189   af_AF2051  20   208   hi_HI1334,hi_34343"
							 #_________________________________________________________________________________________
							 # The reference line is replaced by line j's columns plus the merged
							 # names, line j is removed, and the outer loop is restarted.
							 $seqlets[$i]= sprintf("%-${LL}s %-${L}s", $long_match2, $extended_name);
							 splice(@seqlets, $j, 1);
							 # 'next F1' runs the loop increment, so after this decrement the same
							 # index is examined again (except when $i is 0, where it moves on to 1).
							 $i-- unless($i==0);
							 $j--;
							 next F1;
					  }elsif( $Region_leng1 >= $Region_leng2){  ## chooses the bigger range seq
							 $extended_name="$mat_seq1|-|$mat_seq2"; # names are joined with '|-|'
							 $L=length($extended_name);
							 $LL=length($long_match1)+2;
							 $seqlets[$i]=sprintf("%-${LL}s %-${L}s", $long_match1, $extended_name);
							 splice(@seqlets, $j, 1);
							 $i-- unless($i <= 0);
							 $j--;
							 next F1;
					  }
	       }else{
			      next F2;
		   }
	    }
	 }
	 #print "\n @seqlets \n";
	 # With the 'm' option each remaining line becomes
	 # "<first sequence name> <merged names separated by spaces>".
	 if($char_opt=~/m/){ # #             m  for merge file output format (.mrg)
            for($i=0; $i< @seqlets; $i++){
				 if($seqlets[$i]=~/^\s*\S+\s+\S+\s+\d+\s+\d+\s+(\S+)\s+\d+\s+\d+\s+(\S+)/){
						if($1 eq $2){ next }
						$leading_seq=$1; $long=$2; $long=~s/\|\-\|/ /g;
						push(@Final_out, "$leading_seq $long" );
				 }
			}
	 }
	 @Final_out=sort @Final_out;
     #print "\n========># \@Final_out: @Final_out ";
	 return(\@Final_out);
}




#______________________________________________________________
# Title     : get_overlapping_range
# Usage     : @n1=@{&get_overlapping_range(\@ranges1, \@ranges2)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_overlapping_range_in_mspa, get_overlapping_range_in_mspa_file,
#             get_overlapping_seq_match_range, get_overlap_seq_match_range
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_overlapping_range{
	 # Purpose  : compute the overlapping part of two numeric ranges.
	 # Argument : $_[0], $_[1] -- references to arrays whose first two elements
	 #            are the start and the end of a range.
	 # Returns  : reference to a two-element array (start, end) of the overlap,
	 #            or (0, 0) when none of the four overlap cases applies.  Two
	 #            ranges that merely touch at one position (end of one equals
	 #            start of the other) fall into the (0, 0) case.
	 # All four boundaries are lexical so that this sub no longer overwrites
	 # package globals named $R_end1 / $R_end2 used elsewhere in the file.
	 my ($R_start1, $R_end1) = @{$_[0]}[0..1];
	 my ($R_start2, $R_end2) = @{$_[1]}[0..1];
	 my @new_range;

	 if( ($R_start1 <= $R_start2) &&       # ------------
	     ($R_end1   >= $R_end2) ){          #   -------
	   @new_range = ($R_start2, $R_end2);   # range 2 lies inside range 1
	 }elsif( ($R_start1 <= $R_start2) &&   # -----------
	         ($R_end1   <= $R_end2)   &&   #    -----------
	         ($R_end1   >  $R_start2) ){
	   @new_range = ($R_start2, $R_end1);   # range 1 overlaps the left part of range 2
	 }elsif( ($R_start1 >= $R_start2) &&   #    -----------
	         ($R_end1   >= $R_end2)   &&   # -----------
	         ($R_end2   >  $R_start1) ){
	   @new_range = ($R_start1, $R_end2);   # range 1 overlaps the right part of range 2
	 }elsif( ($R_start1 >= $R_start2) &&   #   ------
	         ($R_end1   <= $R_end2) ){      # -----------
	   @new_range = ($R_start1, $R_end1);   # range 1 lies inside range 2
	 }else{                                 #  ----
	   @new_range = (0, 0);                 #        --------
	 }
	 return(\@new_range);
}


#______________________________________________________________________________
# Title     : find_small_files
# Usage     : @files_found_to_be_small=@{&find_small_files(@ARGV)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub find_small_files{
    # Purpose  : list the files in the current directory (as returned by
    #            read_file_names_only('.')) whose size lies between a lower and
    #            an upper limit, both inclusive.
    # Argument : $_[0] -- upper size limit in bytes.
    #            $_[1] -- lower size limit in bytes (optional).  When it is not
    #                     passed, $ARGV[1] is used as before; when that is
    #                     missing too, the lower limit is 0.
    # Returns  : reference to an array of the matching file names.
    # Output   : prints one line per matching file.
    my($file_size_cut_line, $file_size_cut_line_bigger_than, $size, $file,
       @files_removed, @files);

    $file_size_cut_line = $_[0];
    # The second argument used to be ignored in favour of $ARGV[1]; it is now
    # honoured, with $ARGV[1] kept as the fallback for existing callers.
    $file_size_cut_line_bigger_than = defined($_[1]) ? $_[1] : $ARGV[1];
    $file_size_cut_line_bigger_than = 0 unless defined($file_size_cut_line_bigger_than);

    @files=@{&read_file_names_only('.')};

    for $file (@files){
        # -s yields a false value for empty or unreadable entries; treat as 0 bytes.
        $size = (-s $file) || 0;
        if($size <= $file_size_cut_line and $size >= $file_size_cut_line_bigger_than){
            push(@files_removed, $file);
            print "\n# (i) $file is smaller than $file_size_cut_line, size= $size byte";
        }
    }
    return(\@files_removed);
}

#______________________________________________________________________________
# Title     : find_missing_files_of_certain_extension
# Usage     : @files=@{&find_missing_files_of_certain_extension(".", "e=fa,mspa,pbla.gz")};
#
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub find_missing_files_of_certain_extension{
    # Purpose  : for every base name in a directory that has at least one file
    #            with one of the requested extensions, check which of the
    #            requested extensions exist as a non-empty file "base.ext".
    # Argument : $_[0] -- directory to scan (string or scalar reference);
    #                     defaults to '.' when empty.
    #            $_[1] -- extension spec "e=ext1,ext2,..." (string or scalar
    #                     reference).  A single extension ("e=fa") is accepted.
    # Returns  : reference to an array with the names of the list files written,
    #            in the order missing_<ext>_files.list, found_<ext>_files.list
    #            for each extension.
    # Output   : writes the list files into the directory that was current when
    #            the sub was called; each base name appears once per list.
    # Dies     : when the directory cannot be entered or read, or a list file
    #            cannot be written.
    my($input_dir, $ext_spec, $pwd, $file_name, $base, $ext, $wanted,
       @extensions, %is_wanted_ext, %base_checked, @read_files,
       %found_files, %missing_files, @files_created);

    $pwd=`pwd`; chomp($pwd);

    # Explicit ref() test: '${$_[0]} || $_[0]' would treat a plain string such
    # as "." as a symbolic reference (i.e. read the variable $.).
    $input_dir = ref($_[0]) ? ${$_[0]} : $_[0];
    $ext_spec  = ref($_[1]) ? ${$_[1]} : $_[1];
    $input_dir = '.' unless defined($input_dir) and length($input_dir);

    # Everything after "e=" up to the next whitespace is a comma separated list.
    if(defined($ext_spec) and $ext_spec=~/e=(\S+)/){
        @extensions = grep { length($_) } split(/\,/, $1);
    }
    print "\@extensions :  @extensions\n";
    %is_wanted_ext = map { ($_ => 1) } @extensions;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Main READING PART
    #_________________________________________________________________
    chdir($input_dir)
        or die "\n# find_missing_files_of_certain_extension: could not get into $input_dir\n";
    opendir(my $dir_handle, ".")
        or die "\n# find_missing_files_of_certain_extension: could not read $input_dir: $!\n";
    @read_files = readdir($dir_handle);
    closedir($dir_handle);
    if(@read_files < 1){
        print "\n# find_missing_files_of_certain_extension: ERROR??, \@read_files is empty\n\n\n";
    }

    for $file_name (@read_files){
        # base = text before the first dot, ext = everything after it
        next unless $file_name =~ /^([^\.]+)\.(\S+)$/;
        ($base, $ext) = ($1, $2);
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # There must be at least one matching extension for one base name
        #___________________________________________________________________
        unless($is_wanted_ext{$ext}){ print "\n Excluded : $base"; next }
        # A base with several matching files is evaluated only once, so the
        # lists do not contain duplicates.
        next if $base_checked{$base}++;
        for $wanted (@extensions){
            if(-s "$base\.$wanted"){
                push(@{$found_files{$wanted}}, $base);
            }else{
                push(@{$missing_files{$wanted}}, $base);
            }
        }
    }
    chdir($pwd);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Writing the two list files for each extension
    #_________________________________________________________________
    for $wanted (@extensions){
        for my $list ( ["missing_$wanted\_files\.list", $missing_files{$wanted}],
                       ["found_$wanted\_files\.list",   $found_files{$wanted}] ){
            my($list_file, $bases) = @{$list};
            push(@files_created, $list_file);
            open(my $list_handle, '>', $list_file) || die "\n Can not create $list_file \n";
            for $base (@{ $bases || [] }){
                print {$list_handle} "$base\n";
            }
            close($list_handle) || die "\n Can not finish writing $list_file \n";
        }
    }
    print "\n @files_created are created \n\n";
    return(\@files_created);
}



#______________________________________________________________________________
# Title     : find_missing_mspa_files_for_pbla_gz
# Usage     :
#             @files=@{&find_missing_mspa_files_for_pbla_gz("e=$input_extensions")};
#             @files=@{&find_missing_mspa_files_for_pbla_gz(\@input_extensions)};
#
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub find_missing_mspa_files_for_pbla_gz{
	# Purpose : in each input directory, collect the names of the *.pbla.gz files
	#           for which no non-empty <base>.mspa file exists.
	# Argument: any mix of '.', "D=dir", directory names, '-p', '-s', scalar
	#           references to such values, and array references.  When no
	#           directory is given the current directory is used.
	# Returns : reference to the array of *.pbla.gz file names (sorted when the
	#           '-s' option was given).
	# NOTE(review): the messages below say 'read_file_names_only'; the argument
	#           handling looks copied from that sub -- confirm.
	# NOTE(review): $path_include and %target_file_names are set during argument
	#           parsing but not used later in this sub; $verbose is not declared
	#           or set here.
	my($in_dir, $i, $j, $x, $k, $dir, @final_files, @possible_dirs, $sort_opt, $ext, @extensions,
		 $path_include, @in, $glob_given, @files_globed, @in_dir, $pwd, $extension_given,
		 %target_file_names, @target_file_names, @read_files, %ext_hash);
	$pwd=`pwd`; chomp($pwd);
	$in_dir=$pwd;
	@in=@_;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Directory entry and opts detection
	#  (recognised arguments are spliced out of @in, and $k is stepped back
	#   so that the element that moved into the same slot is not skipped)
	#_________________________________________
	for($k=0; $k < @in; $k++){
 	    if   ( $in[$k] eq '.'){ push(@in_dir,$pwd); splice(@in, $k, 1);  $k--; next }
		if( !(ref($in[$k]))){
	        # plain (non-reference) argument
	        print "\n# read_file_names_only: $in[$k] is not a reference";
			if($in[$k]=~/D=(\S+)/i){
					print "\n# read_file_names_only : $1 is used as input dir ";
					push(@in_dir, $1); splice(@in, $k, 1);    $k--; next;  }
			if( -d "$in[$k]" ){
				print "\n# read_file_names_only: $in[$k] is a dir";
				if($in[$k]=~/\/\S+$/){
							$path_include=1;  ## If the input dir has '/', I assume path should be added to out file names
							print "\n# read_file_names_only: \$path_include is set to 1";
			    }
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# Removes the last slash '/' of input dir name
			#________________________________________________
			if($in[$k]=~/\/$/){   chop($in[$k]);  }
						push(@in_dir, $in[$k]);
						splice(@in, $k, 1);    $k--; next;
		}
		# '-p' sets $path_include, '-s' requests sorted output; anything else
		# that is not a file only produces a warning.
		if(!(-f $in[$k]) and $in[$k] =~ /^\-p *$/ ){ ## somehow, ' *' is essential
			$path_include=1;
			splice(@in, $k, 1); $k--;
								}elsif(!(-f $in[$k]) and $in[$k] =~ /^\-s *$/   ){$sort_opt=1; splice(@in, $k, 1); $k--;
								}else{
										 print "\n# (W) read_file_names_only: $in[$k] not a file, nor dir, a file extnsion?\n";
								}
	 }elsif(ref($in[$k])){
				# reference argument
				if(ref($in[$k]) eq 'SCALAR'){
					 if( -d ${$in[$k]}){
							 if(${$in[$k]}=~/\/$/){ chop(${$in[$k]}) }
							 push(@in_dir,${$in[$k]});
							 splice(@in, $k, 1);
							 $k--;
					 # NOTE(review): the -f tests below are applied to the reference
					 # itself ($in[$k]), not to the referenced string -- confirm.
					 }elsif(!(-f $in[$k]) and ${$in[$k]} =~ /^\-p$/ ){$path_include=1; splice(@in, $k, 1); $k--;
					 }elsif(!(-f $in[$k]) and ${$in[$k]} =~ /^\-s$/ ){$sort_opt=1; splice(@in, $k, 1); $k--;}
				}elsif(ref($in[$k]) eq 'ARRAY'){
					 @target_file_names=@{$in[$k]}; splice(@in, $k, 1); $k--;
					 for($x=0; $x < @target_file_names; $x++){  # making a hash out of @array
							 $target_file_names{$target_file_names[$x]}=$target_file_names[$x];
					 }
				}
	 }
	}
	# No directory found among the arguments: fall back to the current directory.
	if(@in_dir < 1){ push(@in_dir, $pwd) }

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Main READING PART. @in_dir holds the directories to scan
	#_________________________________________________________________

	for($k=0; $k< @in_dir; $k++){
		 chdir($in_dir[$k]) or die "\n# read_file_names_only: could not get into $in_dir[$k]\n";
	     # NOTE(review): the result of opendir is not checked and DIR1 is not closed in this sub.
	     opendir(DIR1, ".");
		 @read_files = readdir(DIR1);
	     print "\n# read_file_names_only: content of \@read_files in $in_dir[$k] : @read_files\n" if $verbose;
	     if(@read_files < 1){ print "\n# read_file_names_only: ERROR??, \@read_files is empty\n\n\n"; }
	     for($i=0; $i < @read_files; $i ++){
               if( -f "$read_files[$i]" ){
                   # $1 is the file name without the trailing ".pbla.gz"
                   if( $read_files[$i] =~ /(\S+)\.pbla\.gz$/){
                       if(-s "$1\.mspa"){
                            # a non-empty .mspa file exists: nothing to report
                       }else{
                            push(@final_files, "$read_files[$i]" );
                       }
                   }
               }

	     }
	     # go back to the starting directory before entering the next one
	     chdir($pwd);
   }
   @final_files=sort @final_files if $sort_opt == 1;
   return(\@final_files);
}



#______________________________________________________________________________
# Title     : find_fssp_DALI
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub find_fssp_DALI {
        # Purpose  : run the simple search for a string and print the HTML
        #            trailer that follows the search output.
        # Argument : the search string (first argument).
        # Globals  : sets $hit and $heading; reads $TABLE2.  $string is
        #            localised, so subs called from here see its value.
        # Output   : prints "<HR>" followed by one of two fixed messages,
        #            chosen by the value of $hit after the search call.
        local($string)=@_;

        $hit=0;
        # header line(s) containing "PDBid" taken from the table file
        $heading=`grep PDBid $TABLE2`;

        # the search helper is only called for a non-empty search string
        &grep_string_in_PDB_file($string, $hit) if $string;

        print "<HR>\n";
        if(!($hit>=0)){
                print "No matches were found in the simple search.\n";
        }else{
                print "Click on a hyperlink to view the structural alignments.\n";
        }
}



#______________________________________________________________________________
# Title     : find_boundaries_in_arrays
# Usage     :
# Function  :
# Example   :
# Keywords  : convert_array_to_ranges, translate_array_to_ranges
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub find_boundaries_in_arrays{
    # Purpose  : turn a list of numbers into the boundaries of its runs of
    #            consecutive values (each value one more than the previous).
    # Argument : $_[0] -- reference to an array of numbers.
    # Returns  : reference to a flat array (start1, end1, start2, end2, ...).
    #            e.g. (1,2,3,7,8,9) gives (1,3,7,9); a single value 5 gives
    #            (5,5); an empty input gives an empty array.
    # The last run is closed explicitly instead of comparing the last element
    # with the (undefined) element behind the end of the array; that
    # comparison failed to close the run when the last value was -1.
    my(@array_with_numbers, @boundaris_as_ranges, $i);
    @array_with_numbers=@{$_[0]};
    return(\@boundaris_as_ranges) unless @array_with_numbers;

    # the first run starts at the first value
    push(@boundaris_as_ranges, $array_with_numbers[0]);
    for($i=0; $i < $#array_with_numbers; $i++){
        # a gap between neighbours ends one run and starts the next
        if($array_with_numbers[$i] + 1 != $array_with_numbers[$i+1]){
            push(@boundaris_as_ranges, $array_with_numbers[$i], $array_with_numbers[$i+1]);
        }
    }
    # the last run ends at the last value
    push(@boundaris_as_ranges, $array_with_numbers[$#array_with_numbers]);
    return(\@boundaris_as_ranges);
}



#______________________________________________________________________________
# Title     : find_common_seq_names
# Usage     :($INT_sq1, $INT_sq2, $sq1, $sq2)=@{&find_common_seq_names(\@seq_names)};
# Function  : returns the common sequence names(usually Intermediates in search)
#             The sequence names can be XXXX or XXXX_30-66
#             Accepts 4 strings in an array.
# Example   :
# Keywords  : get_common_seq_names, get_identical_seq_names,
#             get_2_most_similar_strings_in_array
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub find_common_seq_names{
		# Purpose  : among (normally four) sequence names find the two that
		#            belong together and return them first, followed by the
		#            other two.
		# Argument : $_[0] -- reference to an array of names, either plain
		#            (XXXX) or with a range suffix (XXXX_30-66).
		# Returns  : reference to an array (common1, common2, other1, other2);
		#            empty when no pair was found.
		# The names are sorted first, so names that belong together are
		# neighbours and each name only needs comparing with its predecessor.
		# The index arithmetic below assumes four names.
		my @seq_names=sort @{$_[0]};
		# $previous_seq_name_start is lexical so that a value left by an earlier
		# call can no longer influence the ordering of the returned pair.
		my ($i, $previous_seq_name, @seq_names_out, $previous_seq_name_orig,
		    $previous_seq_name_start, $highest_iden, @name_chars2, @name_chars1);
		for($i=0; $i< @seq_names; $i++){
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names have RANGE information XXX_10-30
			 #______________________________________________
			 if($seq_names[$i]=~/(\S+)_(\d+)\-\d+/){
					my($name_part, $start_part)=($1, $2);
					if($name_part eq $previous_seq_name){
					    if($i == 1){
					       # the member with the smaller range start goes first
					       if($previous_seq_name_start > $start_part){
										 @seq_names_out=($seq_names[$i], $previous_seq_name_orig, $seq_names[$i+1], $seq_names[$i+2]);
					       }else{
										 @seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i+1], $seq_names[$i+2]);
								 }
							}elsif($i == 3){
								 # pair is at positions 2 and 3: the others are positions 0 and 1
								 # (the general expression below would read the non-existent position 4)
								 @seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i-3], $seq_names[$i-2]);
							}elsif($i > 1){
								 @seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i-2], $seq_names[$i+1]);
							}
					}
					$previous_seq_name=$name_part;
					$previous_seq_name_start=$start_part;
					$previous_seq_name_orig=$seq_names[$i];
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names DO NOT have range info  XXXXXX
			 # and the name is identical to the next one
			 #______________________________________________
			 }elsif($seq_names[$i] eq $seq_names[$i+1]){
					# negative indices wrap around to the end of the array, which
					# with four names selects the remaining two
					if($i == 0){
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-1], $seq_names[$i+2]);
					}elsif($i == 1){
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-2], $seq_names[$i-1]);
					}elsif($i == 2){
							# pair is at positions 2 and 3: the others are positions 0 and 1
							# (indices $i-3 and $i-2 returned the pair member again and lost a name)
							@seq_names_out=($seq_names[$i], $seq_names[$i+1], $seq_names[$i-2], $seq_names[$i-1]);
					}
					return(\@seq_names_out)
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # If seq names DO NOT have range info and there is not identical seq pairs:
			 # count the positions at which this name and the previous one have the
			 # same character and remember the neighbours with the highest count
			 #__________________________________________________________________________________
			 }else{
			    my ($counter, $j);
					$counter=1;
					@name_chars1=split(//, $previous_seq_name);
					@name_chars2=split(//, $seq_names[$i]);
					for($j=0; $j < @name_chars1; $j++){
							if($name_chars1[$j] eq $name_chars2[$j]){
									$counter++;
									if($counter > $highest_iden){
											$highest_iden=$counter;
											if($i == 1){
													@seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i+1], $seq_names[$i+2]);
											}elsif($i == 2){
													@seq_names_out=($previous_seq_name_orig, $seq_names[$i], $seq_names[$i-2], $seq_names[$i+1]);
											}elsif($i == 3){
													@seq_names_out=($seq_names[$i-1], $seq_names[$i], $seq_names[$i-3], $seq_names[$i-2]);
													return(\@seq_names_out);
											}
									}
							}else{ ## When there is non-identical char comes, skip it.
									next;
							}
					}
					$previous_seq_name=$seq_names[$i];
					$previous_seq_name_orig=$seq_names[$i];
			 }
		}
		return(\@seq_names_out);
}



#______________________________________________________________________________
# Title     : find_source_perl_library
# Usage     : $source_library=${&find_source_perl_library};
# Function  : gets the default perl sub source library from ENV setenv
# Example   :
# Keywords  :
# Options   :
# Author    : jong@ebi.ac.uk
# Version   : 1.2
#------------------------------------------------------------------------------
sub find_source_perl_library{
     # Purpose  : locate the default perl subroutine source library.
     # Search   : 1) environment variable MY_PERL_LIB
     #            2) environment variable BIO_PERL
     #            3) the first existing file of a fixed list of paths
     # Returns  : reference to a scalar holding the library path; the scalar
     #            is undefined when nothing was found (a message is printed).
     # Globals  : $bioperl_lib is still set when BIO_PERL is used, as before.
     my($source_library, $candidate);
     # Checked in this order; the first existing file wins.
     my @candidates=(
            "/Bio/Perl/Bioinf.pl",
            "/perl/Bioinf.pl",
            "$ENV{HOME}\/Perl/Bioinf.pl",
            "/gn0/jong/Perl/B.pl",
            "/home/jong/Perl/B.pl",
            "/Perl/Bioinf.pl",
            "/Perl/B.pl",
            "B.pl",
            "/usr/Perl/B.pl",
            "/ss0/sat/Script/B.pl",
            "/ss0/agb/Script/B.pl",
     );

     print "\n# $0: You did not use \"s=\" option for \$source_library\n";
     print "\n#     I am trying to retrieve your default source lib. \n";

     if( defined( $ENV{'MY_PERL_LIB'} ) ){
            $source_library=$ENV{'MY_PERL_LIB'};
     }elsif( defined( $ENV{'BIO_PERL'} ) ){
            # This branch used to set only $bioperl_lib, so the caller received
            # an undefined path although BIO_PERL was set.
            $bioperl_lib=$ENV{'BIO_PERL'};
            $source_library=$ENV{'BIO_PERL'};
     }else{
            for $candidate (@candidates){
                if(-e $candidate){
                    $source_library=$candidate;
                    last;
                }
            }
            unless(defined($source_library)){
                print "\n $0 can not find source library, please set BIO_PERL env\n";
            }
     }
     return(\$source_library);
}

#______________________________________________________________
# Title     : find_central_seq_mspa_chunk
# Usage     : @ranges=@{&find_central_seq_mspa_chunk(\$central_seq, \@MSP)};
# Function  : This finds the correct mspa chunk with given seq name
#             and big original or any mspa chunk
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub find_central_seq_mspa_chunk{
	 # Purpose  : from a list of MSPA lines pick those whose first sequence
	 #            name equals the given name (and whose second name differs)
	 #            and report the first region of each as a "start - end" string.
	 # Argument : $_[0] -- sequence name (scalar reference, or plain string)
	 #            $_[1] -- reference to an array of MSPA lines
	 # Returns  : reference to an array of "start - end" strings, in input order.
	 my $central_seq = ref($_[0]) ? ${$_[0]} : $_[0];
	 my ($line, @MSP_1);
	 for $line (@{$_[1]}){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                                      $1     $2    $3                    $4
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  # The second column may also be written in e-notation (2.3e-06), as is
	  # accepted by find_central_sequence; such lines used to be skipped.
	  next unless $line =~ /^\s*\d+\s+\d+\.*[eE\-\+\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+\d+\s+\d+\s+(\S+)\s+.+/;
	  my ($region_start, $region_end, $first_name, $second_name) = ($1, $2, $3, $4);
	  # lines matching the sequence against itself are left out
	  if( ($first_name eq $central_seq) && ($first_name ne $second_name) ){
		 push(@MSP_1, "$region_start - $region_end");
	  }
	 }
	 return(\@MSP_1);
}



#______________________________________________________________
# Title     : find_central_sequence
# Usage     :
# Function  : accepts mspa file and finds the central sequence.
#             central sequence is in the centre of all the member
#             sequences in a group or cluster
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub find_central_sequence{
	# Purpose  : read one or more mspa files and return the name of the sequence
	#            with the highest summed score.
	# Method   : a line whose two sequence names are identical makes that name
	#            the current one; the first column of every following line with
	#            two different names is added to the current name's total.
	# Argument : file names (or references to them), sorted out by handle_arguments.
	# Returns  : reference to a scalar holding the winning name (undefined when
	#            no positive total was collected).
	# Globals  : @MSP is still filled with the lines of the last file read, in
	#            case existing callers look at it.
	#"""""""""""""""""< handle_arguments{ head Ver 3.9 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	 # $seq_name and $largest are lexical: as package globals they kept their
	 # values between calls, so a later call could compare against the maximum
	 # of an earlier one and return nothing.
	 my (%score, $out, @Keys, $seq_name);
	 my $largest = 0;
	 for($i=0; $i< @file; $i++){
	  my($input_file) = ref($file[$i]) ? ${$file[$i]} : $file[$i];

	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_mspa_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
		  next;     # nothing can be read from a missing file
	  }
	  my $file_handle;
	  unless( open($file_handle, '<', $input_file) ){
		  print "\n# find_central_sequence: can not open $input_file: $!\n";
		  next;
	  }
	  @MSP=<$file_handle>;
	  close($file_handle);

	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                   $1                 $2     $3    $4      $5     $6    $7     $8
		 #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^\s*(\d+)\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(.+)/){
			if($4 eq $7){
			   # self match: start collecting for this sequence
			   $seq_name=$7;
			}elsif( defined($seq_name) ){
			   $score{$seq_name}+= $1;
			}
		 }
	  }
	 }
	 # pick the name with the largest total
	 @Keys=keys %score;
	 for($i=0; $i< @Keys; $i++){
	  if($score{$Keys[$i]} > $largest){
		 $largest=$score{$Keys[$i]};
		 $out=$Keys[$i];
	  }
	 }
	 return(\$out);
}



#______________________________________________________________________________
# Title     : ftp_put_files_given_from_a_list
# Usage     : &ftp_put_files_given_from_a_list(\$file_list, $ftp_server_name, $target_directory,
#                                  $username='jong', $passwd='asdfjik');
#
# Function  : transfer file over ftp (file names are given as a list)
# Example   :
#             &ftp_put_files_given_from_a_list(\$file_list,
#                                               $ftp_server_name,
#                                               $target_directory,
#                                               $username='jong',
#                                               $passwd='9890ukk',
#                                              );
# Keywords  : transfer_files_with_ftp
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub ftp_put_files_given_from_a_list{
    # Purpose  : upload the files named in a list file to an FTP server.
    # Argument : $_[0] list file name        $_[1] FTP server name
    #            $_[2] target directory      $_[3] user name
    #            $_[4] password              $_[5] transfer type (optional,
    #                                              default 'I' = binary)
    #            Each may be given as a plain value or as a scalar reference.
    # Dies     : when the list file is missing or empty, or when connecting,
    #            logging in or changing to the target directory fails.
    # Output   : prints one progress line per file; a failed upload of a
    #            single file is reported and the remaining files are still sent.
    my($i, $ftp_server_name, $ftp, $target_directory, $username, $passwd,
       $file_list, @files, $type, $num_of_files);
    use Net::FTP;
    $file_list       = ref($_[0]) ? ${$_[0]} : $_[0];
    $ftp_server_name = ref($_[1]) ? ${$_[1]} : $_[1];
    $target_directory= ref($_[2]) ? ${$_[2]} : $_[2];
    $username        = ref($_[3]) ? ${$_[3]} : $_[3];
    $passwd          = ref($_[4]) ? ${$_[4]} : $_[4];
    $type            = ref($_[5]) ? ${$_[5]} : $_[5];
    $type = 'I' unless $type;

    # Check the input before opening a connection.
    unless(-s $file_list){
       print "\n I need a list file for files: \$file_list has \"$file_list\"\n"; die;
    }

    $ftp=Net::FTP->new("$ftp_server_name") || die "Cant connect to $ftp_server_name\n";
    # The password is deliberately left out of the error message.
    $ftp->login($username, $passwd) || die "Cant login as $username\n\n";
    # The transfer type used to be parsed but never sent to the server.
    $ftp->type($type);
    # Without this check the files would silently end up in the login directory.
    $ftp->cwd($target_directory)
       || die "Cant change to $target_directory on $ftp_server_name: ", $ftp->message, "\n";

    $num_of_files=@files=@{&open_list_file_ARRAY(\$file_list)};
    for($i=0; $i < @files; $i++){
       unless( $ftp->put($files[$i]) ){
          print "# (W) ftp_put_files_given_from_a_list: could not put $files[$i]: ", $ftp->message, "\n";
          next;
       }
       print "$i/$num_of_files Putting $files[$i] to $ftp_server_name as $username using ftp_put_files_given_from_a_list\n";
    }
    $ftp->quit();
}


#______________________________________________________________________________
# Title     : ftp_get_files_given_from_a_list
# Usage     : &ftp_get_files_given_from_a_list(\@files, $ftp_server_name, $server_directory,
#                                  $username='jong', $passwd='asdfjik', $client_directory);
#
# Function  : transfer file over ftp (file names are given as a list)
#             Every name in @files is fetched with Net::FTP 'get' from
#             $server_directory and stored as $client_directory/<name>.
# Example   :
#             &ftp_get_files_given_from_a_list(\@files,
#                                               $ftp_server_name,
#                                               $server_directory,
#                                               $username='jong',
#                                               $passwd='9890ukk',
#                                               $client_directory
#                                              );
# Keywords  : transfer_files_with_ftp
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Argument  : 0) reference to an array of file names, 1) ftp server name,
#             2) server directory, 3) user name, 4) password,
#             5) local directory (default '.'), 6) transfer type (optional,
#             default 'I' = binary). Arguments 1-6 can be plain values or
#             references to scalars.
# Returns   : the return value of Net::FTP 'quit'.
#             Dies when no file name is given, or when the connection, the
#             login or the change of directory fails.
#             A file which can not be fetched only produces a warning.
# Version   : 1.3
#------------------------------------------------------------------------------
sub ftp_get_files_given_from_a_list{
    use Net::FTP;

    my @files = ref($_[0]) eq 'ARRAY' ? @{$_[0]} : ();

    # The remaining arguments are accepted as plain values or scalar references.
    my($ftp_server_name, $server_directory, $username, $passwd, $client_directory, $type)
        = map { ref($_) ? ${$_} : $_ } @_[1..6];
    $type = 'I' unless $type;    # binary transfer unless the caller asks otherwise

    # An empty local directory would turn "dir/file" into "/file" (file system root).
    $client_directory = '.' unless defined($client_directory) and length($client_directory);

    # Check the input before any network traffic is started.
    unless(@files){
        die "\n I need a file list: \@files\n";
    }

    my $ftp = Net::FTP->new("$ftp_server_name")
        or die "Cant connect to $ftp_server_name\n";

    # The password is deliberately kept out of the error message.
    $ftp->login($username, $passwd)
        or die "Cant login as $username to $ftp_server_name\n\n";

    # Without this check the files would silently be looked up in the login directory.
    unless($ftp->cwd($server_directory)){
        my $reason = $ftp->message;
        $ftp->quit();
        die "Cant change to directory $server_directory on $ftp_server_name: $reason\n";
    }

    # Net::FTP starts in ASCII mode, so the requested type has to be sent
    # explicitly; 'type' returns undef when the server refuses it.
    defined($ftp->type($type))
        or warn "Cant set transfer type $type on $ftp_server_name, server default is used\n";

    for(my $i=0; $i < @files; $i++){
        my $local_file = "$client_directory\/$files[$i]";
        if($ftp->get($files[$i], $local_file)){
            print "($i) getting $files[$i] from $ftp_server_name as $username. $local_file\n";
        }else{
            warn "($i) FAILED to get $files[$i] from $ftp_server_name: ", $ftp->message, "\n";
        }
    }
    $ftp->quit();
}



#______________________________________________________________
# Title     : write_dof_files
# Usage     : &write_dof_files(\@mspas);
#             while @mspas means mspa file names
# Function  : write Alex's domfam file. it prints out tilde lines
#             if the seqlet matched are below threshold defined.
#             For every input file  <base>.xxx  a file <base>_<thresh>.dof
#             is written (<base> is the part of the name before the first
#             dot). Each parsed match line becomes one text line with the
#             target name, its length (looked up in the global %fasta),
#             the match range and a bar drawn at 1/scale resolution.
#             The scale can be fixed with 's=<number>'; otherwise it is
#             derived, for each file, from the longest match which starts
#             at position 1.
#             Lines which can not be parsed as a match are skipped.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             v  for verbose STDOUT
#             n  for NO seq start and end number display
#             t= for teshold (eg, t=40  for Blastp(or ssearch) score 40 threshold)
#
# Returns   : nothing
# Argument  : anything handle_arguments accepts; the file names found by
#             it are used as the mspa input files.
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub write_dof_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my $thresh = 0;
    $thresh = $vars{'t'} if defined( $vars{'t'} );
    my $verbose_out = ($char_opt =~ /v/) ? 1 : 0;

    # Only a positive number is accepted as a fixed scale. Anything else
    # (including 0, which would divide by zero below) means "automatic".
    my $user_scale = $vars{'s'};
    my $scale_is_fixed = 0;
    if(defined($user_scale) and $user_scale =~ /^\s*\d+/ and $user_scale > 0){
        $scale_is_fixed = 1;
    }

    MSPA_FILE: for my $mspa_file (@file){ ### @file should have mspa file names
        my ($base) = split(/\./, $mspa_file);
        my $out_dof_file = "${base}_$thresh.dof";

        #~~~~~~~ Reading the whole mspa file ~~~~~~~~~~~~~~~~~~
        my $msp_fh;
        unless(open($msp_fh, '<', $mspa_file)){
            warn "\n# (E) write_dof_files: can not read $mspa_file: $!\n";
            next MSPA_FILE;
        }
        my @output = <$msp_fh>;
        close($msp_fh);

        #~~~~~~~ Longest match which starts at position 1 ~~~~~
        # This is done for every file, also when the scale is fixed,
        # because the ruler and the bar width below depend on it.
        my $leng = 0;
        for my $msp_line (@output){
            if($msp_line =~ /^\s*\S+\s+\S+\s+1\s+(\d+)\s+\S+/){
                $leng = $1 if ($1 > $leng);
            }
        }

        ###### Getting automatic $scale ~~~~~~~~~~~~~~~~~~~~~~~
        my $scale = $user_scale;
        unless($scale_is_fixed){
            if($leng > 1300){
                $scale = 20;
            }elsif($leng > 1){          # log(0) dies and log(1) is 0
                $scale = int($leng / (log($leng)*10) );
            }else{
                $scale = 0;
            }
            $scale = 5 if $scale < 5;
        }

        if(@output and $output[0] =~ /^$/){ splice(@output, 0, 1); }
        if($remove =~ /r/){ shift(@output); }  ## removing the first line

        my $dof_fh;
        unless(open($dof_fh, '>', $out_dof_file)){
            warn "\n# (E) write_dof_files: can not write $out_dof_file: $!\n";
            next MSPA_FILE;
        }

        #######======  Drawing the top line ###########
        my $line_size = int($leng/$scale);

        #######======  SCALE writing =======###########
        print {$dof_fh} "\n NAME       LENG  FROM- TO  ";
        print           "\n NAME       LENG  FROM- TO  "  if $verbose_out;
        my $div    = $line_size;
        my $Scaled = int($div/$scale);
        my $step   = ($div < 1) ? 1 : $div;   # a zero step would never leave the loop
        for(my $pos=1; $pos < $leng; $pos += $step){
            my $SC = $Scaled - length($pos);
            $SC = 0 if $SC < 0;
            my $tick = $pos . "." x $SC;
            print {$dof_fh} $tick;
            print           $tick if $verbose_out;
        }

        #####~~~~ Processing MSP file lines ~~~~~~~~
        my %count;                 # how many times each target was already drawn
        my $below_thresh_seen = 0; # set when at least one tilde line is written
        MSP_LINE: for my $each_mspa_line (@output){
            if($each_mspa_line =~ /^$/){
                print {$dof_fh} "\n";
                print "\n"  if $verbose_out;
                next MSP_LINE;
            }
            # Anything which is not a match line has no values to draw.
            next MSP_LINE unless $each_mspa_line =~
                /^\s*(\d+)\s+\S+\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/;
            my ($score, $start1, $end1, $query, $start2, $end2, $target_seq)
                = ($1, $2, $3, $4, $5, $6, $7);
            my $target_leng = length($fasta{$target_seq});

            # '~' marks a match below the threshold, '-' a normal one.
            my $mark = '-';
            if($score < $thresh){
                $mark = '~';
                $below_thresh_seen = 1;
            }

            my $S = int($start1/$scale);
            my $E = int($end1/$scale);
            my $L = $E - $S + 1;
            my $line;
            if($char_opt =~ /n/i){
                $L = 0 if $L < 0;
                $line = " " x $S . $mark x $L;
            }else{
                # The numbers take room from the bar, but at least one mark is kept.
                $L = $L - length($start2) - length($end2);
                $L = 1 if $L < 1;
                $line = " " x $S . $start2 . $mark x $L . $end2;
            }

            ### Actual writing ####
            # The first line of each target is flagged with '>'.
            my $prefix = $count{$target_seq}++ ? ' ' : '>';
            #    Name leng strt-end   |---------------------------------------------------------
            my $format = "\n%s%-11s %-4d %-4d-%4d %-${line_size}s";
            printf {$dof_fh} $format, $prefix, $target_seq, $target_leng, $start1, $end1, $line;
            printf($format, $prefix, $target_seq, $target_leng, $start1, $end1, $line) if $verbose_out;
        }
        print {$dof_fh} "\n\n";
        close($dof_fh)
            or warn "\n# (E) write_dof_files: error while closing $out_dof_file: $!\n";

        print "\n"  if $verbose_out;
        print "\n# ~~~~~ lines mean match regions with below threshold ($thresh)" if $below_thresh_seen;
        print "\n# $out_dof_file   is created \n";
    }
    return;
}



#______________________________________________________________
# Title     : make_filtered_list
# Usage     : ($result_ref, $num_seq_ref)=&make_filtered_list(\$mspa_file);
# Function  : this is the core of check_genome_cluster.pl
#             finds good linkage seqlets in mspa files
#             Match lines with a score above the global $mspa_value are
#             grouped into one chunk per query name. Seqlets of these
#             chunks are then followed with follow_seqlet_link, and the
#             result flag is set when a link through all chunks is found
#             whose common seqlet is longer than the global $threshold.
#             When the global $fast is 1, only the first chunk is tried
#             and the search stops at the first hit.
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : (\$result, \$num_seq)
#             $result  is 1 when a complete link was found (undef otherwise),
#             $num_seq is the number of chunks of the last file handled.
# Argument  : anything handle_arguments accepts; the file names found by
#             it are used as the mspa input files.
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub make_filtered_list{ #####################################33
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    $|=1;
    my $result;
    my $num_seq=0;

    FIRST_FOR: for my $mspa_file (@file){
        #~~~~~~ Reading the file: every file is opened on its own ~~~~~~
        my $msp_fh;
        unless(open($msp_fh, '<', $mspa_file)){
            warn "\n# (E) make_filtered_list: can not read $mspa_file: $!\n";
            next FIRST_FOR;
        }
        # Lines which pass the score filter are collected per query name ($4).
        my %temp_mspa;
        while(my $msp_line = <$msp_fh>){
            if($msp_line =~ /^\s*(\d+)\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\w+)\s+(\d+)\s+(\d+)\s+(\w+)/i){
                if($1 > $mspa_value){
                    $temp_mspa{$4} .= $msp_line;
                }
            }
        }
        close($msp_fh);

        my @mspa = values %temp_mspa;   # @mspa has (mspachunk1, mspachunk2...)
        $num_seq = @mspa;
        print "\nTOP Number of seq is: $num_seq \n";

        #""""""  Real algorithm starts HERE##''''''''''''''''''''''''''''''''''''''''

        # optimization: I found no need to go through all mspa chunk. One is enough by experience
        my $mspa_chunk_num = ($fast==1) ? 1 : scalar(@mspa);

        MSP1: for(my $chunk=0; $chunk < $mspa_chunk_num; $chunk++){
            my @seqlets1 = split(/\n+/,  $mspa[$chunk]);
            print "   MSP1 ${chunk}th  MSP chunk is handled #######","\n";

            # NOTE(review): the scan starts at the 2nd line of the chunk, as
            # the original code did; confirm that skipping index 0 is wanted.
            SEQLET1: for(my $line_no=1; $line_no < @seqlets1; $line_no++){
                my @OUTPUT = &follow_seqlet_link($seqlets1[$line_no], @mspa);
                my $depth_of_linking      = ${$OUTPUT[0]};
                my $size_of_common_seqlet = ${$OUTPUT[1]};
                if(($depth_of_linking==@mspa)&&($size_of_common_seqlet > $threshold)){
                    $result=1;
                    last FIRST_FOR if $fast == 1;
                }
            }
        }
    }
    return(\$result, \$num_seq);
}


#______________________________________________________________
# Title     : follow_seqlet_link
# Usage     : @OUTPUT=&follow_seqlet_link($seqlet_line, @mspa);
#             $depth=${$OUTPUT[0]}; $size=${$OUTPUT[1]};
# Function  : Starting from one match line, scans the given mspa chunks
#             for lines whose first sequence is the sequence matched so
#             far and whose first range overlaps the current matched
#             range. Every overlap counts as one link; the number of
#             linked sequences reported is (links + 2).
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : (\$final_num_of_seq_linked, \$seqlet_leng)
#             Both are references to variables which are NOT declared
#             with 'my' in this sub. $seqlet_leng is 0 unless the number
#             of linked sequences equals the number of chunks given.
# Argument  : 0) one match line, 1..) the mspa chunks, each of them one
#             string with match lines separated by newlines.
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub follow_seqlet_link{
	 my $seqlet_line=shift;
	 my @mspa=@_;
	 my ($i, $j, $link_counter, @common_range,$seqlet_very_ori, @ranges_very_ori,
	  @new_ranges, $seqlet_ori, $matched_ori, @ranges1, @ranges2);
	 # Captures: $1,$2 = first range,  $3 = first sequence name,
	 #           $4,$5 = second range, $6 = second sequence name.
	 if($seqlet_line=~/^\s*\d+\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\w+)\s+(\d+)\s+(\d+)\s+(\w+)/i){
	  $seqlet_ori =$3;
	  $seqlet_very_ori=$3;
	  $matched_ori=$6;
	  @ranges1=($1, $2, $4, $5);
	  @ranges_very_ori=@ranges1;
	 }
	 # $visited is a space separated list of the sequence names used so far.
	 my $visited .= "$seqlet_ori ";
	 #print "\n\=======$seqlet_ori \@ranges1 is @ranges1 =================================\n";

	 MSP: for($i=0; $i<@mspa; $i++){
	  my @seqlets1=split(/\n+/,  $mspa[$i]);
	  #my @common_range;
	  # The scan starts at index 1, so the first line of each chunk is never examined.
	  SEQLET1: for($j=1; $j < @seqlets1; $j++){
	     my $seqlet_num=$j;
	     # NOTE(review): 'my ... if ...' has undefined behaviour according to perlsyn.
	     # get_mspa_range is used below as returning (ref to 4 range numbers, name, matched name).
	     my @temp= &get_mspa_range($seqlets1[$j]) if $seqlets1[$j]=~/\S/;
	     my @ranges2=@{$temp[0]}; # has (1 2 3 4);
	     my $seq2        = $temp[1];
	     my $matched_seq2= $temp[2];
	     #splice(@seqlets1, $j, 1);
	     #$j--;
		 if($seq2 eq $seqlet_ori){
		    next MSP; # to the next MSP chunk
		 # The name is used as a regex here, so this is a substring test on $visited.
		 }elsif(($seq2 eq $matched_ori)&&($visited !~ /$matched_seq2/)){
			# $verbose, $pos, $temp and $num_seq are not declared in this sub.
			if($verbose=~/v$/i){
	          print "\n    SEQLET1_________________________________________\(${pos}th MSP chunk\)\n";
	          print "    SEQLET1 $temp number of seqlets  for the ${pos}th mspa file\n";
			  print "    SEQLET1_________________________________________\(${seqlet_num}th seqlet )\n";
			  print "    QUERY seqlet is $temp[1]: ", "@ranges2[0..1]  MATCH seqlet is $temp[2]: ", "@ranges2[2..3]","\n";
			  print "    Target seqlet is $seq2 : @ranges2\n";
			}
			# Four overlap cases between the current matched range ($ranges1[2..3])
			# and the first range of the candidate ($ranges2[0..1]) follow.
			# Every case which links: counts the link, narrows @ranges1, moves on to
			# the candidate's pair of names and either stops (enough links) or
			# restarts the chunk scan.
			if(($ranges1[2] >= $ranges2[0])&&    ##     =======
			   ($ranges1[3] <= $ranges2[1]) ){   ##  ==============
			   # Case 1: the current range lies inside the candidate range.
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n      MSP2      \@ranges1 is ", "@ranges1" if($verbose=~/v/i);
			   #print "\n                \@ranges2 is ", "@ranges2" if($verbose=~/v/i);
			   # $diff1 and $diff2 are not declared in this sub.
			   $diff1=$ranges1[2] - $ranges2[0];
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges =($ranges1[2], $ranges1[3], ($ranges2[2]+$diff1), ($ranges2[3]-$diff2) );
			   @ranges1=(@new_ranges); #, $ranges2[2], $ranges2[3]);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   if($verbose=~/v/i){
			      print "\n      FIRST elsif    Finalout @new_ranges \$link_counter=$link_counter", "\n";
			      print "                                         \$num_seq = $num_seq\n";
			   }
			   # Enough links for the number of chunks given: stop searching.
			   if( ($link_counter+2) >= @mspa){
				  #print  "\n   All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  # $result and $not_visited_mspa_chunk are not declared in this sub.
				  $result=1;
				  #$link_counter=0;
				  #print "\n     Common range: ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_mspa_chunk=0;
				  goto EXIT;
			   }
			   # 'next' still runs the $i++ of the loop, so the scan resumes at chunk index 1.
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&& ##    --------------
				   ($ranges1[3] >= $ranges2[1]) ){ ##       --------
			   # Case 2: the candidate range lies inside the current range;
			   # the candidate ranges are taken over unchanged.
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   @new_ranges =($ranges2[0], $ranges2[1], $ranges2[2], $ranges2[3],);
			   #print "\n      Second elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";

			   if( ($link_counter+2) >= @mspa){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_mspa_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&&   #    ======
				   ($ranges1[3] <= $ranges2[1]) &&   ##      =======
				   ($ranges1[3] >= $ranges2[0]) ){
			   # Case 3: the current range overlaps the start of the candidate range.
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   #print "                                         \$num_seq = $num_seq\n";
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges=($ranges2[0], $ranges1[3],  $ranges2[2], ($ranges2[3]-$diff2));
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Third elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   if( ($link_counter+2) >= @mspa){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_mspa_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] >= $ranges2[0])&&  #        ======
				  ($ranges1[3] >= $ranges2[1])&&   ##  =======
				  ($ranges1[2] <= $ranges2[1]) ){
			   # Case 4: the current range overlaps the end of the candidate range.
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   $diff1=$ranges1[2] - $ranges2[0];
			   # NOTE(review): unlike the other cases, elements 3 and 4 are built from
			   # $ranges2[0] and $ranges2[1], not from $ranges2[2] and $ranges2[3] - confirm.
			   @new_ranges=($ranges1[2], $ranges2[1], ($ranges2[0]+$diff1), $ranges2[1]);
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Fourth elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   if( ($link_counter+2) >= @mspa){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_mspa_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}else{
			   # No overlap with this candidate: try the next line of the same chunk.
			   if($verbose=~/v/i){
			      print "\nX X X X X   Link broken ", @new_ranges, " \$link_counter=$link_counter\n";
			   }
			   next SEQLET1;
			}
		 }
	  }
	 }
	 EXIT:
	 # $final_num_of_seq_linked and $seqlet_leng are not declared in this sub;
	 # references to them are handed back to the caller.
	 $final_num_of_seq_linked=$link_counter+2;
	 if($final_num_of_seq_linked==@mspa){
	  # @common_range is never filled (every push is commented out), so
	  # $#common_range is -1 and this reads $new_ranges[-1]-$new_ranges[-2]+1.
	  $seqlet_leng=$new_ranges[$#common_range]-$new_ranges[$#common_range-1]+1;
	  #print "\n Common Seqlet size:  $seqlet_leng \n";
	 }else{
	  $seqlet_leng=0;
	 }
	 #print "\nLINKING seq num for seqlet $seqlet_very_ori \(","@ranges_very_ori","\) is $final_num_of_seq_linked \n";
	 return(\$final_num_of_seq_linked, \$seqlet_leng);
}


#________________________________________________________________________________
# Title     : convert_clu_to_mspa
# Usage     : @written_mspa_files=@{&convert_clu_to_mspa(\$single_linkage_file)};
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big mspa file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.mspa file already exists, it concatenates them to
#              make a bigger one.
#             For every member of a cluster the search output file is looked
#              for in the current directory and in sub directories named after
#              the first 1 or 2 characters of the sequence name, in upper,
#              lower and original case. The first extension found for a
#              cluster is the only one tried for its remaining members.
# Example   :
# Keywords  : clu_2_sso_2_mspa, cluster_to_mspa, cluster_to_sso_to_mspa
#              clu_to_sso_to_mspa
# Options   :
# Argument  : 0) cluster file name, 1) 'lean output' flag: when true, the
#              single search files of a cluster are deleted after their
#              mspa files were concatenated. Both can be given as plain
#              values or as references to scalars.
# Returns   : reference to an array with one output file name
#              (<cluster name>_cluster.mspa) for each cluster, including the
#              ones which were skipped because the file already existed.
# Category  :
# Version   : 2.7
#--------------------------------------------------------------------------------
sub convert_clu_to_mspa{
    my(@written_mspa_files);
    my $subdir_char_size=2; # default: sub dir names of 1 and 2 chars are tried

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening cluster file (xx.clu)
    # %clus looks like this:  2-507     YGR041W YLR353W
    #                         3-308     YDR222W YDR346C YLR225C
    #                         2-184     YCL066W YCR040W
    #______________________________________________________________
    my $clu        =ref($_[0]) ? ${$_[0]} : $_[0];
    my $Lean_output=ref($_[1]) ? ${$_[1]} : $_[1];

    if($verbose){   print "\n# convert_clu_to_mspa : \"$clu\" is given and I am processing it with clu_to_sso_to_mspa\n" if defined $clu;     }
    my %clus=%{&open_clu_files(\$clu)};
    my @clusters= keys %clus;
    my $num_of_cluster=@clusters=@{&sort_by_cluster_size(\@clusters)};

    print "# (i) $0: convert_clu_to_mspa: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

    my @possible_extensions=('mspa', 'mspa.gz', 'msso', 'msso.gz','fsso', 'pbla', 'pbla.gz',
                             'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
    my @U_L_case=('\U', '\L', ' ');  ## !! the ' ' is necessary (name as it is)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # Making each SINGLE linkage clu to MSP file format to be ready for divclus
    #______________________________________________________________________________
    CLUSTER: for my $clus_name (@clusters){
        my (@final_files, @mspa_hashes);
        my $big_out_mspa="$clus_name\_cluster\.mspa"; #<<<----- final output name
        push(@written_mspa_files, $big_out_mspa); ## This is the output of this sub

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #  If $clus_name.mspa is already there, skip
        #_____________________________________________
        if( (-s $big_out_mspa) > 100  and !$over_write ){
            print "\n# (i) convert_clu_to_mspa : $big_out_mspa MSP file already exists, skipping\n";
            print "#    Use  \$over_write option \'o\' to start all over again or \n";
            print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
            next CLUSTER;
        }
        my @seq_names=split(/\s+/, $clus{$clus_name}); # @seq_names has (HIU001, HI002, HI333, MJ111, etc)
        my $num_of_seq_member=@seq_names;
        print "# $0: convert_clu_to_mspa: No. of seq member=$num_of_seq_member after split \n" if $verbose;

        # Once a file is found, the extension range is narrowed to that one
        # extension for the rest of the cluster.
        my $no_of_ext_to_be_checked=@possible_extensions;
        my $extension_type_found=0;

        SEQ_NAME: for my $seq_name (@seq_names){
            # A name like XXX_10-200 is tried without and with its range part.
            my @name_types=($seq_name);
            if($seq_name=~/(\S+)_\d+\-\d+$/){
                @name_types=($1, $seq_name);
            }

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            #  Here I take chars from the sequ names, as dirs have fragments of chars
            #_______________________________________________________________________________
            my @poss_sub_dir_heads=('.'); ## <<<<------- This is critically important, when 'D' opt is not used!
            for my $head_size (1 .. $subdir_char_size){
                my $sub_dir_head= substr($seq_name, 0, $head_size);
                unshift(@poss_sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                unshift(@poss_sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
            }

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            #  Checking all the possible subdirectories to crop all the sso files
            #_______________________________________________________________________________
            # The list always ends with '.', so an extra pass with an empty
            # sub dir name would only test the same paths once more.
            for my $subd (@poss_sub_dir_heads){
                for(my $e=$extension_type_found; $e < $no_of_ext_to_be_checked; $e++){
                    my $ext=$possible_extensions[$e];
                    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    #  This makes all the possible lower upper case names
                    #______________________________________________________
                    for my $case (@U_L_case){
                        for my $name_type (@name_types){
                            my $each_seq_name= $case=~/U/ ? "\U$name_type"
                                             : $case=~/L/ ? "\L$name_type"
                                             :              $name_type;
                            # Current directory first, then the sub directory;
                            # the plain file is preferred over the gzipped one.
                            my @candidates=(
                                "$each_seq_name\.$ext",
                                "$each_seq_name\.$ext\.gz",
                                "\.\/$subd\/$each_seq_name\.$ext",
                                "\.\/$subd\/$each_seq_name\.$ext\.gz",
                            );
                            for my $file_wanted (@candidates){
                                next unless -s $file_wanted;
                                push(@final_files, $file_wanted);
                                $extension_type_found=$e; $no_of_ext_to_be_checked=$e+1;
                                # These two stay package variables, as they were before.
                                $found_search_prog_exention_used=$ext;
                                $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                next SEQ_NAME;
                            }
                        }
                    }
                }
            }
        }

        if(@final_files < 1){
            print "\n# (E) convert_clu_to_mspa :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
            print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
            next CLUSTER;
        }
        $write_each_mspa_to_disk='';   # package variable, reset here as before

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #  Check if small mspa files have already made in previous steps
        #________________________________________________________________
        if($final_files[0]=~/(\S+)\.mspa/){ ##  concatenate mspa into big_mspa
            my $big_fh;
            unless(open($big_fh, '>', $big_out_mspa)){
                warn "\n# (E) convert_clu_to_mspa : Can not write $big_out_mspa: $!\n";
                next CLUSTER;
            }
            print {$big_fh} "# Latest write by convert_clu_to_mspa in $0\n";
            for my $single_msp (@final_files){
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                # Opens single MSP file. Check if they were gzipped or not before open
                #________________________________________________________________________
                my $readable=$single_msp;
                if($single_msp=~/\.gz$/){
                    $readable=~s/\.gz$//;
                    # List form: the file name never passes through a shell.
                    system('gzip', '-d', $single_msp) == 0
                        or warn "\n\n gzip -d failed for $single_msp\n";
                }
                my $single_fh;
                unless(open($single_fh, '<', $readable)){
                    warn "\n\n Can not open SINGLE_MSP $single_msp";
                    next;
                }
                while(my $msp_line=<$single_fh>){
                    print {$big_fh} $msp_line;
                }
                close($single_fh);
            }
            close($big_fh)
                or warn "\n# (E) convert_clu_to_mspa : error while closing $big_out_mspa: $!\n";
            # $big_out_mspa is already in @written_mspa_files (pushed above).

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
            # Doing something for L option ($Lean_output)
            #___________________________________________________
            if($Lean_output and -d $found_real_subdir_name and $found_search_prog_exention_used){
                for my $member (@seq_names){
                    unlink("$found_real_subdir_name\/$member\.$found_search_prog_exention_used");
                    unlink("$found_real_subdir_name\/$member\.$found_search_prog_exention_used\.gz");
                }
            }elsif($Lean_output){
                for my $member (@seq_names){
                    unlink("$member\.$found_search_prog_exention_used");
                    unlink("$member\.$found_search_prog_exention_used\.gz");
                }
            }
        }else{
            # NOTE(review): $write_each_mspa_to_disk was set to '' just above,
            # so the first branch can not be reached as the code stands.
            if($write_each_mspa_to_disk){
                print "\# $0 : going to run open_sso_files with $write_each_mspa_to_disk opt\n";
                $big_out_mspa=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
                                                "u=$upper_expect_limit", $new_format, $add_range,
                                                $add_range2, $big_out_mspa, $over_write)};
                if((-s $big_out_mspa) > 200){  print "\n# $0: SUCCESS to create $big_out_mspa :) :) :-) :-) ?\n"; }
            }else{
                print "\n# convert_clu_to_mspa: I am running open_sso_files. \n";
                @mspa_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
                                               "u=$upper_expect_limit", $new_format, $add_range,
                                               $add_range2, $big_out_mspa, $over_write)};
                &write_mspa_files(@mspa_hashes, $big_out_mspa); ## concatenates all the hash ref to one
            }
        }
    }## end of  for my $clus_name (@clusters)
    return(\@written_mspa_files);
}# end of






#________________________________________________________________________________
# Title     : clu_to_sso_to_mspa
# Usage     : &clu_to_sso_to_mspa(\$clu);
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big mspa file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.mspa file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : convert_clu_to_sso_to_mspa, clu_2_sso_2_mspa, cluster_to_mspa, cluster_to_sso_to_mspa
# Options   :
# Version   : 1.8
#--------------------------------------------------------------------------------
sub clu_to_sso_to_mspa{
		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # Purpose  : for each cluster in the cluster file, finds one search
		 #             output file per member sequence and makes one big mspa
		 #             file per cluster, either by concatenating already made
		 #             mspa files or by handing the files to open_sso_files.
		 # Argument : $_[0] = cluster file name, or a reference to it
		 # Returns  : ref. of an array of the big mspa file names, one entry per
		 #             cluster (the name is listed even when the cluster is skipped)
		 # Globals  : reads $verbose, $single_file_name, $over_write,
		 #             $uppercase_seq_name, $upper_expect_limit, $new_format,
		 #             $add_range, $add_range2
		 #            sets  $write_each_mspa_to_disk (to 'w' before converting),
		 #             $num_of_seq_member, $each_seq_name, $subd, $ext, $e,
		 #             $file_wanted
		 #______________________________________________________________
		 my($i, $j, $s, $u, $p, $y, @possible_extensions, @U_L_case, @written_mspa_files);

		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # Opening cluster file (xx.clu)
		 # %clus looks like this:  2-507     YGR041W YLR353W
		 #                         3-308     YDR222W YDR346C YLR225C
		 #                         2-184     YCL066W YCR040W
		 #______________________________________________________________
		 my $clu= ref($_[0]) ? ${$_[0]} : $_[0]; ## accepts both \$name and $name
		 if($verbose){
				 print "\n# clu_to_sso_to_mspa : \"$clu\" is given
						 and I am processing it with clu_to_sso_to_mspa\n" if defined $clu;
		 }
		 my %clus=%{&open_clu_files(\$clu)};
		 my @keys= keys %clus;
		 my $num_of_cluster=@keys=@{&sort_by_cluster_size(\@keys)};

		 print "# $0: clu_to_sso_to_mspa: No. of cluster=$num_of_cluster after open_clu_files \n";

		 &show_array(\@keys) if $verbose;
		 &show_hash(\%clus) if $verbose;
		 @possible_extensions=('mspa', 'sso', 'msso', 'msso.gz',
							'pbla.gz', 'pbla', 'fsso', 'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
		 @U_L_case=('\U', '\L');

		 for($i=0; $i< @keys; $i++){
				my (@seq_names, @final_files, $clus_name, $big_out_mspa, @mspa_hashes);
				$clus_name=$keys[$i];
				unless($single_file_name=~/\S/){
						$big_out_mspa="$clus_name\_cluster\.mspa"; #<<<----- final output name
				}else{
						$big_out_mspa=$single_file_name;
				}
				push(@written_mspa_files, $big_out_mspa); ## This is the output of this sub (pushed ONCE per cluster)

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				#  If $clus_name.mspa is already there, skip
				#_____________________________________________
				if( (-s $big_out_mspa) > 100  and !$over_write ){
						print "\n# clu_to_sso_to_mspa : $big_out_mspa MSP file already exists, skipping\n";
						print "#    Use  \$over_write option \'o\' to start all over again or \n";
						print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
						next ;
				}
				$num_of_seq_member=@seq_names=split(/\s+/, $clus{$keys[$i]}); # @seq_names has (HIU001, HI002, HI333, MJ111, etc)
				print "# $0: clu_to_sso_to_mspa: No. of seq member=$num_of_seq_member after split \n" if $verbose;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				#  For each member, the FIRST existing non-empty file wins (next FOR0)
				#_______________________________________________________________________________
				FOR0: for($j=0; $j < @seq_names; $j++){
					 my($sub_dir_head, @sub_dir_heads, @search_dirs);

					 $each_seq_name=$seq_names[$j];
					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 #  Here I take chars from the sequ names, as dirs have fragments of chars
					 #_______________________________________________________________________________
					 for($s=1; $s <=2 ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
							 $sub_dir_head= substr($seq_names[$j], 0, $s);
							 push(@sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
							 push(@sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
					 }
					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 #  Checking all the possible subdirectories to crop all the sso files.
					 #  When no matching sub dir exists, one pass with an undefined sub dir
					 #  is still made so that files in the current directory are found.
					 #_______________________________________________________________________________
					 @search_dirs= @sub_dir_heads ? @sub_dir_heads : (undef);
					 for($p=0; $p < @search_dirs; $p++){
							 $subd=$search_dirs[$p];
							 for($e=0; $e < @possible_extensions; $e++){
										$ext=$possible_extensions[$e];
										#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
										#  This makes all the possible lower upper case names
										#  (upper case is tried first, then lower case)
										#______________________________________________________
										for( $u=0; $u < @U_L_case; $u++){
												if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
												}else{                   $each_seq_name="\L$each_seq_name"; }

												if(-s "$each_seq_name\.$ext"){   push(@final_files, "$each_seq_name\.$ext" ) ; next FOR0 }
												elsif(-s "$each_seq_name\.$ext\.gz"){ push(@final_files, "$each_seq_name\.$ext\.gz" ) ; next FOR0 }
												elsif(defined $subd){
														$file_wanted="\.\/$subd\/$each_seq_name\.$ext";
														if(-s $file_wanted){
																push( @final_files, $file_wanted); next FOR0 }
														elsif(-s "$file_wanted\.gz"){
																push( @final_files, "$file_wanted\.gz");
																next FOR0
														}
												}
										}
							 } # extensions
					 } # sub dirs

				} # FOR0

				print "\n# @final_files \n=============> $big_out_mspa  \n\n" if $verbose;

				if(@final_files < 1){
					 print "\n# clu_to_sso_to_mspa :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
					 print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
					 next;
				}
				$write_each_mspa_to_disk='w';

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				#  Check if small mspa files have already made in previous steps
				#________________________________________________________________
				if($final_files[0]=~/\.mspa\s*$/){ ##  concatenate mspa into big_mspa
						 unless(open(BIG_MSP_FILE, '>', $big_out_mspa)){
								 warn "\n\n Can not open BIG_MSP_FILE $big_out_mspa for writing: $!";
								 next;
						 }
						 for($y=0; $y< @final_files; $y++){
								 my $opened;
								 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
								 # Opens single MSP file. Gzipped ones are READ through 'gunzip -c'
								 #  (list form: no shell, the .gz file on disk is left untouched)
								 #________________________________________________________________________
								 if($final_files[$y]=~/\.gz$/){
										 $opened=open(SINGLE_MSP, '-|', 'gunzip', '-c', $final_files[$y]);
								 }else{
										 $opened=open(SINGLE_MSP, '<', $final_files[$y]);
								 }
								 unless($opened){
										 warn "\n\n Can not open SINGLE_MSP $final_files[$y]";
										 next;
								 }
								 while(<SINGLE_MSP>){
										 print BIG_MSP_FILE $_;
								 }
								 close(SINGLE_MSP);
						 }
						 close(BIG_MSP_FILE);

				}else{
						if($write_each_mspa_to_disk){
								 print "\# $0 : going to run open_sso_files with $write_each_mspa_to_disk opt\n";
								 $big_out_mspa=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
															 "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_mspa, $over_write)};
								 if((-s $big_out_mspa) > 200){  print "\n# $0: SUCCESS to create $big_out_mspa :) :) :-) :-) ?\n"; }
						}else{
								 ## not reached while $write_each_mspa_to_disk is forced to 'w' above
								 print "\n# clu_to_sso_to_mspa: I am running open_sso_files. \n";
								 @mspa_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
															 "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_mspa, $over_write)};

								 &write_mspa_files(@mspa_hashes, $big_out_mspa); ## concatenates all the hash ref to one
						}
				}
		 }
		 return(\@written_mspa_files);
}# end of


#________________________________________________________________________________
# Title     : convert_clu_to_sso_to_mspa
# Usage     : &convert_clu_to_sso_to_mspa(\$clu);
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big mspa file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.mspa file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_mspa, cluster_to_mspa, cluster_to_sso_to_mspa
#              clu_to_sso_to_mspa
# Options   :
# Category  :
# Version   : 1.8
#--------------------------------------------------------------------------------
sub convert_clu_to_sso_to_mspa{
		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # Purpose  : for each cluster in the cluster file, finds one search
		 #             output file per member sequence and makes one big mspa
		 #             file per cluster, either by concatenating already made
		 #             mspa files or by handing the files to open_sso_files.
		 # Argument : $_[0] = cluster file name, or a reference to it
		 # Returns  : ref. of an array of the big mspa file names, one entry per
		 #             cluster (the name is listed even when the cluster is skipped)
		 # Globals  : reads $verbose, $single_file_name, $over_write,
		 #             $write_each_mspa_to_disk, $uppercase_seq_name,
		 #             $upper_expect_limit, $new_format, $add_range, $add_range2
		 #            sets  $num_of_seq_member, $each_seq_name, $subd, $ext, $e,
		 #             $file_wanted
		 #______________________________________________________________
		 my($i, $j, $s, $u, $p, $y, @possible_extensions, @U_L_case, @written_mspa_files);

		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # Opening cluster file (xx.clu)
		 # %clus looks like this:  2-507     YGR041W YLR353W
		 #                         3-308     YDR222W YDR346C YLR225C
		 #                         2-184     YCL066W YCR040W
		 #______________________________________________________________
		 my $clu= ref($_[0]) ? ${$_[0]} : $_[0]; ## accepts both \$name and $name
		 if($verbose){
					 print "\n# clu_to_sso_to_mspa : \"$clu\" is given
													 and I am processing it with clu_to_sso_to_mspa\n" if defined $clu;
		 }
		 my %clus=%{&open_clu_files(\$clu)};
		 my @keys= keys %clus;
		 my $num_of_cluster=@keys=@{&sort_by_cluster_size(\@keys)};

		 print "# $0: clu_to_sso_to_mspa: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

		 &show_array(\@keys) if $verbose;
		 &show_hash(\%clus) if $verbose;
		 @possible_extensions=('mspa', 'sso', 'msso', 'msso.gz','fsso', 'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
		 @U_L_case=('\U', '\L');

		 for($i=0; $i< @keys; $i++){
				 my (@list, @final_files, $clus_name, $big_out_mspa, @mspa_hashes);
				 $clus_name=$keys[$i];
				 unless($single_file_name=~/\S/){
							$big_out_mspa="$clus_name\_cluster\.mspa"; #<<<----- final output name
				 }else{
							$big_out_mspa=$single_file_name;
				 }
				 push(@written_mspa_files, $big_out_mspa); ## This is the output of this sub (pushed ONCE per cluster)

				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 #  If $clus_name.mspa is already there, skip
				 #_____________________________________________
				 if( (-s $big_out_mspa) > 100  and !$over_write ){
						 print "\n# clu_to_sso_to_mspa : $big_out_mspa MSP file already exists, skipping\n";
						 print "#    Use  \$over_write option \'o\' to start all over again or \n";
						 print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
						 next ;
				 }
				 $num_of_seq_member=@list=split(/\s+/, $clus{$keys[$i]}); # @list has (HIU001, HI002, HI333, MJ111, etc)
				 print "# $0: clu_to_sso_to_mspa: No. of seq member=$num_of_seq_member after split \n" if $verbose;

				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 #  For each member, the FIRST existing non-empty file wins (next FOR0)
				 #_______________________________________________________________________________
				 FOR0: for($j=0; $j < @list; $j++){
						 my($sub_dir_head, @sub_dir_heads, @search_dirs);

						 $each_seq_name=$list[$j];
						 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						 #  Here I take chars from the sequ names, as dirs have fragments of chars
						 #_______________________________________________________________________________
						 for($s=1; $s <=2 ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
								 $sub_dir_head= substr($list[$j], 0, $s);
								 push(@sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
								 push(@sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
						 }
						 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						 #  Checking all the possible subdirectories to crop all the sso files.
						 #  When no matching sub dir exists, one pass with an undefined sub dir
						 #  is still made so that files in the current directory are found.
						 #_______________________________________________________________________________
						 @search_dirs= @sub_dir_heads ? @sub_dir_heads : (undef);
						 for($p=0; $p < @search_dirs; $p++){
								 $subd=$search_dirs[$p];
								 for($e=0; $e < @possible_extensions; $e++){
										 $ext=$possible_extensions[$e];
										 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
										 #  This makes all the possible lower upper case names
										 #  (upper case is tried first, then lower case)
										 #______________________________________________________
										 for( $u=0; $u < @U_L_case; $u++){
												 if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
												 }else{                   $each_seq_name="\L$each_seq_name"; }

												 if(-s "$each_seq_name\.$ext"){   push(@final_files, "$each_seq_name\.$ext" ) ; next FOR0 }
												 elsif(-s "$each_seq_name\.$ext\.gz"){ push(@final_files, "$each_seq_name\.$ext\.gz" ) ; next FOR0 }
												 elsif(defined $subd){
														 $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
														 if(-s $file_wanted){
																 push( @final_files, $file_wanted); next FOR0 }
														 elsif(-s "$file_wanted\.gz"){
																 push( @final_files, "$file_wanted\.gz");
																 next FOR0
														 }
												 }
										 }
								 } # extensions
						 } # sub dirs

				 } # FOR0

				 print "\n# @final_files \n=============> $big_out_mspa  \n\n" if $verbose;

				 if(@final_files < 1){
							print "\n# clu_to_sso_to_mspa :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
							print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
							next;
				 }
				 # $write_each_mspa_to_disk='w';   ## left to the caller (package variable)

				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 #  Check if small mspa files have already made in previous steps
				 #________________________________________________________________
				 if($final_files[0]=~/\.mspa\s*$/){ ##  concatenate mspa into big_mspa
						 unless(open(BIG_MSP_FILE, '>', $big_out_mspa)){
								 warn "\n\n Can not open BIG_MSP_FILE $big_out_mspa for writing: $!";
								 next;
						 }
						 ## NOTE(review): this banner goes to STDOUT, not into the mspa file -- confirm intended
						 print "\n # Written by  convert_clu_to_sso_to_mspa in $0\n";
						 for($y=0; $y< @final_files; $y++){
								 my $opened;
								 ## gzipped members are read through 'gunzip -c' so that plain text,
								 ##  not compressed bytes, ends up in the big mspa file
								 if($final_files[$y]=~/\.gz$/){
										 $opened=open(SINGLE_MSP, '-|', 'gunzip', '-c', $final_files[$y]);
								 }else{
										 $opened=open(SINGLE_MSP, '<', $final_files[$y]);
								 }
								 unless($opened){
										 warn "\n\n Can not open SINGLE_MSP $final_files[$y]";
										 next;
								 }
								 while(<SINGLE_MSP>){
										 print BIG_MSP_FILE $_;
								 }
								 close(SINGLE_MSP);
						 }
						 close(BIG_MSP_FILE);

				 }else{
						 if($write_each_mspa_to_disk){
									print "\# $0 : going to run open_sso_files with $write_each_mspa_to_disk opt\n";
									$big_out_mspa=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
												"u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_mspa, $over_write)};
									if((-s $big_out_mspa) > 200){  print "\n# $0: SUCCESS to create $big_out_mspa :) :) :-) :-) ?\n"; }
						 }else{
									print "\n# clu_to_sso_to_mspa: I am running open_sso_files. \n";
									@mspa_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_mspa_to_disk,
												"u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_mspa, $over_write)};

									&write_mspa_files(@mspa_hashes, $big_out_mspa); ## concatenates all the hash ref to one
						 }
				 }
		 }
		 return(\@written_mspa_files);
}# end of


#______________________________________________________________________________
# Title     : sso_to_mspa
# Usage     : &sso_to_mspa(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &sso_to_mspa(@ARGV, 'OUT.mspa', $single_out_opt);
# Warning   : This capitalizes the base name of the input file when
#              producing the mspa file name. xxxxx.sso -> XXXXX.mspa
# Keywords  : sso_file_to_mspa_file, convert_sso_to_mspa,
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.mspa"
#             n  for new format (mspa2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#
# Returns   : the file names created (xxxx.mspa, yyyy.mspa,,,,)
# Argument  :
# Category  :
# Version   : 2.6
#-----------------------------------------------------------------------------
sub sso_to_mspa{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Works out the output mspa name (s=NAME option, a xxx.mspa name among
	 #  the file arguments, or a name derived from the input file name), reads
	 #  the input with open_sso_files and writes with write_mspa_files.
	 # Returns a reference to a FLAT array of whatever write_mspa_files
	 #  returned, for all three input cases.
	 #______________________________________________________________________
	 my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
	 my (%sso, @sso, @SSO, $big_out_mspa1,  @final_out, $big_out_mspa2,
	   $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_mspa,
	   $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
	if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
	if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
	if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
	if($char_opt=~/r/){   $add_range = 'r' }
	if($char_opt=~/c/){   $create_sso = 'c' }
	if($char_opt=~/s/){   $single_out_opt='s' }
	if($char_opt=~/n/){   $new_format='n' }
	 print "\n# File given to sso_to_mspa is \"@file\", Normally xxx.sso file names\n";

	 if($single_file_name=~/\S/){
	   $big_out_mspa=$single_file_name;
	 }else{
	   ## $big_out_mspa1 is used for a single input file, $big_out_mspa2 for
	   ##  several; with several inputs the LAST matching file decides the name
	   for($i=0; $i < @file; $i++){
		   if($file[$i]=~/\.mspa$/){ ## when output file name is given
			   $big_out_mspa=$file[$i];
			   splice(@file, $i, 1);  ## the output name is not an input file
			   $i--;
		   }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.mspa file name from xxxx.sso
			   $big_out_mspa1="\U$1"."$2"."\.mspa";
			   $big_out_mspa2="\U$1".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
			   $big_out_mspa1="\U$1"."\.mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
			   print "\n# sso_to_mspa: File matched  xxxx.sso  format \n";
		   }elsif($file[$i]=~/^(\S+)\.out$/){
			   $big_out_mspa1="\U$1"."\.mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
			   print "\n# sso_to_mspa: File matched  xxxx.out  format \n";
		   }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }
	   }
	 }
	 if(defined($big_out_mspa)){
	   $big_out_mspa1=$big_out_mspa2=$big_out_mspa;
	   print "\n# \$big_out_mspa is defined as \'$big_out_mspa\'\n";
	 }else{
	   print "\n# sso_to_mspa: You did not define the big MSP file out format, so $big_out_mspa1 \n";
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (1) When File was given to this sub routine
	 #__________________________________________
	 if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_mspa1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_mspa")};
	  ## Array ref(s) returned by write_mspa_files are flattened, so that this
	  ##  branch hands back file names like the two branches below do
	  ##  (it used to push the reference itself).
	  my @written=&write_mspa_files(@sso, $big_out_mspa1,
	        $single_out_opt, $add_range);
	  push(@final_out, map { ref($_) eq 'ARRAY' ? @{$_} : $_ } @written);

	 }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_mspa_files(@sso, $big_out_mspa2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (2) When NO File but ARRAY is given
	 #      Here, you can have SSO files created
	 #__________________________________________
	 elsif(@array >=1){
	  print "\n# In sso_to_mspa, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_mspa_files(@sso, $big_out_mspa,
						  $single_out_opt, $add_range)} );
	 }
	 return(\@final_out);
}



#______________________________________________________________________________
# Title     : convert_sso_to_mspa
# Usage     : &convert_sso_to_mspa(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &convert_sso_to_mspa(@ARGV, 'OUT.mspa', $single_out_opt);
# Warning   : This capitalizes the base name of the input file when
#              producing the mspa file name. xxxxx.sso -> XXXXX.mspa
# Keywords  : sso_file_to_mspa_file, convert_sso_to_mspa,
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.mspa"
#             n  for new format (mspa2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#
# Returns   : the file names created (xxxx.mspa, yyyy.mspa,,,,)
# Argument  :
# Category  :
# Version   : 2.6
#-----------------------------------------------------------------------------
sub convert_sso_to_mspa{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Works out the output mspa name (s=NAME option, a xxx.mspa name among
	 #  the file arguments, or a name derived from the input file name), reads
	 #  the input with open_sso_files and writes with write_mspa_files.
	 # Returns a reference to a FLAT array of whatever write_mspa_files
	 #  returned, for all three input cases.
	 #______________________________________________________________________
	 my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
	 my (%sso, @sso, @SSO, $big_out_mspa1,  @final_out, $big_out_mspa2,
	   $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_mspa,
	   $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
	if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
	if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
	if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
	if($char_opt=~/r/){   $add_range = 'r' }
	if($char_opt=~/c/){   $create_sso = 'c' }
	if($char_opt=~/s/){   $single_out_opt='s' }
	if($char_opt=~/n/){   $new_format='n' }
	 print "\n# File given to convert_sso_to_mspa is \"@file\", Normally xxx.sso file names\n";

	 if($single_file_name=~/\S/){
	   $big_out_mspa=$single_file_name;
	 }else{
	   ## $big_out_mspa1 is used for a single input file, $big_out_mspa2 for
	   ##  several; with several inputs the LAST matching file decides the name
	   for($i=0; $i < @file; $i++){
		   if($file[$i]=~/\.mspa$/){ ## when output file name is given
			   $big_out_mspa=$file[$i];
			   splice(@file, $i, 1);  ## the output name is not an input file
			   $i--;
		   }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.mspa file name from xxxx.sso
			   $big_out_mspa1="\U$1"."$2"."\.mspa";
			   $big_out_mspa2="\U$1".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
			   $big_out_mspa1="\U$1"."\.mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
			   print "\n# convert_sso_to_mspa: File matched  xxxx.sso  format \n";
		   }elsif($file[$i]=~/^(\S+)\.out$/){
			   $big_out_mspa1="\U$1"."\.mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
			   print "\n# convert_sso_to_mspa: File matched  xxxx.out  format \n";
		   }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_mspa1="\U$1".".mspa";
			   $big_out_mspa2="\U$1"."_all".".mspa";
		   }
	   }
	 }
	 if(defined($big_out_mspa)){
	   $big_out_mspa1=$big_out_mspa2=$big_out_mspa;
	   print "\n# \$big_out_mspa is defined as \'$big_out_mspa\'\n";
	 }else{
	   print "\n# convert_sso_to_mspa: You did not define the big MSP file out format, so $big_out_mspa1 \n";
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (1) When File was given to this sub routine
	 #__________________________________________
	 if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_mspa1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_mspa")};
	  ## Array ref(s) returned by write_mspa_files are flattened, so that this
	  ##  branch hands back file names like the two branches below do
	  ##  (it used to push the reference itself).
	  my @written=&write_mspa_files(@sso, $big_out_mspa1,
	        $single_out_opt, $add_range);
	  push(@final_out, map { ref($_) eq 'ARRAY' ? @{$_} : $_ } @written);

	 }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_mspa_files(@sso, $big_out_mspa2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (2) When NO File but ARRAY is given
	 #      Here, you can have SSO files created
	 #__________________________________________
	 elsif(@array >=1){
	  print "\n# In convert_sso_to_mspa, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_mspa_files(@sso, $big_out_mspa,
						  $single_out_opt, $add_range)} );
	 }
	 return(\@final_out);
}


#______________________________________________________________________________
# Title     : backup_config_files
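# Usage     : @backedup_files=@{&backup_config_files};
# Function  : copies a fixed list of system and user configuration files
#             into a backup directory (default: $HOME/Backup/Conf), adding
#             '_' and a date text to each file name.
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   : ref. of an array of the backup copies found non-empty after copying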
# Version   : 1.0
#------------------------------------------------------------------------------
sub backup_config_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Copies a fixed list of config files to a backup directory as
    #  <backup dir>/<file name>_<date text>.
    # Argument : $_[0] (optional) = backup directory, or a reference to it.
    #             Default is $HOME/Backup/Conf. (The given directory used to
    #             be overwritten by the default unconditionally.)
    # Returns  : ref. of an array of the destination files which are
    #             non-empty after copying.
    #______________________________________________________________________
    my($HOME_dir, $backup_directory, $date, @target_files, $file_name,
       $destination, @backedup_files);
    $HOME_dir="$ENV{'HOME'}";
    $backup_directory= ref($_[0]) ? ${$_[0]} : $_[0];
    unless($backup_directory){ $backup_directory= "$HOME_dir/Backup/Conf"; }
    ## explicit () : a bare '&get_date_text' would pass our own @_ on to it
    $date=${&get_date_text()};
    @target_files=("/etc/hosts", "/etc/fstab", "/etc/passwd",
                   "/etc/httpd/conf/access.conf",
                   "/etc/httpd/conf/srm.conf",
                   "/etc/httpd/conf/httpd.conf",
                   "/etc/group", "$HOME_dir/.bashrc",
                   "$HOME_dir/.fvwm2rc", "$HOME_dir/.profile",
                   "$HOME_dir/.Ted/tedrc", "$HOME_dir/.xinitrc",
                   );
    for my $file (@target_files){
      $file_name=${&extract_file_name(\$file)};
      $destination="$backup_directory\/$file_name\_$date";
      &copy_files($file, $destination);
      ## only copies which really exist with some content are reported
      if(-s $destination){
         print "\n# $destination\t has been made ";
         push(@backedup_files, $destination);
      }
    }
    print "\n";
    return(\@backedup_files);
}



#________________________________________________________________________________
# Title     : bla_to_msf  (this is not used. Use convert_bla_to_msf)
# Usage     : @msf_file_made=@{&bla_to_msf(\@bla_file)};
# Function  : Old name of convert_bla_to_msf, kept so that existing callers
#             still work. It hands all its arguments to &convert_bla_to_msf
#             unchanged and returns whatever that sub returns.
#             This sub used to hold a statement for statement copy of
#             convert_bla_to_msf; calling the one implementation removes the
#             need to keep two copies in step.
# Example   : @msf_file_made=@{&bla_to_msf(\@bla_file)};
# Keywords  : convert_bla_to_msf
# Options   : Same as convert_bla_to_msf
# Author    :
# Category  :
# Returns   : Same as convert_bla_to_msf
# Version   : 1.2
#--------------------------------------------------------------------------------
sub bla_to_msf{
	# @_ is passed on as it is, so refs and option strings reach
	#  handle_arguments inside convert_bla_to_msf exactly as given here.
	return(&convert_bla_to_msf(@_));
}

#________________________________________________________________________________
# Title     : convert_bla_to_msf
# Usage     : @msf_file_made=@{&convert_bla_to_msf(\@bla_file)};
# Function  : For each BLAST output file given:
#              1) reads the subject entries ('>' lines) and keeps the Query and
#                 Sbjct alignment text of those with Expect value below the
#                 threshold (0.0005), in two hashes keyed by subject name.
#                 Reading stops at the second 'Searching...' line, so only the
#                 first iteration is used ($choose_iteration=1).
#              2) sorts the subject names by E value and takes the query line
#                 of the first one as the template.
#              3) puts the gap positions of every pairwise query line into the
#                 template, then pads every subject/query pair at the template
#                 gap positions.
#              4) writes the subject sequences with &write_msf into
#                 '<base name of input>.msf'
#             All parsing state is started afresh for every input file and the
#             returned list is private to each call.
# Example   : @msf_file_made=@{&convert_bla_to_msf(\@bla_file)};
# Keywords  : convert_bla_to_msf
# Options   :
# Author    :
# Category  :
# Returns   : Ref. of an array of the MSF file names handed to &write_msf.
#             Input files which can not be opened are skipped with a warning.
# Version   : 1.2
#--------------------------------------------------------------------------------
sub convert_bla_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($e_val_threshold)=0.0005;
		# Lexical, so that file names of earlier calls are not returned again
		my(@final_out);
		$choose_iteration=1;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Everything below is state of ONE input file. When these lived
				#  outside the loop (or were globals), the iteration counter of the
				#  first file made all later files stop at their first
				#  'Searching...' line and old alignments went into later MSF files
				#___________________________________________________________________
				my(@template_query_seq, @keys, %alignment_hash_query,
					 %alignment_hash_subject, %final_seq_out);
				my($present_iteration, $subject_seq, $expect_value, $start_point)
					=(0, '', '', '');

				$file_base_name=${&get_base_names($file[$i])};

				# '< ' forces read mode, a file name can not be taken as a pipe or
				#  as an output redirection
				unless(open(BLAST_OUTPUT, "< $file[$i]")){
						warn "\n# (WARN) convert_bla_to_msf: Failed to open $file[$i]: $!\n";
						next;
				}
				while(<BLAST_OUTPUT>){
						if(/^Query=(\S+)/){
								$query_seq=$1;   last;
						}
				}
				close(BLAST_OUTPUT);

				unless(open(BLAST_OUTPUT, "< $file[$i]")){
						warn "\n# (WARN) convert_bla_to_msf: Failed to reopen $file[$i]: $!\n";
						next;
				}
				while(<BLAST_OUTPUT>){

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  'Searching...' starts an iteration: stop after the chosen
						#  one, otherwise forget what was read so far
						#____________________________________________
						if(/^Searching\.\.\.\.\.\.\.\.\.\.\./){
								$present_iteration++;
								if($present_iteration > $choose_iteration){
										last
								}else{
										%alignment_hash_subject=%alignment_hash_query=();
								}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  '>' starts a subject entry, resets $start_point. A subject
						#  which is already stored is ignored until the next '>'
						#____________________________________________
						}elsif(/^\>\s*(\S+)/){
								$subject_seq=$1;
								$start_point='';
								if($alignment_hash_subject{$subject_seq}){
										$seq_already_in=1;
										$subject_seq='';
										next;
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, match the line to get expectation value
						#  Both hash values start with 'evalue ' and seq text is appended
						#________________________________________________________________
						elsif($subject_seq
							and /^\s*Score\s*\=\s*(\S+)\s*bits.+\,\s*Expect\s*=\s*(\S+)/i){
								$expect_value=$2;
								unless($alignment_hash_subject{$subject_seq} or $expect_value > $e_val_threshold){
										$alignment_hash_subject{"$subject_seq"}="$expect_value ";
										$alignment_hash_query{"$subject_seq"}="$expect_value ";
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Query seq line,
						#   0.0005 = $e_val_threshold
						# NOTE(review): the pattern wants white space right after 'Query', so lines
						#   written as 'Query: 12 ...' (with a colon) do not match -- confirm the
						#   BLAST output format this is meant for. Same for the 'Sbjct' pattern.
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Query\s+(\d+)\s+(\S+)\s+\d+/){
								if($start_point){
										$alignment_hash_query{"$subject_seq"}.=$2;
								}else{ # If this is the first match of 'query', put '_' according to the start point
										$start_point=$1;
										$alignment_hash_query{"$subject_seq"}.="_"x($start_point-1).$2;
										$alignment_hash_subject{"$subject_seq"}.="_"x($start_point-1);
								}

						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Sbjct seq line
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Sbjct\s+\d+\s+(\S+)\s+\d+/){
								$alignment_hash_subject{"$subject_seq"}.=$1;
						}
				}
				close(BLAST_OUTPUT);

				# now in %alignment_hash_query and %alignment_hash_subject, I have pairs like:
				# --------VAVCQNMGIGK--DGNLPWPPLRNEYKYFQR
				# --------WARKNKLGWGFELKGSMPSAPLITEQTYFKD
				# -----------------------KTWFSIPEKNRPLK
				# -----------------------KTWEEIPALDKELK

				$output_msf="$file_base_name\.msf";

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
				# Sorting the subject names by the first column of the hash value
				#  (which is the E value). The names are sorted first, so the names
				#  come in name order when the E vals are equal
				#___________________________________________________________________
				@keys= map{ $_->[1] }
							 sort { $a->[0] <=> $b->[0] }
							 map{ $alignment_hash_subject{$_}=~/^(\S+)/ or $_=~/^(\S+)/ ; [$1, $_] }
							 sort keys %alignment_hash_subject;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
				# Making the final alignments by adjusting gaps in every pairwise step comparison
				# keys   are subject names like 'd8dfr__', 'nr_DYR_CHICK',,,
				# values are '7e-92 VRSLNSIVAVCQ....', '7e-92 VRSLNSIVAVCQN....'
				# The query line of the best (first) key is the template
				#________________________________________________
				$template_query=$alignment_hash_query{$keys[0]};

				if($alignment_hash_query{$keys[0]}=~/^\S+\s+(\S+)/){   @template_query_seq=split(//, $1);       }

				print "\n", @template_query_seq, "\n" if $verbose;


				for($j=0; $j < @keys; $j++){
						my($k, $evalue, @gapped_position, $query_seq, $g);
						$k=0;

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# matching query entry and inserting gaps
						#__________________________________________________
						$query_name=$keys[$j];
						if($alignment_hash_query{$query_name}=~/^(\S+)\s+(\S+)$/){
								$evalue=$1;
								$query_seq=$2;       }
						if($query_seq !~/\-/){  next   }   # no gap in this query line

						my @splited_query_seq=split(//, $query_seq);

						$longest_query_seq=@splited_query_seq if @splited_query_seq > $longest_query_seq;

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# skip gaps at the beginning, $k becomes the index of the
						#  first residue
						#_____________________________________________________
						if($splited_query_seq[0] eq '_'){
								for($k=0; $k < @splited_query_seq; $k++){
										if($splited_query_seq[$k] ne '_' and $splited_query_seq[$k] ne '-'){
												last;
										}
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# Finding all the gapped position and saving them
						#  The scan starts one before the first residue as before,
						#  but never below 0: index -1 is the LAST element in perl
						#  and a gap there was recorded as position -1
						#___________________________________________________
						for( $s=($k > 0 ? $k-1 : 0); $s < @splited_query_seq; $s++){
								if($splited_query_seq[$s] eq '-'){
										push(@gapped_position, $s);
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# (1) TEMPLATE: putting the gaps of this query line
						#      into the template, if not there already
						#__________________________________________________
						if(@gapped_position < 1){ next }
						for($g=0; $g< @gapped_position; $g++){
								$char_posi=$template_query_seq[$gapped_position[$g]] ;
								if($char_posi ne '-'){
										splice(@template_query_seq, $gapped_position[$g], 0, '-');
								}
						}
						print "\n# gaps are @gapped_position \n" if $verbose;
				}

				#print "\n      ", @template_query_seq, "\n The raw subject lines are:\n";

				if($verbose){
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_subject{$keys[$k]}, "\n";
						}
						print "\n The raw QUERY lines\n";
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_query{$keys[$k]}, "\n";
						}
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# Fixing subject sequences according to template and query seqs
				#____________________________________________________________________
				for($g=0; $g < @keys; $g++){
						 $subject_name=$keys[$g];

						 my($evalue, @splited_subject_seq, @splited_query_seq);

						 # A failed match keeps the $1, $2 of the last good match, so an
						 #  entry without seq text would take the seq of another entry.
						 #  Such entries are left untouched.
						 unless($alignment_hash_query{  $subject_name}=~/^(\S+)\s+(\S+)/){ next }
						 @splited_query_seq  =split(//, $2);
						 unless($alignment_hash_subject{$subject_name}=~/^(\S+)\s+(\S+)/){ next }
						 $evalue=$1;
						 @splited_subject_seq=split(//, $2);

						 for($t=0; $t< @template_query_seq; $t++){
								 # only the gap positions of the template matter
								 if($template_query_seq[$t] ne '-' ){ next }

								 $char_of_the_position=$splited_query_seq[$t];
								 if($char_of_the_position ne '-' and $char_of_the_position ne '_'){
										 # residue where the template has a gap: open a gap in the pair
										 splice(@splited_subject_seq, $t, 0, '-');
										 splice(@splited_query_seq, $t, 0, '-');
								 }elsif($char_of_the_position eq '_'){
										 # still in the leading padding: make the padding longer
										 splice(@splited_subject_seq, 0, 0, '_');
										 splice(@splited_query_seq, 0, 0, '_');
								 }
								 # '-' at the position: the pair has the gap already
						 }
						 $new_subject_seq=join('', @splited_subject_seq);
						 $new_query_seq  =join('', @splited_query_seq);
						 $alignment_hash_subject{$subject_name}="$evalue $new_subject_seq";
						 $alignment_hash_query{$subject_name}  ="$evalue $new_query_seq";
				}


				print "\n";print @template_query_seq, "\n" if $verbose;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# Taking the seq part of each subject for the MSF output. Entries
				#  without seq text are printed but not written
				#____________________________________________________________________
				for($h=0; $h< @keys; $h++){
						 $subject_name=$keys[$h];
						 #print "\n $alignment_hash_query{$subject_name}";
						 print "\n $alignment_hash_subject{$subject_name}";
						 if($alignment_hash_subject{$subject_name}=~/^(\S+)\s+(\S+)/){
								 $final_seq_out{$subject_name}=$2;
						 }
				}
				&write_msf(\%final_seq_out, \$output_msf);
				push(@final_out, $output_msf);
		}
		return(\@final_out);
}



#________________________________________________________________________________
# Title     : convert_html_bla_to_mspa
# Usage     : %hash_out_final=%{&convert_html_bla_to_mspa(\$file, [$Lean_output])};
# Function  : reads in PSI blast output and produces MSP file format.
#             Takes all the good hits below certain threshold in multiple iteration
#             Reports the best evalue with a given sequence name
# Example   : %hash_out=%{&convert_html_bla_to_mspa(\$file)};
# Keywords  : pbla_to_mspa, blast_to_mspa, bla_2_mspa, blastp_to_mspa_format,
#             blast_to_mspa_format, convert_html_bla_to_mspa, convert_html_bla_to_mspa_files
#             bla_to_mspa
# Options   :
#   $pdbd_seq_only  d   for getting dxxxx_ like seq names only (pdb40d names, for example)
#   $all_seq  a         for forcing all seq conversion
#   $which_iteration= by i=    # choose which iteration result you want to take
#   $which_iteration   as just a digit
#   $report_only_the_best=b by b -b
#   $take_only_the_last_iteration=l by l
#   $accumulative_hits_eval_thresh= by e=
#   $genome_seq_only=g      by g
#   $nrdb_seq_only=n        by n
#   $evalue_thresh=         by E=
#   $Accumulate_matches=A   by A -A
#   $Lean_output=L          by L -L  # to remove search output to unclutter
#
# Author    : Sarah Teichmann and Jong Park, jong@biosophy.org
# Version   : 4.2
#--------------------------------------------------------------------------------
sub convert_html_bla_to_mspa{
	 my($i, $j, $k, @lines, $match_string_count,  $line_count, $query_string_count,
			$match_length, $Lean_output,
			$lines, $duplicated_match_count, $new_sorted_name, $sorted_name, $verbose,
			$pdbd_seq_only, $entry_found, $which_iteration, $report_only_the_best,
			$genome_seq_only, $all_seq, $header_found, $accumulative_hits_eval_thresh,
			$take_only_the_last_iteration, $original_query, $nrdb_seq_only,
			$get_the_final_iteration, $read_entry_lines, $verbose, $Accumulate_matches);
	 my $match_leng_thresh=10;
	 ### This localization is critial NOT my, as I use a sub which relies on this
	 local(%hash_out, %accumulative_hits, $file, $score, $score_ori, $evalue,
				 $evalue_ori, $seq_id, $query_range_start, $query_range_stop,
				 $query, $match_string_start, $match_string_stop, $matched,
				 $read_point_found);
	 $duplicated_match_count=0;

	 my $evalue_thresh=$accumulative_hits_eval_thresh=1; ## default eval threshes
	 $query='query_seq'; ## default query seq name, to avoid blank name

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Processing the input arguments to get file and options etc
	 #_____________________________________________________________
	 for (@_){
			 if(ref $_ eq 'ARRAY'){ @lines =@{$_};
			 }elsif( ref $_ eq 'SCALAR' and -s ${$_} ){ $file=${$_};
			 }elsif( -s $_ ){            $file=$_;
			 }elsif(/^\s*d\s*$/){          $pdbd_seq_only='d'; $all_seq=''; $genome_seq_only='';
					 print "\n $0: convert_html_bla_to_mspa,  You set \$pdbd_seq_only option, I will skip others.\n";
			 }elsif(/^\s*[i=]*(\d+)\s*$/){ $which_iteration=$1;
			 }elsif(/^\s*b\s*$/){          $report_only_the_best='b';
			 }elsif(/^\s*a\s*$/){          $all_seq='a'; $genome_seq_only=''; $pdbd_seq_only=''; $nrdb_seq_only='';
			 }elsif(/^\s*g\s*$/){          $genome_seq_only='g'; $all_seq=''; $pdbd_seq_only='';$nrdb_seq_only='';
			 }elsif(/^\s*n\s*$/){          $nrdb_seq_only='n'; $all_seq=''; $pdbd_seq_only=''; $genome_seq_only='';
			 }elsif(/^\s*l\s*$/){          $take_only_the_last_iteration='l'; }
			 if(/^\s*v\s*$/){          $verbose='v'; }
			 if(/^\s*L\s*$/){          $Lean_output='L'; }
			 if(/e=(\S+)/){          $accumulative_hits_eval_thresh=$1; }
			 if(/E=(\S+)/){          $evalue_thresh=$1;			 }
			 if(/SEQ_NAME=(\S+)/){   $query=$original_query=$SEQ_NAME=$1;  }
			 if(/A$/){               $Accumulate_matches='A';  }
	 }
	 unless($which_iteration){  $get_the_final_iteration=1 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	 # If the input file is gzipped, uncompress it to text file and then open
	 #__________________________________________________________________
	 if($file=~/\.gz\s*$/){
			 open(BLA_FILE, "gunzip -c $file|") || die "\n# $0: Failed to open $file\n";
			 if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;			 }
	 }else{
			 open(BLA_FILE, "$file") || die "\n# $0: convert_html_bla_to_mspa : Failed to open $file\n";
			 if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;			 }
	 }
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # UP to NOW is frivalous option handling stuff
	 #_______________________________________________________

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # (1) Main reading in .pbla file (or any extension)
	 #____________________________________________________________________________
	 while(<BLA_FILE>){
			$line_count++;      $lines=$_;  ## putting $_ to $lines var
			if($lines=~/^\s*$/ or $lines=~/^ \s+Length\s+\=\s+\d+\s*$/){      next     }  ## skipping some junk lines

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (1.1) If I reach the end of the opened file, I use &put_mspa_lines_to_hash_from_bla sub to write the final mspa line and finish
			#________________________________________________________________________________________________________________
			if( eof ){
					 if( $read_point_found <= $which_iteration  or  $get_the_final_iteration){
							 #print "     # (i) <<<< The end of file reached, writing $sorted_name \n\n";
							 @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																				$sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																				$match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																				$take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
							 %hash_out=         %{$out_from_put_mspa_lines[0]};
							 %accumulative_hits=%{$out_from_put_mspa_lines[1]};
							 $read_point_found= $out_from_put_mspa_lines[2];
							 last;
					 }
			}

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (2) Extracting query seq name(this is the only place to get it)
			#____________________________________________________________
			if($lines=~/^\s*Query=\s+(\S+)/){ $query=$original_query=$1;    next    }
			# Following is to handle the HTML version of PSI output
			if($lines=~/\<\S\>\s*Query=\<\S\>/i){ $query=$original_query=$SEQ_NAME;  next } # <b>Query=</b>
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~
			# (3) 'Searching......done'  line indicates new search step(iteration)
			#_________________________________________________________________________
			if( $lines=~/^\s*Searching\.\.\.+[done]?/i ){
					$read_point_found++;
					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
					#  (3.3) Following is the KEY part for controlling iteration
					#__________________________________________________________
					if( $which_iteration and $read_point_found < $which_iteration){
							 #print "\n# (INFO) skipped, \$which_iteration: ($which_iteration), \$read_point_found: ($read_point_found)" if $verbose;
							 $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
							 $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
							 $entry_found=$duplicated_match_count=0;

							 if( !$Accumulate_matches){
									 %hash_out=(); ## this is to remove any discarded pairs in the iteration
							 }
							 #print "\n# (INFO) ===> New iteration ====\$read_point_found: $read_point_found, \$which_iteration:$which_iteration\n";
							 next;
					}elsif( $which_iteration and $read_point_found == $which_iteration){
							 $read_entry_lines=1;  next;
					}elsif( $which_iteration and $read_point_found >  $which_iteration){
							 @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																				$sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																				$match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																				$take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
							 %hash_out=         %{$out_from_put_mspa_lines[0]};
							 %accumulative_hits=%{$out_from_put_mspa_lines[1]};
							 $read_point_found= $out_from_put_mspa_lines[2];
							 last;
					}elsif(!$which_iteration){
							 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
							 # (3.4) Default situation
							 #____________________________________________________________
							 #print "\n# (WARN) You did not set \$which_iteration option \n\n" if $verbose;
							 if($read_point_found > 1){
											 #print "\n (3.3) Writing the last entry $sorted_name BEFORE next Searching........ line\n";
											 @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																								$sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																								$match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																								$take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
											 %hash_out=         %{$out_from_put_mspa_lines[0]};
											 %accumulative_hits=%{$out_from_put_mspa_lines[1]};
											 $read_point_found= $out_from_put_mspa_lines[2];
							 }
							 $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
							 $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
							 $entry_found=$duplicated_match_count=0;
							 if( !$Accumulate_matches){  %hash_out=(); $entry_found=0; $duplicated_match_count=0;     }
							 $read_entry_lines=1; ## this is set by 'Searching......' line
							 next;
					}
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (4) '> xxxx '  New sequence entry, '>' starts
			#__________________________________________________________
			elsif($read_entry_lines and $lines=~/uid\=\d+\"\>.+\|([^\|]+)[\|]?\<\/a\>/i){
					$temp_match=$1;
					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					# (4.0) To get the pdbd seq names only 'dxxx__' sort of thing
					#________________________________________________________
					if($pdbd_seq_only and ($temp_match !~/^pdb_\S+/ and $temp_match !~/^[cde]\d\S+/)  ){
							$entry_found=0; print "\n# NOT pdb seq\n";    next;
					}elsif($genome_seq_only and $temp_match !~/^gn_\S+/){
							$entry_found=0; print "\n# NOT genome seq\n"; next
					}elsif($nrdb_seq_only and $temp_match !~/^nr_\S+/){
							$entry_found=0; print "\n# NOT nrdb\n";      next
					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
					# (4.1) This is the DEFAULT
					#_____________________________________________________________
					}else{ ## This is default and equivalent to have the $all_seq option on.
							$entry_found=1;
							if($match_string_count){ ## $match_string_count is incremented only by 'Sbjct' line
										@out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																						 $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																						 $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																						 $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
										%hash_out=         %{$out_from_put_mspa_lines[0]};
										%accumulative_hits=%{$out_from_put_mspa_lines[1]};
										$read_point_found= $out_from_put_mspa_lines[2];
										$match_string_count=0;
										$duplicated_match_count=0;
							}

							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							#  Only with new seq entry, I count the pair occurrances
							#__________________________________________________________________
							$query=$original_query; $query_string_count='';
							$matched=$temp_match; ## this should be here, after if
							$sorted_name=join(' ', sort($query, $matched) );
					}
					$match_string_count=0;
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (5) Matching  Score =  325 bits (824), Expect = 6e-89           << 2 >>
			#_________________________________________________________________
			elsif( ($entry_found and $lines=~/^\s*Score\s*\=\s*(\S+)\s*bits\s+\(\S+\)\,\s*Expect\s*=\s*(\S+)/i)
					or ($entry_found and $lines=~/^\s*Score\s*\=\s*(\S+)\s*bits.+\,\s*Expect\s*=\s*(\S+)/i)){

					$score_ori=$1;
					$evalue_ori=$2;
					if($evalue_ori=~/^e\-\d\d\d/){ $evalue_ori="1".$evalue_ori; } ## bug fix for short eval in blast distribution

					if($match_string_count){ # $match_string_count is increased when Sbjct word is found
							if($evalue > $evalue_thresh){ $evalue=$evalue_ori; $score=$score_ori; next }
							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
							# When Only the first match(best evalue) is required, write mspa line and reset $entry_found var
							#_________________________________________________________________________________________________
							if($report_only_the_best){
									#print "      (5)  \$report_only_the_best is set\n" if $verbose;
									@out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																					 $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																					 $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																					 $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
									%hash_out=         %{$out_from_put_mspa_lines[0]};
									%accumulative_hits=%{$out_from_put_mspa_lines[1]};
									$read_point_found= $out_from_put_mspa_lines[2];
									$entry_found=0; next;
							}else{
									#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~`
									# duplicated match count means, query matched more than one region of a match seq
									#__________________________________________________________________________________
									$duplicated_match_count++;
									$sorted_name="$sorted_name $duplicated_match_count";
									#print " ====(5) Multiple region for \"$new_sorted_name\" is found =========== e= $evalue\n" if $verbose;
									@out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																					 $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																					 $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																					 $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
									%hash_out=         %{$out_from_put_mspa_lines[0]};
									%accumulative_hits=%{$out_from_put_mspa_lines[1]};
									$read_point_found= $out_from_put_mspa_lines[2];
							}
							$score=$score_ori; $evalue=$evalue_ori;
					}else{
							#print "     (5) \$match_string_count is not set NO write \$evalue_ori $evalue_ori\n" if $verbose;
							$evalue=$evalue_ori; $score=$score_ori;
					} ## to next line

					sub reset_all_the_vars{
						 # Clears the per-hit parser state: every variable listed below is set
						 # to the empty string. Takes no arguments and returns ''.
						 # NOTE(review): a named sub written inside another sub is still a
						 # package-level sub; if any of these names are my() variables of the
						 # enclosing sub, only the first invocation's copies are touched --
						 # confirm against the enclosing declarations before relying on it.
						 #print "            !!!!  Reseting all the vars !!!!\n" if $verbose;
						 for my $state_var ($query_range_start, $query_range_stop, $seq_id, $evalue,
																$score, $query_string_count, $match_string_count,
																$new_sorted_name, $mspa_line, $match_string_stop){
								 $state_var='';  # the loop variable aliases the real variable
						 }
						 return '';  # same value the former chained assignment evaluated to
					}
			}

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			# (6) Matching   Identities = 158/158 (100%), Positives = 158/158 (100%)    ,
			#____________________________________________________________________________________
			elsif( $entry_found and $lines=~/^\s*Identities\s+=\s+\S+\/(\S+)\s+\(\s*(\S+)\s*\%\)/i){
					$query_string_count=$match_string_count=0;
					$seq_id=$2/100;
					$match_length=$1;
					if($match_length < $match_leng_thresh){
							#print "     (6) \$match_leng_thresh $match_leng_thresh > \$match_length $match_length" if $verbose;
							$entry_found=0;
							$match_string_count=1;
							next;
					}else{
							#print "     (6) $sorted_name : ABOVE leng thresh. \$seq_id= $seq_id, \$match_length= $match_length\n" if $verbose;
					}
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (7) Matching 'Query: 2 GIRAATSQEINELT..' line    ,
			#_________________________________________________________________
			elsif($entry_found and $lines=~/^\s*Query\:?\s+(\d+)\s+\D+\s+(\d+)/){
					$query_string_count++;
					$query_line_found=1;
					if($query_string_count==1){      $query_range_start=$1;   $query_range_stop =$2;
					}elsif($query_string_count > 1){ $query_range_stop=$2;     }
					#print "        (7) Query: line found: $query\_$query_range_start\-$query_range_stop\n" if $verbose;
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (8) Matching 'Sbjct: 2 GIRAATSQEINELT..' line
			#_________________________________________________________________
			elsif($entry_found and $query_line_found and $lines=~/^\s*Sbjct\:?\s+(\d+)\s+[\w\-]+\s+(\d+)/i){
					$match_string_count++;
					$subject_line_found=1;
					if($match_string_count==1){      $match_string_start=$1;
																					 $match_string_stop =$2;
					}elsif($match_string_count > 1){ $match_string_stop=$2;      }
					#print "        (8) Sbjct: line found: $temp_match\_$match_string_start\-$match_string_stop\n" if $verbose;
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (9) Matching '   Database: ' line    ,                << END >>
			#_________________________________________________________________
			elsif( ($entry_found and  $lines=~/^\s+Database:\s+\S+/) or eof){ # the very last write
					if($evalue > $evalue_thresh){ last
					}else{
							#print "        <<<< The end of file reached, writing $sorted_name\n" if $verbose;
							@out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
																			 $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
																			 $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
																			 $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
							%hash_out=         %{$out_from_put_mspa_lines[0]};
							%accumulative_hits=%{$out_from_put_mspa_lines[1]};
							$read_point_found= $out_from_put_mspa_lines[2];
							last;
					}
			}
	 }
	 close(BLA_FILE);
	 unless( $take_only_the_last_iteration){
			 print "\n# >> ACCUMULATIVE HITS are reported as you did not set \$take_only_the_last_iteration opt!!\n";
			 %hash_out=(%hash_out, %accumulative_hits);
	 }
	 #&show_hash(\%hash_out) if $verbose;
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
	 # CLeaning up the BLA file if $Lean_output is set
	 #_____________________________________________________
	 $gzipped_search_file="$file\.gz";
	 if($Lean_output ){ ## If Lean_out opt is set and $file exists and %hash_out is not empty, remove $file
			 if(-s $file){
					 unlink($file);  ## removes fam_8_8.pbla etc,
			 }elsif(-s $gzipped_search_file){
					 unlink($gzipped_search_file); ## removes fam_8_8.pbla.gz etc,
			 }else{
					 print "\n# (E) convert_html_bla_to_mspa: tried to remove search out file for \$Lean_output opt,
							 but failed. Something is wrong. Think! or report to jong\@salt2.med.harvard.edu,
							 jong\@mrc-lmb.cam.ac.uk, sat\@mrc-lmb.cam.ac.uk, jong_p\@hotmail.com\n";
							 die;
			 }

	 }
	 return(\%hash_out);
}


#________________________________________________________________________________
# Title     : convert_bla_HTML_to_mspa
# Usage     : %hash_out_final=%{&convert_bla_HTML_to_mspa(\$file, [$Lean_output])};
# Function  : Reads in PSI-BLAST output and produces MSP file format.
#             Takes all the good hits below a certain threshold across multiple iterations.
#             Reports the best E-value for a given sequence name.
# Example   : %hash_out=%{&convert_bla_HTML_to_mspa(\$file)};
# Keywords  : pbla_to_mspa, blast_to_mspa, bla_2_mspa, blastp_to_mspa_format,
#             blast_to_mspa_format, convert_bla_HTML_to_mspa, convert_bla_HTML_to_mspa_files
#             bla_to_mspa, convert_bla_to_mspa
# Options   :
#   $pdbd_seq_only  d   for getting dxxxx_ like seq names only (pdb40d names, for example)
#   $all_seq  a         for forcing all seq conversion
#   $which_iteration= by i=    # choose which iteration result you want to take
#   $which_iteration   as just a digit
#   $report_only_the_best=b by b -b
#   $take_only_the_last_iteration=l by l
#   $accumulative_hits_eval_thresh= by e=
#   $genome_seq_only=g      by g
#   $nrdb_seq_only=n        by n
#   $evalue_thresh=         by E=
#   $Accumulate_matches=A   by A -A
#   $Lean_output=L          by L -L  # to remove search output to unclutter
#
# Author    : Sarah Teichmann and Jong Park, jong@salt2.med.harvard.edu
# Version   : 4.7
#--------------------------------------------------------------------------------
sub convert_bla_HTML_to_mspa{
    my($i, $j, $k, @lines, $match_string_count,  $line_count, $query_string_count,
       $match_length, $Lean_output,
       $lines, $duplicated_match_count, $new_sorted_name, $sorted_name, $verbose,
       $pdbd_seq_only, $entry_found, $which_iteration, $report_only_the_best,
       $genome_seq_only, $all_seq, $header_found, $accumulative_hits_eval_thresh,
       $take_only_the_last_iteration, $original_query, $nrdb_seq_only,
       $get_the_final_iteration, $read_entry_lines, $verbose, $Accumulate_matches,
       %good_matches_list, $matched_seq_name, $true_seq_name_found);
    my $match_leng_thresh=10;
    ### This localization is critical: use local(), NOT my(), as I use a sub which relies on this
    local(%hash_out, %accumulative_hits, $file, $score, $score_ori, $evalue,
          $evalue_ori, $seq_id, $query_range_start, $query_range_stop,
          $query, $match_string_start, $match_string_stop, $matched,
          $read_point_found);
    $duplicated_match_count=0;

    my $evalue_thresh=$accumulative_hits_eval_thresh=1; ## default eval threshes
    $query=$original_query='query_seq'; ## default query seq name, to avoid blank name

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Processing the input arguments to get file and options etc
    #_____________________________________________________________
    for (@_){
        if(ref $_ eq 'ARRAY'){ @lines =@{$_};
        }elsif( ref $_ eq 'SCALAR' and -s ${$_} ){ $file=${$_};
        }elsif( -s $_ ){            $file=$_;
        }elsif(/^\s*d\s*$/){          $pdbd_seq_only='d'; $all_seq=''; $genome_seq_only='';
            print "\n $0: convert_bla_HTML_to_mspa,  You set \$pdbd_seq_only option, I will skip others.\n";
        }elsif(/^\s*b\s*$/){          $report_only_the_best='b';
        }elsif(/^\s*a\s*$/){          $all_seq='a'; $genome_seq_only=''; $pdbd_seq_only=''; $nrdb_seq_only='';
        }elsif(/^\s*g\s*$/){          $genome_seq_only='g'; $all_seq=''; $pdbd_seq_only='';$nrdb_seq_only='';
        }elsif(/^\s*n\s*$/){          $nrdb_seq_only='n'; $all_seq=''; $pdbd_seq_only=''; $genome_seq_only=''; }
        if(/^\s*l\s*$/){          $take_only_the_last_iteration='l'; }
        if(/^\s*v\s*$/){          $verbose='v'; }
        if(/^\s*L\s*$/){          $Lean_output='L'; }
        if(/e=(\S+)/){          $accumulative_hits_eval_thresh=$1; }
        if(/E=(\S+)/){          $evalue_thresh=$1;          }
        if(/SEQ_NAME=(\S+)/i){   $query=$original_query=$SEQ_NAME=$1;  }
        if(/A$/){               $Accumulate_matches='A';  }
        if(/^\s*i=\s*(\d+)\s*$/){ $which_iteration=$1; }
     }
     unless($which_iteration){  $get_the_final_iteration=1 }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
     # If the input file is gzipped, uncompress it to text file and then open
     #__________________________________________________________________
     if($file=~/\.gz\s*$/){
         open(BLA_FILE, "gunzip -c $file|") || die "\n# $0: Failed to open $file\n";
         if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;
         }elsif($query eq 'query_seq' and $file=~/(\S+)\.\S+\.gz/){    $query=$original_query=$1;      }
     }else{
         open(BLA_FILE, "$file") || die "\n# $0: convert_bla_HTML_to_mspa : Failed to open $file\n";
         if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;
         }elsif($query eq 'query_seq' and $file=~/(\S+)\.\S+$/){     $query=$original_query=$1;       }
     }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # UP to NOW is frivolous option handling stuff
     #_______________________________________________________

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # (1) Main reading in .pbla file (or any extension)
     #____________________________________________________________________________
     while(<BLA_FILE>){
            $line_count++;      $lines=$_;  ## putting $_ to $lines var
            if($lines=~/^\s*$/ or $lines=~/^ \s+Length\s+\=\s+\d+\s*$/){      next     }  ## skipping some junk lines


            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (1.1) If I reach the end of the opened file, I use &put_mspa_lines_to_hash_from_bla sub to write the final mspa line and finish
            #________________________________________________________________________________________________________________
            if( eof ){
                 if( $read_point_found <= $which_iteration  or  $get_the_final_iteration){
                      #print "     # (i) <<<< The end of file reached, writing $sorted_name \n\n";
                      @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                         $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                         $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                         $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                      %hash_out=         %{$out_from_put_mspa_lines[0]};
                      %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                      $read_point_found= $out_from_put_mspa_lines[2];
                      last;
                 }
            }

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (2) Extracting query seq name(this is the only place to get it)
            #__________________________________________________________________
            if($lines=~/^\s*Query=\s*(\S+)/){ $query=$original_query=$1; $true_seq_name_found=1; next    }
            # Following is to handle the HTML version of PSI output
            if(!$true_seq_name_found and $lines=~/Query=\<\S\>/i){
                  $query=$original_query=$SEQ_NAME;  next } # <b>Query=</b>

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~
            # (3) 'Searching......done'  line indicates new search step(iteration)
            #_________________________________________________________________________
            if( $lines=~/^\s*Searching\.\.\.\.+[done]?/i ){
                 %good_matches_list=();
                 $read_point_found++;

                 $entry_found=0;
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 #  (3.3) Following is the KEY part for controlling iteration. HTML format result is irrelevant to this
                 #________________________________________________________________________________________________________
                 if( $which_iteration and $read_point_found < $which_iteration){
                      #print "\n# (INFO) skipped, \$which_iteration: ($which_iteration), \$read_point_found: ($read_point_found)" if $verbose;
                      $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
                      $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
                      $entry_found=$duplicated_match_count=0;

                      if( !$Accumulate_matches){
                              %hash_out=(); ## this is to remove any discarded pairs in the iteration
                      }
                      #print "\n# (INFO) ===> New iteration ====\$read_point_found: $read_point_found, \$which_iteration:$which_iteration\n";
                      next;
                 }elsif( $which_iteration and $read_point_found == $which_iteration){
                      $read_entry_lines=$summary_lines_found=1;
                      next;
                 }elsif( $which_iteration and $read_point_found >  $which_iteration){
                      @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                           $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                           $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                           $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                      %hash_out=         %{$out_from_put_mspa_lines[0]};
                      %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                      $read_point_found= $out_from_put_mspa_lines[2];
                      last;

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # If you did not set the which iteration option
                 #_________________________________________________________
                 }elsif(!$which_iteration){
                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                      # (3.4) Default situation
                      #____________________________________________________________
                      #print "\n# (WARN) You did not set \$which_iteration option \n\n" if $verbose;
                      if($read_point_found > 1){
                            #print "\n (3.3) Writing the last entry $sorted_name BEFORE next Searching........ line\n";
                            @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                               $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                               $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                               $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                            %hash_out=         %{$out_from_put_mspa_lines[0]};
                            %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                            $read_point_found= $out_from_put_mspa_lines[2];
                      }
                      $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
                      $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
                      $entry_found=$duplicated_match_count=0;
                      if( !$Accumulate_matches){  %hash_out=(); $entry_found=0; $duplicated_match_count=0;     }
                      $read_entry_lines=$summary_lines_found=1; ## this is set by 'Searching......' line
                      next;
                 }
            # $summary_lines_found is set by Searching............ line
            }elsif($summary_lines_found and $lines=~/good_GI\" VALUE = \S+>(\S+)<\/a> .+\d+\s*<\/a>\s+([\d\-e]+)\s*$/){
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
            # reading the search summary lines. Save time by selecting which match to parse
            # VALUE = "350102">prf||0409309A</a>  hemoglobin alpha [Loris tardigradus]                   <a href = #350102> 92</a>  4e-19
            #_________________________________________________________________________________________
                 $matched_seq_name=$1;
                 $match_E_value=$2;
                 if($matched_seq_name=~/pdb\|(\S+)\|(\S+)$/i){ $matched_seq_name="$1$2"
                 }elsif($matched_seq_name=~/^gi\|\S*?\|?([^\|]+)$/i
                    or  $matched_seq_name=~/^\S+\|\S*\|([^\|]+)$/){ $matched_seq_name=$1 }

                 if($match_E_value <= $evalue_thresh){
                     if($pdbd_seq_only and ($matched_seq_name=~/^pdb_/
                        or $matched_seq_name=~/^[cde]\d\w{3,6}/)
                        or $matched_seq_name=~/^ds[\d\_]+$/){
                         $good_matches_list{$matched_seq_name}=$matched_seq_name;
                     }elsif(!$pdbd_seq_only){
                         $good_matches_list{$matched_seq_name}=$matched_seq_name;
                     }
                 }else{
                     next;
                 }
            }elsif($read_entry_lines and $lines=~/\&.+uid=\d+\">(\S+)<\/a> .+/){
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (4) &uid=00350102">prf||0409309A</a> hemoglobin alpha [Loris tardigradus]
            #__________________________________________________________
                    $temp_match=$1;
                    if($temp_match=~/pdb\|(\S+)\|(\S+)$/i){ $temp_match="$1$2"
                    }elsif($temp_match=~/^gi\|\S*?\|?([^\|]+)$/i
                       or  $temp_match=~/^\S+\|\S*\|([^\|]+)$/){ $temp_match=$1 }
                    $summary_lines_found=0;
                    unless($good_matches_list{$temp_match}){ $entry_found=0; next }

                    $entry_found=1;
                    if($match_string_count){ ## $match_string_count is incremented only by 'Sbjct' line
                          @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                           $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                           $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                           $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                          %hash_out=         %{$out_from_put_mspa_lines[0]};
                          %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                          $read_point_found= $out_from_put_mspa_lines[2];
                          $match_string_count=0;
                          $duplicated_match_count=0;
                    }

                    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    #  Only with new seq entry, I count the pair occurrances
                    #__________________________________________________________________
                    $query=$original_query; $query_string_count='';
                    $matched=$temp_match; ## this should be here, after if
                    $sorted_name=join(' ', sort($query, $matched) );
                    $match_string_count=0;
            }elsif($entry_found){
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # (5) Matching  Score =  325 bits (824), Expect = 6e-89           << 2 >>
                #________________________________________________________________________________
                if( $lines=~/^\s*Score\s*\=\s*(\S+)\s*bits\s+\(\S+\)\,\s*Expect\s*=\s*(\S+)/i
                    or $lines=~/^\s*Score\s*\=\s*(\S+)\s*bits.+\,\s*Expect\s*=\s*(\S+)/i){

                    $score_ori=$1;
                    $evalue_ori=$2;
                    if($evalue_ori=~/^e\-\d\d\d/){ $evalue_ori="1".$evalue_ori; } ## bug fix for short eval in blast distribution

                    if($match_string_count){ # $match_string_count is increased when Sbjct word is found
                            if($evalue > $evalue_thresh){ $evalue=$evalue_ori; $score=$score_ori; next }
                            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                            # When Only the first match(best evalue) is required, write mspa line and reset $entry_found var
                            #_________________________________________________________________________________________________
                            if($report_only_the_best){
                                    #print "      (5)  \$report_only_the_best is set\n" if $verbose;
                                    @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                                     $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                                     $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                                     $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                                    %hash_out=         %{$out_from_put_mspa_lines[0]};
                                    %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                                    $read_point_found= $out_from_put_mspa_lines[2];
                                    $entry_found=0; next;
                            }else{
                                    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~`
                                    # duplicated match count means, query matched more than one region of a match seq
                                    #__________________________________________________________________________________
                                    $duplicated_match_count++;
                                    $sorted_name="$sorted_name $duplicated_match_count";
                                    #print " ====(5) Multiple region for \"$new_sorted_name\" is found =========== e= $evalue\n" if $verbose;
                                    @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                                     $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                                     $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                                     $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                                    %hash_out=         %{$out_from_put_mspa_lines[0]};
                                    %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                                    $read_point_found= $out_from_put_mspa_lines[2];
                            }
                            $score=$score_ori; $evalue=$evalue_ori;
                    }else{
                            #print "     (5) \$match_string_count is not set NO write \$evalue_ori $evalue_ori\n" if $verbose;
                            $evalue=$evalue_ori; $score=$score_ori;
                    } ## to next line

                    sub reset_all_the_vars{
                         # Clears the per-hit parser state: every variable listed below is set
                         # to the empty string. Takes no arguments and returns ''.
                         # NOTE(review): $query_string_count, $match_string_count and
                         # $new_sorted_name are my() variables of convert_bla_HTML_to_mspa, so
                         # this named nested sub is bound to the copies from that function's
                         # first call only (perldiag: 'Variable "%s" will not stay shared').
                         # The score/evalue/range variables are local()ized there and
                         # $mspa_line is not declared in its my/local lists.
                         #print "            !!!!  Reseting all the vars !!!!\n" if $verbose;
                         for my $state_var ($query_range_start, $query_range_stop, $seq_id, $evalue,
                                            $score, $query_string_count, $match_string_count,
                                            $new_sorted_name, $mspa_line, $match_string_stop){
                             $state_var='';  # the loop variable aliases the real variable
                         }
                         return '';  # same value the former chained assignment evaluated to
                    }
            }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
            # (6) Matching   Identities = 158/158 (100%), Positives = 158/158 (100%)    ,
            #____________________________________________________________________________________
            elsif( $lines=~/^\s*Identities\s+=\s+\S+\/(\S+)\s+\(\s*(\S+)\s*\%\)/i){
                 $query_string_count=$match_string_count=0;
                 $seq_id=$2/100;
                 $match_length=$1;
                 if($match_length < $match_leng_thresh){
                         #print "     (6) \$match_leng_thresh $match_leng_thresh > \$match_length $match_length" if $verbose;
                         $entry_found=0;
                         $match_string_count=1;
                         next;
                 }else{
                         #print "     (6) $sorted_name : ABOVE leng thresh. \$seq_id= $seq_id, \$match_length= $match_length\n" if $verbose;
                 }
            }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (7) Matching 'Query: 2 GIRAATSQEINELT..' line    ,
            #_________________________________________________________________
            elsif($lines=~/^\s*Query\:?\s+(\d+)\s+\D+\s+(\d+)/){
                 $query_string_count++;
                 $query_line_found=1;
                 if($query_string_count==1){      $query_range_start=$1;   $query_range_stop =$2;
                 }elsif($query_string_count > 1){ $query_range_stop=$2;     }
                 #print "        (7) Query: line found: $query\_$query_range_start\-$query_range_stop\n" if $verbose;
            }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (8) Matching 'Sbjct: 2 GIRAATSQEINELT..' line
            #_________________________________________________________________
            elsif($query_line_found and $lines=~/^\s*Sbjct\:?\s+(\d+)\s+[\w\-]+\s+(\d+)/i){
                 $match_string_count++;
                 $subject_line_found=1;
                 if($match_string_count==1){      $match_string_start=$1;
                                                                                  $match_string_stop =$2;
                 }elsif($match_string_count > 1){ $match_string_stop=$2;      }
                 #print "        (8) Sbjct: line found: $temp_match\_$match_string_start\-$match_string_stop\n" if $verbose;
            }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (9) Matching '   Database: ' line    ,                << END >>
            #_________________________________________________________________
            elsif( $lines=~/CPU time:/ or $lines=~/^\s+Database:\s+\S+/ or eof){ # the very last write
                 if($evalue > $evalue_thresh){ last
                 }else{
                      #print "        <<<< The end of file reached, writing $sorted_name\n" if $verbose;
                      @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                                       $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                                       $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                                       $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                      %hash_out=         %{$out_from_put_mspa_lines[0]};
                      %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                      $read_point_found= $out_from_put_mspa_lines[2];
                      last;
                 }
            }elsif($lines=~/^\s+\*+\s+No hits found\s+\*+/i){
                 print "\n $lines \n";
                 last;
            }
         }
     }
     close(BLA_FILE);
     unless( $take_only_the_last_iteration){
             print "\n# >> ACCUMULATIVE HITS are reported as you did not set \$take_only_the_last_iteration opt!!\n";
             %hash_out=(%hash_out, %accumulative_hits);
     }
     #&show_hash(\%hash_out) if $verbose;
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
     # CLeaning up the BLA file if $Lean_output is set
     #_____________________________________________________
     $gzipped_search_file="$file\.gz";
     if($Lean_output ){ ## If Lean_out opt is set and $file exists and %hash_out is not empty, remove $file
          if(-s $file){
                  unlink($file);  ## removes fam_8_8.pbla etc,
          }elsif(-s $gzipped_search_file){
                  unlink($gzipped_search_file); ## removes fam_8_8.pbla.gz etc,
          }else{
                  print "\n# (E) convert_bla_HTML_to_mspa: tried to remove search out file for \$Lean_output opt,
                          but failed. Something is wrong. Think! or report to jong\@salt2.med.harvard.edu,
                          jong\@mrc-lmb.cam.ac.uk, sat\@mrc-lmb.cam.ac.uk, jong_p\@hotmail.com\n";
                          die;
          }
     }
     return(\%hash_out);
}


#________________________________________________________________________________
# Title     : convert_bla_to_MSPA_file_format
# Usage     : %hash_out_final=%{&convert_bla_to_MSPA_file_format(\$file, [$Lean_output])};
#             %hash_out_final=%{&convert_bla_to_MSPA_file_format(\@blast_lines, 'E=0.001')};
# Function  : reads in PSI blast output and produces MSPA file format.
#             For every '>' entry the Score/Expect line, the Identities line and the
#             Query:/Sbjct: lines of its first alignment are parsed (further
#             alignments of the same entry are skipped).
#             Reports the best evalue with a given sequence name
# Returns   : ref. of a hash
#               key  = "QUERY_start-stop" and "MATCH_start-stop", sorted and joined
#                      by one space
#               value= score, evalue, seq. identity (as fraction), query start,
#                      query stop, query name, match start, match stop, match name,
#                      iteration count and "aligned_query<=>aligned_match", with \n
# Example   : %hash_out=%{&convert_bla_to_MSPA_file_format(\$file)};
# Keywords  : pbla_to_mspaa, blast_to_mspaa, bla_2_mspaa, blastp_to_mspaa_format,
#             blast_to_mspaa_format, convert_bla_to_MSPA_file_format, convert_bla_to_MSPA_file_format_files
#             bla_to_mspaa
# Input     : a file name, a ref. of a file name (plain or gzipped, '.gz'), and/or
#             a ref. of an array of blast output lines. Summary lines and blank
#             lines are dropped from both kinds of input before parsing.
# Options   :
#   $Evalue_thresh=         by E=    # hits with evalue above this are dropped (default 40)
#   $which_iteration= by i=    # start value of the iteration counter, which is
#                              #  increased at each 'Searching...' line and reported
#   SEQ_NAME=xxx               # query name; a 'Query= xxx' line in the input overrides it
#   $Lean_output=L          by L -L  # to remove search output to unclutter
#
#   The following are accepted for compatibility but have no effect in this sub:
#   $pdbd_seq_only  d   for getting dxxxx_ like seq names only(pdb40d names for examp)
#   $all_seq  a         for forcing all seq conversion
#   $report_only_the_best=b by b -b
#   $take_last_iter_PSI_BLA=l by l
#   $PSI_BLA_ACCUMU_hits_eval_thresh= by e=
#   $genome_seq_only=g      by g
#   $nrdb_seq_only=n        by n
#   $Accumulate_matches=A   by A -A
#
# Author    : Sarah Teichmann and Jong Park, jong@salt2.med.harvard.edu
# Version   : 5.5
#--------------------------------------------------------------------------------
sub convert_bla_to_MSPA_file_format{
    my(@lines, %hash_out, $file, $Lean_output, $SEQ_NAME, $verbose,
       $pdbd_seq_only, $all_seq, $genome_seq_only, $nrdb_seq_only,
       $report_only_the_best, $take_last_iter_PSI_BLA, $Accumulate_matches,
       $which_iteration);
    my $match_leng_thresh=10;
    ## Lexical on purpose: this sub calls no helper subs, so nothing needs dynamic
    ## scope, and the caller's package variables are no longer overwritten.
    my $Evalue_thresh=40;
    my $PSI_BLA_ACCUMU_hits_eval_thresh=0.0001; ## default eval threshes
    my $query='query_seq'; ## default query seq name, to avoid blank name

    ## True for summary lines and blank lines. The parser below steps through
    ## @lines by fixed offsets and relies on these lines being absent.
    my $is_junk_line=sub{
        return($_[0]=~/^(\S+) .{30,} +\d+ +([\d\.e\-]+)\s*$/ or $_[0]=~/^\s*$/);
    };

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Processing the input arguments to get file and options etc
    #_____________________________________________________________
    for my $arg (@_){
        if(ref($arg) eq 'ARRAY'){   @lines=grep { !$is_junk_line->($_) } @{$arg};
        }elsif( ref($arg) eq 'SCALAR' and -s ${$arg} ){ $file=${$arg};
        }elsif( -s $arg ){          $file=$arg;
        }elsif($arg=~/^ *d *$/){    $pdbd_seq_only='d'; $all_seq=''; $genome_seq_only='';
            print "\n $0: convert_bla_to_MSPA_file_format,  You set \$pdbd_seq_only option, I will skip others.\n";
        }elsif($arg=~/^ *b *$/){    $report_only_the_best='b';
        }elsif($arg=~/^ *a *$/){    $all_seq='a'; $genome_seq_only=''; $pdbd_seq_only=''; $nrdb_seq_only='';
        }elsif($arg=~/^ *g *$/){    $genome_seq_only='g'; $all_seq=''; $pdbd_seq_only='';$nrdb_seq_only='';
        }elsif($arg=~/^ *n *$/){    $nrdb_seq_only='n'; $all_seq=''; $pdbd_seq_only=''; $genome_seq_only=''; }
        if($arg=~/^ *l *$/){        $take_last_iter_PSI_BLA='l'; $Accumulate_matches='' }
        if($arg=~/^ *v *$/){        $verbose='v'; }
        if($arg=~/^ *L *$/){        $Lean_output='L'; }
        if($arg=~/^\s*e=(\S+)/){    $PSI_BLA_ACCUMU_hits_eval_thresh=$1; } ## accepted, not used here
        if($arg=~/^\s*SEQ_NAME=(\S+)/i){  $query=$SEQ_NAME=$1;  }
        if($arg=~/^\s*E=(\S+)\s*/){ $Evalue_thresh=$1; } ## this is the cutoff applied to every hit
        if($arg=~/^\s*A$/){         $Accumulate_matches='A'; $take_last_iter_PSI_BLA=''; }
        if($arg=~/^ *i= *(\d+) *$/){ $which_iteration=$1; }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
    # If the input file is gzipped, uncompress it through a pipe.
    # When only an array of lines was given there is nothing to open.
    #__________________________________________________________________
    if(defined($file) and length($file)){
        my $BLA_FILE;
        if($file=~/\.gz *$/){
            ## list form: the file name never passes through a shell
            open($BLA_FILE, '-|', 'gunzip', '-c', $file) || die "\n# $0: Failed to open $file\n\n\n";
        }else{
            open($BLA_FILE, '<', $file) || die "\n# !! $0: convert_bla_to_MSPA_file_format : Failed to open \"$file\"\n\n\n";
        }
        if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;          }

        ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
        ###  (1) Main reading in .pbla file (or any extension)                    ###
        ###      summary and blank lines are dropped to reduce memory takeup      ###
        ###_______________________________________________________________________###
        while(defined(my $line=<$BLA_FILE>)){
            push(@lines, $line) unless $is_junk_line->($line);
        }
        close($BLA_FILE);
    }elsif(!@lines){
        die "\n# !! $0: convert_bla_to_MSPA_file_format : Failed to open \"\"\n\n\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # (2) Walking through the lines. $i always points at the next line
    #     which has not been examined yet, so that a 'Searching...' or '>'
    #     line directly after an alignment block is not skipped.
    #_____________________________________________________________________
    my $i=0;
    LINE: while($i < @lines){
        if($lines[$i]=~/^Query=\s+(\S+)/){  $query=$1; $i++;
        }elsif($lines[$i]=~/\<\S\>\s{0,4}Query=\<\S\>/i){  $query=$SEQ_NAME; $i++;  } ## HTML: <b>Query=</b>
        ## $source_DB is left as a package variable as before - NOTE(review): nothing
        ## in this sub reads it; confirm whether any caller does.
        if($i < @lines and $lines[$i]=~/^Database:\s+(\S+)/){  $source_DB=$1;  $i++;  }
        if($i < @lines and $lines[$i]=~/^Searching\.\..+[done]?/i){   $which_iteration++;
            splice(@lines, 0, $i); $i=0;  ## lines before this search step are not needed any more
        }

        if($i < @lines and $lines[$i]=~/^\>(\S+)/){
            my $matched=$1;
            if($matched=~/\|/){  ## shorten 'pdb|xxx|y', 'gi|nnn|xxx' and 'db|acc|xxx' style names
                if($matched=~/pdb\|(\S+)\|(\S+)$/i){ $matched="$1$2";
                }elsif($matched=~/^gi\|\S*?\|?([^\|]+)$/i
                   or  $matched=~/^\S+\|\S*\|([^\|]+)$/){ $matched=$1; }
            }
            my($score, $evalue, $seq_id)=('', '', '');
            my($query_range_start, $query_range_stop)=('', '');
            my($match_string_start, $match_string_stop)=('', '');
            my($aligned_seq_query, $aligned_seq_match)=('', '');
            my($query_string_count, $match_string_count)=(0, 0);

            #~~~~~~ Score line: give up on truncated input instead of scanning forever
            $i+=2;
            $i++ while($i < @lines and $lines[$i]!~/^\s+Score/);
            last LINE if $i >= @lines;
            if( $lines[$i]=~/^\s+Score\s+=\s+(\S+)\s+bits\s+\(\S+\)\,\s+Expect\s+=\s+(\S+)/i
                or $lines[$i]=~/^\s+Score\s+=\s+(\S+)\s+bits.+\,\s+Expect\s+=\s+(\S+)/i ){
                ($score, $evalue)=($1, $2);
            }
            if($evalue=~/^e\-\d\d\d/){ $evalue="1".$evalue; } ## bug fix for short eval in blast distribution
            $i++;

            #~~~~~~ Identities line: too short matches are not reported
            if($i < @lines and $lines[$i]=~/^\s+Identities\s+=\s+\S+\/(\S+)\s+\(\s*(\S+)\s*\%\)/i){
                my($match_length, $percent_identity)=($1, $2);
                if($match_length < $match_leng_thresh){  $i+=2; next LINE;  }
                $seq_id=$percent_identity/100;
            }
            $i++;

            #~~~~~~ Query:/Sbjct: line pairs of the alignment
            while($i < @lines and $lines[$i]=~/^Query\:\s+(\d+)\s+(\D+)\s+(\d+)/){
                my($from, $residues, $to)=($1, $2, $3);
                $query_string_count++;
                $aligned_seq_query .= $residues;
                if($query_string_count==1){ $query_range_start=$from; }
                $query_range_stop=$to;
                $i++;
                ## Skip the line between Query: and Sbjct:. It is absent when it was
                ## blank (no identical or similar residue), as blank lines are dropped.
                $i++ unless($i < @lines and $lines[$i]=~/^Sbjct\:/i);
                if($i < @lines and $lines[$i]=~/^Sbjct\:\s+(\d+)\s+(\D+)\s+(\d+)/i){
                    ($from, $residues, $to)=($1, $2, $3);
                    $match_string_count++;
                    $aligned_seq_match .= $residues;
                    if($match_string_count==1){ $match_string_start=$from; }
                    $match_string_stop=$to;
                }
                $i++;
            }

            #~~~~~~ Storing: for the same pair of ranges the lower evalue wins
            my $sorted_name=join(' ', sort("$query\_$query_range_start\-$query_range_stop",
                                           "$matched\_$match_string_start\-$match_string_stop"));
            if($Evalue_thresh >= $evalue){
                my $previous=$hash_out{$sorted_name};
                my $store_it=1;
                if(defined($previous) and $previous=~/^\S+\s+(\S+)\s+\S+/){
                    $store_it=($1 > $evalue);
                }
                if($store_it){
                    $hash_out{$sorted_name}=sprintf("%-s\t%-5s\t%-4s\t%-4s %-s %-s %-3s %-s\t%-8s\t%s %s<=>%s\n",
                               $score, $evalue, $seq_id, $query_range_start, $query_range_stop,
                               $query, $match_string_start, $match_string_stop, $matched,
                               $which_iteration, $aligned_seq_query, $aligned_seq_match);
                }
            }
            next LINE;  ## $i is already at the first line after this entry
        }
        $i++;
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
    # CLeaning up the BLA file if $Lean_output is set
    # (nothing to remove when the input was an array of lines)
    #_____________________________________________________
    if($Lean_output and defined($file) and length($file)){
        my $gzipped_search_file="$file\.gz";
        if(-s $file){
            unlink($file);  ## removes fam_8_8.pbla etc,
        }elsif(-s $gzipped_search_file){
            unlink($gzipped_search_file); ## removes fam_8_8.pbla.gz etc,
        }else{
            print "\n# (E) convert_bla_to_MSPA_file_format: tried to remove search out file for \$Lean_output opt,
                   but failed. Something is wrong. Think! or report to jong\@salt2.med.harvard.edu,
                   jong\@mrc-lmb.cam.ac.uk, sat\@mrc-lmb.cam.ac.uk, jong_p\@hotmail.com\n";
            die;  ## as in convert_bla_to_mspa: a catchable failure with non-zero exit status
        }
    }
    return(\%hash_out);
}





#________________________________________________________________________________
# Title     : convert_bla_to_mspa
# Usage     : %hash_out_final=%{&convert_bla_to_mspa(\$file, [$Lean_output])};
# Function  : reads in PSI blast output and produces MSP file format.
#             Takes all the good hits below certain threshold in multiple iteration
#             Reports the best evalue with a given sequence name
# Example   : %hash_out=%{&convert_bla_to_mspa(\$file)};
# Keywords  : pbla_to_mspa, blast_to_mspa, bla_2_mspa, blastp_to_mspa_format,
#             blast_to_mspa_format, convert_bla_to_mspa, convert_bla_to_mspa_files
#             bla_to_mspa
# Options   :
#   $pdbd_seq_only  d   for getting dxxxx_ like seq names only(pdb40d names for examp)
#   $all_seq  a         for forcing all seq conversion
#   $which_iteration= by i=    # choose which iteration result you want to take
#   $which_iteration   as just a digit
#   $report_only_the_best=b by b -b
#   $take_last_iter_PSI_BLA=l by l
#   $PSI_BLA_ACCUMU_hits_eval_thresh= by e=
#   $genome_seq_only=g      by g
#   $nrdb_seq_only=n        by n
#   $Evalue_limit=          by E=   # summary line hits above this evalue are ignored (default 5)
#   $Accumulate_matches=A   by A -A
#   $Lean_output=L          by L -L  # to remove search output to unclutter
#
# Author    : Sarah Teichmann and Jong Park, jong@salt2.med.harvard.edu
# Version   : 5.2
#--------------------------------------------------------------------------------
sub convert_bla_to_mspa{
    my($i, $j, $k, @lines, $match_string_count,  $line_count, $query_string_count,
       $match_length, $Lean_output, $SEQ_NAME, $original_query,
       $duplicated_match_count, $new_sorted_name, $sorted_name, $verbose,
       $pdbd_seq_only, $which_iteration, $report_only_the_best,
       $genome_seq_only, $all_seq, $header_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
       $take_last_iter_PSI_BLA, $nrdb_seq_only,
       $get_the_final_iteration, $read_entry_lines, $verbose, $Accumulate_matches,
       $CONVERGED_sign_found, $Evalue_limit, $entry_and_alignment_found, $query);
    my $match_leng_thresh=10;
    ### These must be local(), NOT my(): the helper subs called from here read and set them as package variables
    local(%hash_out, %accumulative_hits, $file, $score, $score_ori, $evalue,
          $evalue_ori, $seq_id, $query_range_start, $query_range_stop,
          $match_string_start, $match_string_stop, $matched, $matched_seq_name,
          $read_point_found, $summary_lines_found, $entry_found, %good_matches_list);
    $duplicated_match_count=0;

    $Evalue_limit=5;
    $PSI_BLA_ACCUMU_hits_eval_thresh=0.0001; ## default eval threshes
    $query=$original_query='query_seq'; ## default query seq name, to avoid blank name
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Processing the input arguments to get file and options etc
    #_____________________________________________________________
    for (@_){
        if(ref $_ eq 'ARRAY'){ @lines =@{$_};
        }elsif( ref $_ eq 'SCALAR' and -s ${$_} ){ $file=${$_};
        }elsif( -s $_ ){            $file=$_;
        }elsif(/^\s*d\s*$/){          $pdbd_seq_only='d'; $all_seq=''; $genome_seq_only='';
            print "\n $0: convert_bla_to_mspa,  You set \$pdbd_seq_only option, I will skip others.\n";
        }elsif(/^\s*b\s*$/){          $report_only_the_best='b';
        }elsif(/^\s*a\s*$/){          $all_seq='a'; $genome_seq_only=''; $pdbd_seq_only=''; $nrdb_seq_only='';
        }elsif(/^\s*g\s*$/){          $genome_seq_only='g'; $all_seq=''; $pdbd_seq_only='';$nrdb_seq_only='';
        }elsif(/^\s*n\s*$/){          $nrdb_seq_only='n'; $all_seq=''; $pdbd_seq_only=''; $genome_seq_only=''; }
        if(/^\s*l\s*$/){          $take_last_iter_PSI_BLA='l'; $Accumulate_matches='' }
        if(/^\s*v\s*$/){          $verbose='v'; }
        if(/^\s*L\s*$/){          $Lean_output='L'; }
        if(/^\s*e=(\S+)/){          $PSI_BLA_ACCUMU_hits_eval_thresh=$1; }
        if(/^\s*SEQ_NAME=(\S+)/i){  $query=$original_query=$SEQ_NAME=$1;  }
        if(/^\s*E=(\S+)\s*/){       $Evalue_limit=$1;          }
        if(/^\s*A$/){           $Accumulate_matches='A'; $take_last_iter_PSI_BLA=''; }
        if(/^\s*i=\s*(\d+)\s*$/){ $which_iteration=$1; }
     }
     unless($which_iteration){  $get_the_final_iteration=1 }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
     # If the input file is gzipped, uncompress it to text file and then open
     #__________________________________________________________________
     if($file=~/\.gz\s*$/){
         open(BLA_FILE, "gunzip -c $file|") || die "\n# $0: Failed to open $file\n\n\n";
         if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;          }
     }else{
         open(BLA_FILE, "$file") || die "\n# !! $0: convert_bla_to_mspa : Failed to open \"$file\"\n\n\n";
         if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;          }
     }
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Everything up to this point is option and input file handling
     #_______________________________________________________

     ###~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~###
     ###                                                                           ###
     ###  (1) Main reading in .pbla file (or any extension)                        ###
     ###   by putting pattern matches which occur most, I can save comparisons     ###
     ###___________________________________________________________________________###
     BLA: while(<BLA_FILE>){

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
         # This is just to skip garbage lines
         #_______________________________________________________
         if(/^\s*$/ or /^ \s+Length\s+\=\s+\d+\s*$/){      next     }  ## skipping some junk lines

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # (1) The most important READING HERE!
         #___________________________________________
         if($entry_and_alignment_found or ($summary_lines_found and /^\>\S/)){
              ($matched, $entry_and_alignment_found)
                  =&match_seq_entry_and_alignment_block_in_BLAST_output($original_query);
              $summary_lines_found=0;
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # (3) One line of the search summary table: 'seq_name description ... score evalue'
         #__________________________________________________________
         }elsif(/^(\S+)\s.{29,}\s\d+\s+(\S{3,8})\s*$/){   ## mind the size of space!!
              $name_matched=$1;
              $evalue=$2;
              &match_summary_head_lines_in_BLAST_output($name_matched, $evalue);
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~
         # (4) 'Searching......done'  line indicates new search step(iteration)
         #_________________________________________________________________________
         }elsif( /^\s{0,4}Searching\.\.\.+[done]?/i ){
             $which_iteration=&match_Searching_dot_line_in_BLAST_output($which_iteration);
             $summary_lines_found=1;
             next BLA;
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # (5) Check if it converged before the given -j value
         #________________________________________________________
         }elsif(/^\s*CONVERGED/){
             $CONVERGED_sign_found=1;
             $which_iteration=$read_point_found;
             $entry_and_alignment_found=1;
             next BLA;
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # (6) Extracting query seq name(this is the only place to get it)
         #__________________________________________________________________
         }elsif(/^\s{0,4}Query=\s+(\S+)/){
              $query=$original_query=$1;
              next BLA;
         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # (6-1) Following is to handle the HTML version of PSI output
         #___________________________________________________________________
         }elsif(/\<\S\>\s*Query=\<\S\>/i){ $query=$original_query=$SEQ_NAME;
             next BLA;  # <b>Query=</b>
         }elsif(eof){
              @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                    \%accumulative_hits, $SEQ_NAME,$matched,$evalue, $score, $seq_id,
                    $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                    $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                    $take_last_iter_PSI_BLA, $accumulative_hits_eval_thresh, $Evalue_limit)};
              %hash_out=         %{$out_from_put_mspa_lines[0]};
              %accumulative_hits=%{$out_from_put_mspa_lines[1]};
              $read_point_found= $out_from_put_mspa_lines[2];
              last;
         }elsif(/^\s+\*+\s+No hits found\s+\*+/i){
              print "\n $_ \n";
              last;
         }
     }
     close(BLA_FILE);

     unless( $take_last_iter_PSI_BLA){
         print "\n# >> ACCUMULATIVE HITS are reported as you did not set \$take_last_iter_PSI_BLA opt!!\n";
         %hash_out=(%hash_out, %accumulative_hits);
     }
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
     # CLeaning up the BLA file if $Lean_output is set
     #_____________________________________________________
     $gzipped_search_file="$file\.gz";
     if($Lean_output ){ ## If Lean_out opt is set and $file exists and %hash_out is not empty, remove $file
          if(-s $file){
                  unlink($file);  ## removes fam_8_8.pbla etc,
          }elsif(-s $gzipped_search_file){
                  unlink($gzipped_search_file); ## removes fam_8_8.pbla.gz etc,
          }else{
             print "\n# (E) convert_bla_to_mspa: tried to remove search out file for \$Lean_output opt,
                   but failed. Something is wrong. Think! or report to jong\@salt2.med.harvard.edu,
                   jong\@mrc-lmb.cam.ac.uk, sat\@mrc-lmb.cam.ac.uk, jong_p\@hotmail.com\n";
                   die;
          }
     }
     return(\%hash_out);

            sub match_summary_head_lines_in_BLAST_output{
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                 # Handles one line of the search summary table, so that only wanted matches
                 # are parsed later, e.g.
                 # "d2sga__ 2.35.1.1.3 Protease A [(Streptomyces griseus), strain k1]    273  7e-74"
                 #
                 # Args    : $_[0] = sequence name field,  $_[1] = evalue field
                 # Effect  : sets $matched_seq_name (shortened for 'pdb|..', 'gi|..' and
                 #           'xx|yy|zz' style names) and $evalue.
                 #           When $evalue is not above $Evalue_limit the name is put in
                 #           %good_matches_list; with $pdbd_seq_only set, only if it starts
                 #           with 'pdb_', starts with c/d/e + digit + 3 to 6 word chars, or
                 #           is 'ds' followed by digits and underscores only.
                 # Returns : ($matched_seq_name, $evalue)
                 #_________________________________________________________________________________________
                 $matched_seq_name=$_[0];
                 $evalue=$_[1];

                 if($matched_seq_name=~/pdb\|(\S+)\|(\S+)$/i){
                     $matched_seq_name=$1.$2;
                 }elsif($matched_seq_name=~/^gi\|\S*?\|?([^\|]+)$/i
                    or  $matched_seq_name=~/^\S+\|\S*\|([^\|]+)$/){
                     $matched_seq_name=$1;
                 }

                 unless($evalue <= $Evalue_limit){
                     return($matched_seq_name, $evalue);
                 }

                 ## Without $pdbd_seq_only every name is wanted
                 my $name_is_wanted= !$pdbd_seq_only
                                   || $matched_seq_name=~/^pdb_/
                                   || $matched_seq_name=~/^[cde]\d\w{3,6}/
                                   || $matched_seq_name=~/^ds[\d\_]+$/;
                 if($name_is_wanted){
                     $good_matches_list{$matched_seq_name}=$matched_seq_name;
                 }
                 return($matched_seq_name, $evalue);
            }

            sub match_seq_entry_and_alignment_block_in_BLAST_output{
                $entry_and_alignment_found=1;
                $original_query=$_[0];

                if(/^\> {0,4}(\S+)/){
                     $temp_match=$1;
                     if($temp_match=~/pdb\|(\S+)\|(\S+)$/i){ $temp_match="$1$2"
                     }elsif($temp_match=~/^gi\|\S*?\|?([^\|]+)$/i
                        or  $temp_match=~/^\S+\|\S*\|([^\|]+)$/){ $temp_match=$1 }

                     unless($good_matches_list{$temp_match}){  next }
                     if($match_string_count){ ## $match_string_count is incremented only by 'Sbjct' line
                           @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                  \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                  $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                  $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                  $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                           %hash_out=         %{$out_from_put_mspa_lines[0]};
                           %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                           $read_point_found= $out_from_put_mspa_lines[2];
                           $match_string_count=0;
                           $duplicated_match_count=0;
                     }

                     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                     #  Only with new seq entry, I count the pair occurrances
                     #__________________________________________________________________
                     $query=$original_query;
                     $query_string_count='';
                     $matched=$temp_match; ## this should be here, after if
                     $sorted_name=join(' ', sort($query, $matched) );
                     $match_string_count=0;
                }
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # (1) Matching  Score =  325 bits (824), Expect = 6e-89           << 2 >>
                #________________________________________________________________________________
                elsif( /^\s*Score\s*\=\s*(\S+)\s*bits\s+\(\S+\)\,\s*Expect\s*=\s*(\S+)/i
                    or /^\s*Score\s*\=\s*(\S+)\s*bits.+\,\s*Expect\s*=\s*(\S+)/i){
                    $score_ori=$1;  $evalue_ori=$2;
                    if($evalue_ori=~/^e\-\d\d\d/){ $evalue_ori="1".$evalue_ori; } ## bug fix for short eval in blast distribution

                    if($match_string_count){ # $match_string_count is increased when Sbjct word is found
                         if($evalue > $Evalue_limit){ $evalue=$evalue_ori; $score=$score_ori; }
                         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                         # When Only the first match(best evalue) is required, write mspa line and reset $entry_found var
                         #_________________________________________________________________________________________________
                         if($report_only_the_best){
                              #print "      (5)  \$report_only_the_best is set\n" if $verbose;
                              @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                      \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                      $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                      $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                      $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                              %hash_out=         %{$out_from_put_mspa_lines[0]};
                              %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                              $read_point_found= $out_from_put_mspa_lines[2];
                              #$entry_found=0;
                         }else{
                              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~`
                              # duplicated match count means, query matched more than one region of a match seq
                              #__________________________________________________________________________________
                              $duplicated_match_count++;
                              $sorted_name="$sorted_name $duplicated_match_count";
                              @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                      \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                      $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                      $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                      $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                              %hash_out=         %{$out_from_put_mspa_lines[0]};
                              %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                              $read_point_found= $out_from_put_mspa_lines[2];
                         }
                         $score=$score_ori; $evalue=$evalue_ori;
                    }else{
                         $evalue=$evalue_ori; $score=$score_ori;
                    } ## to next line

                    sub reset_all_the_vars{
                         # Sets the variables describing the current hit (counts, score, evalue,
                         # seq. identity, ranges and name strings) back to the empty string.
                         # NOTE(review): a sub with this name and the same body is also defined
                         # earlier in this file; Perl keeps one definition per package - confirm
                         # that the surviving one sees the variables you expect.
                         #print "            !!!!  Reseting all the vars !!!!\n" if $verbose;
                         ($query_string_count, $score, $evalue, $seq_id,
                          $query_range_stop, $query_range_start,
                          $mspa_line, $new_sorted_name, $match_string_count)=('') x 9;
                         return($match_string_stop='');
                    }
                }
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                # (2) Matching   Identities = 158/158 (100%), Positives = 158/158 (100%)    ,
                #____________________________________________________________________________________
                elsif( /^ {0,4}Identities\s+=\s+\S+\/(\S+)\s+\(\s*(\S+)\s*\%\)/i){
                     $query_string_count=$match_string_count=0;
                     $seq_id=$2/100;
                     $match_length=$1;
                     if($match_length < $match_leng_thresh){  $match_string_count=1; }
                }
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # (3) Matching 'Query: 2 GIRAATSQEINELT..' line    ,
                #_________________________________________________________________
                elsif(/^ {0,4}Query\:?\s+(\d+)\s+\D+\s+(\d+)/){
                     $query_string_count++;
                     $query_line_found=1;
                     if($query_string_count==1){      $query_range_start=$1;   $query_range_stop =$2;
                     }elsif($query_string_count > 1){ $query_range_stop=$2;     }
                }
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # (4) Matching 'Sbjct: 2 GIRAATSQEINELT..' line
                #_________________________________________________________________
                elsif($query_line_found and /^ {0,4}Sbjct\:?\s+(\d+)\s+[\w\-]+\s+(\d+)/i){
                     $match_string_count++;
                     $subject_line_found=1;
                     if($match_string_count==1){      $match_string_start=$1;
                                                      $match_string_stop =$2;
                     }elsif($match_string_count > 1){ $match_string_stop=$2;      }
                }
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # (5) Matching '   Database: ' line    ,                << END >>
                #_________________________________________________________________
                elsif(/^\s+Database:\s+\S+/ or eof){ # the very last write
                     if($evalue > $Evalue_limit){
                     }else{
                          @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                          %hash_out=         %{$out_from_put_mspa_lines[0]};
                          %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                          $read_point_found= $out_from_put_mspa_lines[2];
                     }
                }
                return($matched, $entry_and_alignment_found);
             }

             sub match_Searching_dot_line_in_BLAST_output{
                  # Purpose  : bookkeeping performed each time this sub is called (judging by
                  #            its name, when a 'Searching....' line of a BLAST report is met --
                  #            the caller is not visible here, so confirm).
                  # Argument : $_[0] = the iteration number to extract (false/0 = not set).
                  # Returns  : the iteration number it was given.
                  # Note     : every variable used here is declared outside this sub (package
                  #            globals or variables of the enclosing scope); the sub communicates
                  #            with its caller only through them.
                  $which_iteration=$_[0];
                  %good_matches_list=();
                  # $read_point_found counts how many times this sub has been entered
                  $read_point_found++;
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                  #  (3.3) Following is the KEY part for controlling iteration
                  #__________________________________________________________
                  if( $read_point_found < $which_iteration){
                       # Wanted iteration not reached yet: forget the hit parsed so far
                       $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
                       $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
                       $duplicated_match_count=0;
                       if( !$Accumulate_matches){  %hash_out=(); } ## this is to remove any discarded pairs in the iteration
                  }elsif( $read_point_found == $which_iteration){
                       # Exactly at the wanted iteration: nothing to do, state is kept as it is

                  }elsif( $which_iteration and $read_point_found >  $which_iteration){
                       # Past the wanted iteration: store the pending hit, then stop reading.
                       @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                  \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                  $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                  $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                  $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                       %hash_out=         %{$out_from_put_mspa_lines[0]};
                       %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                       $read_point_found= $out_from_put_mspa_lines[2];
                       # There is no loop inside this sub, so 'last' leaves a loop that is
                       # active in the caller (Perl warns "Exiting subroutine via last" when
                       # warnings are on). Consequently the return() below is not reached
                       # on this path. Presumably the loop is the caller's line-reading
                       # loop -- confirm.
                       last;
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # If you did not set the which iteration option
                  #_________________________________________________________
                  }elsif(!$which_iteration){
                       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                       # (3.4) Default situation
                       #____________________________________________________________
                       print "\n# (WARN) You did not set \$which_iteration option \n\n" if $verbose;
                       # From the second entry onwards, store the pending hit first
                       if($read_point_found > 1){
                             @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
                                    \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                    $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                    $match_string_stop, $read_point_found, $PSI_BLA_ACCUMU_hits_eval_thresh,
                                    $take_last_iter_PSI_BLA, $PSI_BLA_ACCUMU_hits_eval_thresh, $Evalue_limit)};
                             %hash_out=         %{$out_from_put_mspa_lines[0]};
                             %accumulative_hits=%{$out_from_put_mspa_lines[1]};
                             $read_point_found= $out_from_put_mspa_lines[2];
                       }
                       # ... then reset the per-hit state
                       $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
                       $query_range_stop=$query_range_start=$match_string_stop=$mspa_line=$new_sorted_name='';
                       $entry_found=$duplicated_match_count=0;
                       # Without $Accumulate_matches the stored hits are dropped as well
                       # (the two counters were already zeroed on the line above)
                       if( !$Accumulate_matches){  %hash_out=(); $entry_found=0; $duplicated_match_count=0;     }
                  }
                  return($which_iteration);
             }


}


#______________________________________________________________________________
# Title     : put_commas_in_number
# Usage     :
# Function  :
# Example   :
# Keywords  : commify
# Options   :
# Author    :
# Category  :
# Returns   :
# Version   : 1
#------------------------------------------------------------------------------
sub put_commas_in_number{
    # Purpose  : insert a comma after every third digit of the integer part of
    #            a number (1234567.89 -> 1,234,567.89). Digits after a decimal
    #            point are left untouched.
    # Argument : $_[0] = the number, or a reference to a scalar holding it.
    # Returns  : reference to the formatted string.
    my($input_num, $input_num_rv);

    # Decide by ref() rather than by truth of the dereferenced value, so that a
    # reference to 0 (or to '') is formatted instead of the text 'SCALAR(0x...)'.
    $input_num = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    $input_num = '' unless defined $input_num;

    # Work on the reversed string: groups of three digits are then counted from
    # the units upwards. (?!\d*\.) skips the digits that, in the reversed
    # string, still have the decimal point ahead of them (= the fraction part).
    $input_num = scalar reverse("$input_num");
    $input_num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1\,/g;
    $input_num_rv = scalar reverse($input_num);
    return(\$input_num_rv);
}



#______________________________________________________________________________
# Title     : put_numbers_in_bins
# Usage     : @num=@{&put_numbers_in_bins(\@num, <number_of_bins>, <max_num>)};
# Function  : bin numbers
# Example   : @num=@{&put_numbers_in_bins(\@num, 20, 1000)};
# Keywords  : make_bar_charts_with_numbers, bin_numbers
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub put_numbers_in_bins{
    # Purpose  : replace every number by the upper edge of the bin it falls in.
    #            The range 0..max_number is cut into bin_number bins of width
    #            max_number/bin_number.
    # Arguments: $_[0] = ref. of an array of numbers
    #            $_[1] = number of bins  (scalar ref, or plain scalar)
    #            $_[2] = maximum number  (scalar ref, or plain scalar)
    #            When $_[1] or $_[2] is omitted, the number of input elements
    #            is used for it.
    # Returns  : ref. of an array with the binned values, in input order.
    my($i, @numbers, @binned_numbers, $bin_number, $max_number,
       $bin_width, $scale, $final_cal_res);
    @numbers=@{$_[0]};

    # Defaults; they are kept unless the caller really supplies a value
    $bin_number=$max_number=@numbers;

    if(ref($_[1]) eq 'SCALAR'){
         $bin_number=${$_[1]} if defined ${$_[1]};
    }elsif(defined $_[1]){
       $bin_number=$_[1];
       print "\n# (W) put_numbers_in_bins needs bin number ref as 2nd arg";
    }
    if(ref($_[2]) eq 'SCALAR'){
         $max_number=${$_[2]} if defined ${$_[2]};
    }elsif(defined $_[2]){
       $max_number=$_[2];
       print "\n# (W) put_numbers_in_bins needs max number ref as 3rd arg\n";
    }

    # Nothing to bin (this also keeps the divisions below away from 0/0)
    return(\@binned_numbers) unless @numbers;

    unless($bin_number != 0 and $max_number != 0){
       die "\n# (E) put_numbers_in_bins: bin number and max number must be non-zero\n";
    }

    $bin_width = $max_number/$bin_number;   # size of one bin
    $scale     = $bin_number/$max_number;   # value -> (fractional) bin index
    for($i=0; $i< @numbers; $i++){
       # int(...+1) is the 1-based bin index, times the width = upper bin edge
       $final_cal_res=$bin_width*int($numbers[$i]*$scale+1);
       push(@binned_numbers, $final_cal_res);
    }
    return(\@binned_numbers);
}

#______________________________________________________________________________
# Title     : put_this_process_in_the_background
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub put_this_process_in_the_background{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Putting the editor into background from nik@tiuk.ti.com
    #__________________________________________________________
    # Purpose : fork; the parent process terminates, the child carries on in a
    #           new session (detached from the controlling terminal).
    # Returns : in the child, the return value of setsid().
    #           If fork fails, a warning is issued and the (single) process
    #           simply continues in the foreground; nothing is returned.
    use POSIX qw(setsid);

    my $pid = fork;
    unless(defined $pid){
        warn "\n# (W) put_this_process_in_the_background: fork failed: $!\n";
        return;
    }
    # Parent: leave quietly with a success status. ('die 0' printed "0 at ..."
    # on STDERR and gave the shell a failure exit code.)
    exit 0 if $pid;

    # Child: disassociate from terminal etc.
    my $session = setsid();
    if($session == -1){
        warn "\n# (W) put_this_process_in_the_background: setsid failed: $!\n";
    }
    return($session);
}



#______________________________________________________________________________
# Title     : put_mspa_lines_to_hash_from_bla
# Usage     : @out_from_put_mspa_lines=@{&put_mspa_lines_to_hash_from_bla(\%hash_out,
#                                        \%accumulative_hits,
#                                        $query,$matched,$evalue, $score, $seq_id,
#                                        $sorted_name, $query_range_start,
#                                        $query_range_stop,$match_string_start,
#                                        $match_string_stop, $read_point_found,
#                                        $PSI_BLA_ACCUMU_hits_eval_thresh,
#                                        $take_last_iter_PSI_BLA,
#                                        $PSI_BLA_ACCUMU_hits_eval_thresh,
#                                        $Evalue_limit)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.5
#------------------------------------------------------------------------------
sub put_mspa_lines_to_hash_from_bla{
    # Purpose  : format one parsed BLAST hit as an MSPA line and store it in
    #            (copies of) the two hashes handed in.
    # Arguments: $_[0]  ref. of hash of hits         (key: sorted name)
    #            $_[1]  ref. of hash of accumulated hits
    #            $_[2]  query name                   $_[3]  matched seq name
    #            $_[4]  E value                      $_[5]  score
    #            $_[6]  seq. identity                $_[7]  sorted name (hash key)
    #            $_[8]  query range start            $_[9]  query range stop
    #            $_[10] match range start            $_[11] match range stop
    #            $_[12] read point (written as the last column of the line)
    #            $_[13] E value threshold for the accumulated hits
    #            $_[14] true = take last iteration only (no accumulation)
    #            $_[15] same threshold as $_[13]; wins over it when defined
    #            $_[16] E value limit; a hit is stored only if its E value is
    #                   below it. Defaults to 10 when omitted.
    # Returns  : ref. of an array:
    #            (\%hash_out, \%accumulative_hits, $read_point_found, $query,
    #             $matched, $evalue, $score, $seq_id, $sorted_name,
    #             $query_range_start, $query_range_stop,
    #             $match_string_start, $match_string_stop)
    #            $query and $matched come back with their '_start-stop' suffix.
    # Note     : the input hashes are copied; the caller has to take the
    #            returned hashes to see the update.
    my (@finale_out, $sorted_name, $mspa_line, $evalue, $score, $matched,
        $seq_id, $query_range_start,$PSI_BLA_ACCUMU_hits_eval_thresh,
        $query_range_stop, $query, $match_string_start, $match_string_stop,
        $read_point_found, %hash_out, %accumulative_hits, $Evalue_thresh,
        $take_last_iter_PSI_BLA);   # was undeclared -> overwrote the package global
    $Evalue_thresh=10;
    %hash_out=%{$_[0]};         %accumulative_hits=%{$_[1]};
    $query=$_[2];               $matched=$_[3];
    $evalue=$_[4];              $score=$_[5];
    $seq_id=$_[6];              $sorted_name=$_[7];
    $query_range_start=$_[8];   $query_range_stop =$_[9];
    $match_string_start=$_[10]; $match_string_stop=$_[11];
    $read_point_found=$_[12];   $PSI_BLA_ACCUMU_hits_eval_thresh=$_[13];
    $take_last_iter_PSI_BLA=$_[14];
    # The optional trailing arguments must not wipe out what is already set
    $PSI_BLA_ACCUMU_hits_eval_thresh=$_[15] if defined $_[15];
    $Evalue_thresh=$_[16]                   if defined $_[16];

    $query  ="${query}_${query_range_start}-${query_range_stop}";

    # Put the matched range on the matched name, replacing a range suffix
    # ('_12-345') that is already there.
    if($matched =~/^(\S+)\_\d+\-\d+\s*$/){
        $matched="${1}_${match_string_start}-${match_string_stop}";
    }else{
        $matched="${matched}_${match_string_start}-${match_string_stop}";
    }

    if($score=~/\S/ and $evalue=~/\S/ and $match_string_start=~/\S/ and $Evalue_thresh > $evalue){
        $mspa_line=sprintf("%-6s %-8s %-5s %-5s %-5s %-32s %-5s %-s\t%-s\t%-s\n",
                           $score, $evalue, $seq_id, $query_range_start, $query_range_stop,
                           $query, $match_string_start, $match_string_stop, $matched, $read_point_found);
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # This is where I really put the matches !!!
        # An already stored line is replaced only when its E value (2nd column)
        # is not better (not smaller) than that of the new line.
        #_____________________________________________________
        if($hash_out{$sorted_name}=~/^\S+\s+(\S+)\s+/){
            if($1 >= $evalue){
                $hash_out{$sorted_name}=$mspa_line;
            }
        }else{
            $hash_out{$sorted_name}=$mspa_line;
        }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # This part is to rescue the hits dropped by matrix migration
        # Here an existing line is replaced only by a strictly better E value.
        #_________________________________________________________________
        if(!$take_last_iter_PSI_BLA and $evalue <= $PSI_BLA_ACCUMU_hits_eval_thresh ){
            if($accumulative_hits{$sorted_name}){
                if($accumulative_hits{$sorted_name}=~/^[\t ]*\S+[\t ]+(\S+)[\t ]/){
                     if($evalue < $1){
                             $accumulative_hits{$sorted_name}=$mspa_line;   }   }
            }else{ $accumulative_hits{$sorted_name}=$mspa_line;     }
        }
    }
    @finale_out=(\%hash_out, \%accumulative_hits, $read_point_found, $query,
                 $matched, $evalue, $score, $seq_id, $sorted_name,
                 $query_range_start, $query_range_stop, $match_string_start, $match_string_stop  );
    return(\@finale_out);
}







#________________________________________________________________________________
# Title     : convert_bla_multaln_to_msf
# Usage     : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file, [i=2])};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced,
#              when you use m6 option for PSI blast
#             this adds '00x' extensions to the repeatedly occurring seq names
#
# Example   : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file,
#                                              $verbose, "i=$iteration")};
# Keywords  : psi_blast_to_msf, psi_blast_multaln_to_msf
# Options   :
#   i=$iteration
#   v  for verbose
# Author    :
# Category  :
# Version   : 1.6
#--------------------------------------------------------------------------------
sub convert_bla_multaln_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		# Purpose : reads the multiple alignment blocks of each input BLAST output
		#           file, concatenates the rows per sequence name and hands the
		#           alignment of the chosen iteration to write_msf().
		#           The output file is '<base name of input>.msf'.
		# Options : i=<n> iteration to take (default 1), 'v' verbose,
		#           'a' put 'T' in front of sequence names made of digits only.
		# Returns : ref. of an array with the names of the msf files written by
		#           THIS call (the list used to be a package global and kept
		#           growing from call to call).
		my(%alignment_hash_subject, $seq_order, $choose_iteration, %final_output_hash,
			 $seq_name, $put_alphabet_to_number_only_name, @final_out);
		$choose_iteration=1;
		if($vars{'i'}=~/(\d+)/){
			 $choose_iteration=$1;
		}
		if($char_opt=~/v/){ $verbose='v' }
		if($char_opt=~/a/){ $put_alphabet_to_number_only_name='a' }

		print "\n# $0: bla_multaln_to_msf, \$choose_iteration is $choose_iteration\n";

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				$file_base_name=${&get_base_names($file[$i])};
				print "\n# bla_multaln_to_msf: processing $file[$i]\n";
				my($present_iteration, %seq_names_in_block, $seq_name_ori, $sequence, $blast_fh);

				# Every file starts from scratch, nothing of the previous file may
				# end up in this file's alignment
				%alignment_hash_subject=();
				%final_output_hash=();
				$seq_order='';

				unless(open($blast_fh, '<', $file[$i])){
						warn "\n# (E) convert_bla_multaln_to_msf: can not open $file[$i]: $! , skipping it\n";
						next;
				}
				while(<$blast_fh>){
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  Finds the query sequence, resets $start_point and next line
						#____________________________________________
						if(/^Query=\s*(\S+)/){
								$query_seq=$1;
								print "\n# The query sequence is: $query_seq\n";
						}elsif(/^Searching\.\.\.\.\.\.\.\./ or eof){ ### to make sure it gets the last one
								# A new iteration starts (or the file ends): what has been
								# collected so far is the alignment of the finished iteration
								$present_iteration++;
								if($present_iteration > $choose_iteration){
										%final_output_hash=%alignment_hash_subject;
										last;
								}else{
										%final_output_hash=%alignment_hash_subject;
										%alignment_hash_subject=();
										%seq_names_in_block=();
										$seq_order='';
								}
						}elsif(/^(QUERY)\s+\d*\s*(\S\S+)\s*\d*$/){
								# The QUERY row opens an alignment block
								%seq_names_in_block=();
								$seq_name=$1;
								$seq_order=$seq_name;
								$seq_names_in_block{$seq_name}++;
								$alignment_hash_subject{$seq_name} .= $2; ## bug fix '.'
						}elsif(   /^(\S+)\s+\d+\s+(\S+)\s+\d+\s*$/     # name  start  SEQUENCE  stop
						       or /^(\S+)\s+(\-+)\s*$/ ){              # name  ---------- (gaps only)
								# Both row types are handled the same way. $1 and $2 belong
								# to the pattern which matched: a failed match leaves the
								# capture variables alone.
								$seq_name=$seq_name_ori=$1;
								$sequence=$2;

								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
								# This is to replace number only seq names to alphabetical ones
								#_____________________________________________________________
								if($put_alphabet_to_number_only_name and $seq_name=~/^\d+$/){
										$seq_name='T'.$seq_name;
										$seq_name_ori='T'.$seq_name_ori;
								}

								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
								# This is to handle multiple identical seq name entry
								#_____________________________________________________
								if($seq_names_in_block{$seq_name} > 0){
									 $seq_name=$seq_name."00$seq_names_in_block{$seq_name}";
								}
								$seq_order.=" $seq_name";
								$alignment_hash_subject{$seq_name}.=$sequence;
								$seq_names_in_block{$seq_name_ori}++; ## NOTE that it is $seq_name_ori not  $seq_name
						}

				}
				close($blast_fh);
				print "\n# finished reading in BLAST output(@file) \n";

				# now in %alignment_hash, I have many pairs like:
				# --------VAVCQNMGIGK--DGNLPWPPLRNEYKYFQR
				# --------WARKNKLGWGFELKGSMPSAPLITEQTYFKD
				# -----------------------KTWFSIPEKNRPLK
				# -----------------------KTWEEIPALDKELK
				print "\n# the seq order is: $seq_order\n" if $verbose;

				$output_msf="$file_base_name\.msf";
				print "\n# $0: running write_msf subroutine\n";
				&write_msf(\%final_output_hash, \$output_msf, "o=$seq_order");
				print "\n# $0: $output_msf is created\n";
				push(@final_out, $output_msf);
		}
		return(\@final_out);
}








#______________________________________________________________
# Title     : get_sub_hash
# Usage     : %sub_hash=%{&get_sub_hash(\%FASTA, \@list)};
# Function  : fetches hash keys and values by giving keys to
#             a hash
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : subhash, sub_hash, get_hash_elements, fetch_sub_hash
#             take_sub_hash, get_hash_by_keys, get_sub_hash_by_keys
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_sub_hash{
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Purpose : for every input hash, builds the sub hash made of those entries
	#           whose key is one of the strings in @raw_string (as delivered by
	#           handle_arguments).
	# Returns : one input hash  -> ref. of its sub hash
	#           several hashes  -> list of refs., one sub hash per input hash, in
	#                              input order. (All refs used to point to one and
	#                              the same hash, into which the matches of every
	#                              input hash had been merged.)
	#           no input hash   -> undef
	my (@out_hash_all_ref);
	for($i=0; $i < @hash; $i++){
	   my $hash_ref=$hash[$i];
	   my %out_hash;          # a fresh one for each input hash
	   for($j=0; $j < @raw_string; $j++){
		  # direct look-up instead of comparing with every key of the hash
		  if(exists $hash_ref->{$raw_string[$j]}){
			 $out_hash{$raw_string[$j]}=$hash_ref->{$raw_string[$j]};
		  }
	   }
	   push(@out_hash_all_ref, \%out_hash);
	}
	if(@out_hash_all_ref > 1){
	   return(@out_hash_all_ref);
	}else{
	   return($out_hash_all_ref[0]);
	}
}



#______________________________________________________________
# Title     : get_smallest_file
# Usage     : $smallest_file_name=${&get_smallest_file(@ARGV)};
# Function  : checks the size of files and returns the smallest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_smallest_file, smallest_file, find_smallest_file
#             get_the_smallest_file, choose_the_smallest_file,
#             fetch_smallest_file, take_smallest_file, get_smaller_file,
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the smallest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_smallest_file{
		# Purpose  : picks the smallest of the given files.
		# Arguments: file names, as a list or as ref. of an array. An 'e' (or
		#            '-e') element which is not the name of an existing plain file
		#            switches the extract mode on.
		# Returns  : ref. of the name of the smallest file (ref. of undef when no
		#            given file exists). In extract mode in addition a ref. of
		#            the array of the remaining names, in their input order.
		# Notes    : - names of files which do not exist are ignored (they used to
		#              count as size 0 and so won).
		#            - there is no upper size limit any more (it was 10000000000).
		#            - of equally small files the first one is taken.
		my @in;
		if(ref $_[0] eq 'ARRAY'){
				@in = @{$_[0]};
		}else{
				@in = @_;
		}
		my ($smallest, $smallest_file, $smallest_index, $i, $extract_opt, @files);

		# take the 'e' option out of the file list
		for($i=0; $i< @in; $i++){
				if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
						$extract_opt=1;
				}else{
						push(@files, $in[$i]);
				}
		}

		for($i=0; $i< @files; $i++){
				next unless defined($files[$i]) and -e $files[$i];
				my $size=(-s _) || 0;     # -s is false for an empty file; '_' reuses the stat of -e
				if(!defined($smallest) or $size < $smallest){
						$smallest=$size;
						$smallest_file=$files[$i];
						$smallest_index=$i;
				}
		}

		if($extract_opt){
				splice(@files, $smallest_index, 1) if defined($smallest_index);
				return(\$smallest_file, \@files);
		}else{ return(\$smallest_file); }
}

#______________________________________________________________
# Title     : get_largest_file
# Usage     : $largest_file_name=${&get_largest_file(@ARGV)};
# Function  : checks the size of files and returns the largest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_largest_file, largest_file, find_largest_file
#             get_the_largest_file, choose_the_largest_file, get_biggest_file
#             fetch_largest_file, take_largest_file, get_bigger_file, get_larger_file
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the largest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Category  :
# Version   : 1.4
#--------------------------------------------------------------
sub get_largest_file{
		# Purpose  : picks the largest of the given files.
		# Arguments: file names, as a list or as ref. of an array. An 'e' (or
		#            '-e') element which is not the name of an existing plain file
		#            switches the extract mode on.
		# Returns  : ref. of the name of the largest file (ref. of undef when no
		#            given file exists). In extract mode in addition a ref. of
		#            the array of the remaining names, in their input order.
		# Notes    : - names of files which do not exist are ignored.
		#            - an existing but empty file can be the answer, too (it used
		#              to be passed over, so a list of empty files gave undef).
		#            - of equally large files the first one is taken.
		my @in;
		if(ref $_[0] eq 'ARRAY'){
				@in = @{$_[0]};
		}else{
				@in = @_;
		}
		my ($largest, $largest_file, $largest_index, $i, $extract_opt, @files);

		# take the 'e' option out of the file list
		for($i=0; $i< @in; $i++){
				if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
						$extract_opt=1;
				}else{
						push(@files, $in[$i]);
				}
		}

		for($i=0; $i< @files; $i++){
				next unless defined($files[$i]) and -e $files[$i];
				my $size=(-s _) || 0;     # -s is false for an empty file; '_' reuses the stat of -e
				if(!defined($largest) or $size > $largest){
						$largest=$size;
						$largest_file=$files[$i];
						$largest_index=$i;
				}
		}

		if($extract_opt){
				splice(@files, $largest_index, 1) if defined($largest_index);
				return(\$largest_file, \@files);
		}else{ return(\$largest_file); }
}



#______________________________________________________________
# Title     : get_sequence_complexity
# Usage     : print "\n", ${&get_sequence_complexity(\$seq)};
# Function  : calculates the single sequence's sequence complexity
#             If the seq given is larger than 20, it divides it into
#             frags of 20 aa and gets the average of it.
# Example   :  ${&get_sequence_complexity(\$seq)};
#             while $seq='TTTTTACDEFGHIKLMNPQRSTVWYAAAAACCCADFADFA'
# Warning   :
# Keywords  : sequence_complexity, calc_sequence_complexity,
#             calc_seq_complexity, get_seq_complexity, seg, get_sequence_entropy
# Options   : _  for debugging.
#             #  for debugging.
#             'w=' for window size as the first arg
# Returns   : Ref. for a scalar digit.
# Argument  : ref. of string.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_sequence_complexity{
	 # Purpose  : sequence complexity = (number of different residues)/(length).
	 #            A sequence longer than the window is cut into fragments with
	 #            divide_array(); the result is then the average (array_average())
	 #            of (different residues in the fragment)/(window size).
	 # Arguments: $_[0] = sequence: string, ref. of string, or ref. of an array of
	 #                    residues
	 #            $_[1] = window size (optional, default 20): number, ref. of
	 #                    number, or 'w=<number>'
	 # Returns  : ref. of the complexity; ref. of 0 for an empty sequence.
	 my ($complexity, @seq, $i, $j, $seq, $win_arg);
	 my $win=20;

	 # ref() decides how to read the argument; testing the dereferenced value for
	 # truth made a ref. of '' or '0' be analysed as the text 'SCALAR(0x...)'.
	 if(ref($_[0]) eq 'ARRAY'){
		@seq=@{$_[0]};
	 }else{
		$seq=(ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
		$seq='' unless defined($seq);
		@seq=split(//, $seq);
	 }

	 if(defined($_[1])){
		$win_arg=(ref($_[1]) eq 'SCALAR') ? ${$_[1]} : $_[1];
		if(defined($win_arg)){
		   $win_arg=~s/^\s*w=\s*//i;      # the documented 'w=' form
		   # anything which is not a positive number leaves the default in place
		   if($win_arg=~/^\d*\.?\d+\s*$/ and $win_arg > 0){ $win=$win_arg+0; }
		}
	 }

	 # no residue at all: nothing to divide by
	 if(@seq == 0){
		$complexity=0;
		return(\$complexity);
	 }

	 if(@seq <= $win){
		my (%seq, @keys);
		for($i=0; $i< @seq; $i++){
		   $seq{$seq[$i]}++;
		}
		@keys= keys %seq;
		$complexity=@keys/@seq;
	 }else{
		my @frag=@{&divide_array(\@seq, "s=$win")};
		my @complexity=();
		for($i=0; $i < @frag; $i++){
		   my (%seq, @keys);
		   my @arr=@{$frag[$i]};
		   for($j=0; $j< @arr; $j++){
			  $seq{$arr[$j]}++;
		   }
		   @keys=keys %seq;
		   # NOTE(review): divides by the window size even if a fragment is
		   # shorter than the window -- confirm what divide_array returns for
		   # the tail of the sequence.
		   push(@complexity, @keys/$win);
		}
		$complexity=${&array_average(\@complexity)};
	 }
	 return(\$complexity);
}


#______________________________________________________________________________
# Title     : calculate_sequence_composition_identity
# Usage     : $ID=${&calculate_sequence_composition_identity(\%hash1, \%hash2)};
# Function  :
# Example   :
#             %hash1=qw(H 1 E 3 C 6);
#             %hash2=qw(H 5 E 2 C 3);
# Keywords  : calculate_composition_identiry, get_composition_identity_of_secture
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub calculate_sequence_composition_identity{
    # Purpose  : compares two compositions (residue type => number of
    #            occurrences). Each composition is turned into fractions of its
    #            own total; the identity is
    #                 1 - (sum over all types of |fraction1 - fraction2|)/2
    # Arguments: $_[0], $_[1] = refs. of the two composition hashes
    # Returns  : ref. of the identity, formatted with two decimals ('0.60')
    # Dies     : when the occurrences of one of the hashes add up to zero
    my %first  = %{$_[0]};
    my %second = %{$_[1]};
    my (%seen, $total_first, $total_second, $abs_diff_total, $identity);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # all residue types of both hashes (such as H, E, C....), each once
    #___________________________________________________________________
    my @residues = grep { ! $seen{$_}++ } (keys %first, keys %second);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # total number of occurrences in each composition
    #_________________________________________________
    foreach my $residue (@residues){
       $total_first  += $first{$residue};
       $total_second += $second{$residue};
    }
    unless($total_first and $total_second){
       die "\n Something is wrong with \%hash1 and \%hash2 \n\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # summing up the differences of the fractions, type by type
    #__________________________________________________________________
    foreach my $residue (@residues){
       my $fraction_first  = $first{$residue}/$total_first;
       my $fraction_second = $second{$residue}/$total_second;
       $abs_diff_total += abs($fraction_first - $fraction_second);
    }

    $identity = 1 - $abs_diff_total/2;
    $identity = sprintf("%-.2f", $identity);
    return(\$identity);
}


#______________________________________________________________________________
# Title     : calculate_protein_structural_domain_interactibility_from_Y2H
# Usage     : $0 pdb100d_1_48.pdbg or pdb100d_1_48.mpfa
# Function  : This takes SCOP domain def file (such as SFINP or MPFA file: pdb100d_1_48.mpfa
#              and calculates the interactibility of superfamilies.
# Example   :
# Keywords  : calculate_SCOP_structural_domain_interactibility
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub calculate_protein_structural_domain_interactibility_from_Y2H{
    # Purpose  : reads lines with two names of interacting superfamilies
    #            ('1_2_3  4_5_6', '_' is turned into '.') and prints for every
    #            superfamily one line
    #                 <superfamily>,<number of partners>,<partner>,<partner>...
    #            The lines are sorted by the number of different partners
    #            (largest first; equal numbers by name). The total number of
    #            superfamilies is printed at the end.
    # Argument : $_[0] = name of the input file
    # Returns  : nothing useful (the result of the last print)
    # Dies     : when the file can not be opened
    # Note     : the sort used to look for '<name><white space><number>' in the
    #            comma separated lines, never found it and so never sorted by
    #            number. The list of lines is now private to each call; it used
    #            to be a package global which kept growing from call to call.
    my($i, $SFINP_file, $sfinp_fh, @superfam, %superfam, @matched, %duplicate,
       $superfam1, $superfam2, $num_superfam, $num_matches, @records);
    $SFINP_file=$_[0];
    open($sfinp_fh, '<', $SFINP_file)
        || die "\n# (E) calculate_protein_structural_domain_interactibility_from_Y2H: can not open $SFINP_file: $!\n";
    while(<$sfinp_fh>){
        if(/^(\d\S+)\s+(\d\S+)/){
            $superfam1=$1;
            $superfam2=$2;
            $superfam1=~s/_/\./g;
            $superfam2=~s/_/\./g;
            if($superfam1=~/\S/ and $superfam2=~/\S/){
                # the interaction counts for both partners
                push(@{$superfam{$superfam1}}, $superfam2);
                push(@{$superfam{$superfam2}}, $superfam1);
            }
        }
    }
    close($sfinp_fh);

    $num_superfam=@superfam=sort keys %superfam;
    for($i=0; $i< @superfam; $i++){
        %duplicate=();
        # different partners only, in the order of their first occurrence
        $num_matches=@matched=grep { ! $duplicate{$_}++ } @{$superfam{$superfam[$i]}};
        # the number is kept next to the line, so the sort does not have to
        # parse it out of the text again
        push(@records, [ $superfam[$i], $num_matches,
                         join(',', $superfam[$i], $num_matches, @matched) ]);
    }
    for(sort { $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] } @records){
       print "$_->[2]\n";
    }
    print "\n# number of total superfam: $num_superfam\n";
}



#______________________________________________________________________________
# Title     : calculate_protein_structural_domain_interactibility
# Usage     : $0 pdb100d_1_48.pdbg or pdb100d_1_48.mpfa
# Function  : This takes a SCOP domain definition file (such as a PDBG or MPFA
#              file: pdb100d_1_48.mpfa) and calculates the interactibility of superfamilies.
# Example   :
# Keywords  : calculate_SCOP_structural_domain_interactibility
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub calculate_protein_structural_domain_interactibility{
    # Purpose : Group the entries of a PDBG/MPFA file by superfamily, classify
    #           every entry name as 'whole' or 'domain' by its suffix, and
    #           print for each superfamily the ratio whole/domain followed by
    #           summary counts of superfamilies and of their members.
    # Args    : $_[0] - path of the PDBG/MPFA file. Header lines of the form
    #           ">name a.b.c..." are used; a.b.c is the superfamily key.
    # Output  : STDOUT only.
    # Dies    : when the file can not be opened.
    my ($PDBG_file) = @_;
    my %superfam;

    open(my $pdbg_fh, '<', $PDBG_file)
        || die "\n Can not open $PDBG_file: $! <- calculate_protein_structural_domain_interactibility\n";
    while (my $line = <$pdbg_fh>) {
        push(@{$superfam{$2}}, $1) if $line =~ />(\S+)\s(\d+\.\d+\.\d+)/;
    }
    close($pdbg_fh);

    # Superfamily counts and the matching member totals. They start at 0 so
    # that every call is independent and the report prints 0, not nothing.
    my ($domainonly_protein_num,     $interactible_protein_domain,     $whole_chain_protein_num)     = (0, 0, 0);
    my ($domainonly_protein_num_Mem, $interactible_protein_domain_Mem, $whole_chain_protein_num_Mem) = (0, 0, 0);

    for my $sf (sort keys %superfam) {
        my @members    = @{$superfam{$sf}};
        my $num_member = @members;
        my ($whole_protein, $domain_protein) = (0, 0);
        my $interactibility;

        # The order of these tests matters: '__' must be tried before '_'.
        for my $member (@members) {
            if($member =~ /__$/){
                $whole_protein++;
            }elsif($member =~ /_$/){
                $domain_protein++;
            }elsif($member =~ /[\._]\d$/){ ## >e1avo.7n 1.2
                $whole_protein++;
            }elsif($member =~ /[a-z]\d$/i){
                $domain_protein++;
            }
        }

        if($domain_protein < 1){
            $interactibility = 'All whole chain protein';
            $whole_chain_protein_num++;
            $whole_chain_protein_num_Mem += $num_member;
        }else{
            $interactibility = sprintf("%-3.2f", $whole_protein/$domain_protein);
            if($interactibility == 0){
                $domainonly_protein_num++;
                $domainonly_protein_num_Mem += $num_member;
            }else{
                $interactible_protein_domain++;
                $interactible_protein_domain_Mem += $num_member;
            }
        }
        print "\n$sf -> $interactibility ($whole_protein/$domain_protein)";
    }

    my $non_interactible_protein     = $domainonly_protein_num     + $whole_chain_protein_num;
    my $non_interactible_protein_Mem = $domainonly_protein_num_Mem + $whole_chain_protein_num_Mem;
    print "\n\n \$domainonly_protein_num is $domainonly_protein_num";
    print "\n \$interactible_protein_domain $interactible_protein_domain (\$non_interactible_protein: $non_interactible_protein)";
    print "\n \$whole_chain_protein_num is $whole_chain_protein_num\n\n";

    my $total_Mem_interact = $domainonly_protein_num_Mem + $interactible_protein_domain_Mem + $whole_chain_protein_num_Mem;
    print "\n\n \$domainonly_protein_num_Mem is $domainonly_protein_num_Mem";
    # The member section reports the member total of the non-interactible
    # superfamilies (the superfamily count is already printed above).
    print "\n \$interactible_protein_domain_Mem $interactible_protein_domain_Mem ($non_interactible_protein_Mem)";
    print "\n \$whole_chain_protein_num_Mem is $whole_chain_protein_num_Mem (total: $total_Mem_interact)\n\n";

}


#______________________________________________________________________________
# Title     : calculate_versatility_of_domain_interaction
# Usage     : &calculate_versatility_of_domain_interaction($PSDIP_file, $PDBG_file);
# Function  : generates various analysis files
# Example   :
#   PSDIP file is like:
#   PROTEIN_NONINTERACT     d168lb_ d168lc_ 168l  4.0 0 b: c:
#   PROTEIN_NONINTERACT     d168lc_ d168ld_ 168l  4.7 0 c: d:
#   PROTEIN_INTERACT        d168ld_ d168le_ 168l  4.2 5 d: e:
#   PROTEIN_INTERACT        d175la_ d175lb_ 175l a: b:
#   PROTEIN_INTERACT        d176la_ d176lb_ 176l a: b:
#
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub calculate_versatility_of_domain_interaction{
    # Purpose : Map the interacting domain pairs of a PSDIP file onto the
    #           superfamilies defined in a PDBG/MPFA file and report, for each
    #           superfamily, how many pairs and how many distinct partner
    #           superfamilies it has.
    # Args    : $_[0], $_[1] - the two file names (plain strings or references
    #           to strings) in either order. The argument whose name contains
    #           ".psdip" is the PSDIP file; the other one is read as PDBG.
    # Output  : Per-superfamily lines and totals on STDOUT, plus four files in
    #           the current directory, where <type> is the number captured
    #           from the PDBG file name by /pd\w+?(\d+)\w*_/ (empty if none):
    #             SF_mem_no_vs_Interact_PAIR_no_<type>.txt
    #             SF_member_num_vs_Interact_num_<type>.txt
    #             ALL_PAIR_vs_SUPERFAM_pair_<type>.txt
    #             versatility_summary_<type>.piver
    # Dies    : when an input file is missing or empty, or a file can not be
    #           opened.
    my ($file1, $file2) = map { ref($_) eq 'SCALAR' ? ${$_} : $_ } @_[0, 1];
    my ($PSDIP_file, $PDBG_file);

    # Roles are decided by the .psdip name alone. (A test such as
    # /\.[pdbg|mpfa]/ is a character class: it accepts any extension that
    # starts with one of those letters, ".psdip" included.)
    if(defined $file2 and $file2 =~ /\.psdip/){
        ($PDBG_file, $PSDIP_file) = ($file1, $file2);
    }elsif(defined $file1 and $file1 =~ /\.psdip/){
        ($PDBG_file, $PSDIP_file) = ($file2, $file1);
    }
    unless(defined $PSDIP_file and defined $PDBG_file and -s $PSDIP_file and -s $PDBG_file){
        die "\n I need PSDIP and PDBG file <- calculate_versatility_of_domain_interaction\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check what PDBG file(%) was used to add in output file names
    #_________________________________________________________________
    my $PDBG_type = '';
    if($PDBG_file =~ /pd\w+?(\d+)\w*_/){    $PDBG_type = $1;    }

    # %pdbg      : domain name -> superfamily (a.b.c)
    # %pdbg_desc : superfamily -> tab/newline formatted descriptions
    # %superfam  : superfamily -> list of its domain names
    my (%pdbg, %pdbg_desc, %superfam);
    open(my $pdbg_fh, '<', $PDBG_file) || die "\n Can not open $PDBG_file: $!\n";
    while (my $line = <$pdbg_fh>) {
        if($line =~ /\>(\S+)\s(\d+\.\d+\.\d+)\S+\s+(.+)/){
            $pdbg{$1} = $2;  $pdbg_desc{$2} .= "\t$3\n";
            push(@{$superfam{$2}}, $1);
        }
    }
    close($pdbg_fh);
    my $total_superfam_number = keys %superfam;

    my (%superfam_versatility, %self_self_inter, %self_nelf_inter, %superfam_count);
    my ($total_self_self_inter_pair, $total_self_nelf_inter_pair, $total_non_interaction_pair) = (0, 0, 0);
    my ($whole_protein_domain_interact_pair, $whole_chain_domain_interact_pair,
        $whole_domain_domain_interact_pair) = (0, 0, 0);

    open(my $psdip_fh, '<', $PSDIP_file) || die "\n Can not open $PSDIP_file: $!\n";
    while (my $line = <$psdip_fh>) {
        if($line =~ /PROTEIN_INTERACT\s+(\S+)\s+(\S+)/){
            my ($pdbd1, $pdbd2) = ($1, $2);
            my $superfam1 = $pdbg{$pdbd1};
            my $superfam2 = $pdbg{$pdbd2};
            # Pairs with a domain that is not in the PDBG file are ignored.
            next unless $superfam1 and $superfam2;

            push(@{$superfam_versatility{$superfam1}}, $superfam2);
            push(@{$superfam_versatility{$superfam2}}, $superfam1);
            if($pdbd1 =~ /__$/ and $pdbd2 =~ /__$/){
                # Counted so the three categories stay disjoint; not reported.
                $whole_protein_domain_interact_pair++;
            }elsif($pdbd1 =~ /[a-z]_$/ and $pdbd2 =~ /[a-z]_$/){
                $whole_chain_domain_interact_pair++;
            }else{
                $whole_domain_domain_interact_pair++;
            }

            if($superfam1 eq $superfam2){
                $self_self_inter{$superfam1} = $superfam1;  $total_self_self_inter_pair++;
            }else{
                $self_nelf_inter{$superfam1} = $superfam2;  $total_self_nelf_inter_pair++;
            }
            $superfam_count{$superfam1}++;  $superfam_count{$superfam2}++;
        }elsif($line =~ /PROTEIN_NONINTERACT\s+(\S+)\s+(\S+)/){
            $total_non_interaction_pair++;
        }
    }
    close($psdip_fh);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Creating various files
    #__________________________________________________________________________
    my $pair_no_file  = "SF_mem_no_vs_Interact_PAIR_no_${PDBG_type}.txt";
    my $sf_num_file   = "SF_member_num_vs_Interact_num_${PDBG_type}.txt";
    my $all_pair_file = "ALL_PAIR_vs_SUPERFAM_pair_${PDBG_type}.txt";
    my $summary_file  = "versatility_summary_${PDBG_type}.piver"; ## protein interaction versitility
    open(my $pair_no_fh,  '>', $pair_no_file)  || die "\n Can not create $pair_no_file: $!\n";
    open(my $sf_num_fh,   '>', $sf_num_file)   || die "\n Can not create $sf_num_file: $!\n";
    open(my $all_pair_fh, '>', $all_pair_file) || die "\n Can not create $all_pair_file: $!\n";
    open(my $summary_fh,  '>', $summary_file)  || die "\n Can not create $summary_file: $!\n";
    print $summary_fh "# made by: calculate_versatility_of_domain_interaction to make piver file\n";
    print $summary_fh "# Total superfam number: $total_superfam_number in $PDBG_file\n";

    my @superfams = sort keys %superfam_versatility;
    my $total_interact_superfam_num = @superfams;
    my (@member_nos, @pair_nos, %SF_num_vs_INTER_num);
    for my $sf (@superfams) {
        next unless $sf =~ /\S+/;
        my @superfam_matched          = @{$superfam_versatility{$sf}};
        my $total_pair_inter_for_1_SF = @superfam_matched;
        my %non_redun_superfam_match  = map { $_ => $_ } @superfam_matched;
        my @non_redun_superfam_match  = sort keys %non_redun_superfam_match;
        my $match_SF_num              = @non_redun_superfam_match;
        my $member_no                 = @{$superfam{$sf}};

        print $summary_fh "$sf ($superfam_count{$sf}) \t$match_SF_num \t@non_redun_superfam_match\n";
        print $summary_fh "$pdbg_desc{$sf}\n";
        print "$sf\t$member_no\t$match_SF_num\t$total_pair_inter_for_1_SF\n";
        print $all_pair_fh "$match_SF_num\t$total_pair_inter_for_1_SF\n";
        push(@member_nos, $member_no);
        push(@pair_nos,   $total_pair_inter_for_1_SF);
        # Keyed by interaction count: a later superfamily with the same count
        # replaces the earlier value.
        $SF_num_vs_INTER_num{$superfam_count{$sf}} = $match_SF_num;
    }
    for my $count (sort {$a<=>$b} keys %SF_num_vs_INTER_num) {
        print $sf_num_fh "$count\t$SF_num_vs_INTER_num{$count}\n";
    }
    for my $i (0 .. $#member_nos) {
        print $pair_no_fh "$member_nos[$i]\t$pair_nos[$i]\n";
    }
    # Close every data file that was opened so the contents are flushed
    # before the function returns.
    close($sf_num_fh);
    close($pair_no_fh);
    close($all_pair_fh);

    # All ratios fall back to 0 when there is nothing to divide by (for
    # example a PSDIP file without any usable PROTEIN_INTERACT line).
    my $num_self_self_inter = keys %self_self_inter;
    my $num_self_nelf_inter = keys %self_nelf_inter;
    my $num_inter_superfam  = $num_self_self_inter + $num_self_nelf_inter;
    my $ratio_self_inter    = $num_inter_superfam ? $num_self_self_inter/$num_inter_superfam : 0;
    my $ratio_nelf_inter    = $num_inter_superfam ? $num_self_nelf_inter/$num_inter_superfam : 0;
    my $total_pairing          = $total_self_self_inter_pair + $total_self_nelf_inter_pair;
    my $total_possible_pairing = $total_pairing + $total_non_interaction_pair;
    my $total_r_self_inter     = $total_pairing ? $total_self_self_inter_pair/$total_pairing : 0;
    my $total_r_nelf_inter     = $total_pairing ? $total_self_nelf_inter_pair/$total_pairing : 0;
    my $average_interaction    = $total_possible_pairing
                               ? $total_self_self_inter_pair/$total_possible_pairing : 0;
    my $average_interaction_SF = $total_interact_superfam_num
                               ? $total_pairing/$total_interact_superfam_num : 0;

    print "\n\$num_self_self_inter : $ratio_self_inter (\$total_self_self_inter_pair: $total_self_self_inter_pair)";
    print "\n\$num_self_nelf_inter : $ratio_nelf_inter (\$total_self_nelf_inter_pair: $total_self_nelf_inter_pair)";
    print "\n\$total_r_self_inter: $total_r_self_inter";
    print "\n\$total_r_nelf_inter: $total_r_nelf_inter\n";
    print "\n$pair_no_file ";
    print "\n$sf_num_file ";
    print "\n$summary_file";
    print "\n$all_pair_file";
    print "\n# Total superfam number: $total_superfam_number in $PDBG_file";
    print "\n# Total interaction pair number: $total_pairing";
    print "\n# Average interaction all the possible pairs: $average_interaction";
    print "\n# Average interaction for all interacting superfam: $average_interaction_SF ($total_pairing/$total_interact_superfam_num)";
    print "\n# \$total_possible_pairing : $total_possible_pairing";
    print "\n# \$whole_chain_domain_interact_pair : $whole_chain_domain_interact_pair";
    print "\n# \$whole_domain_domain_interact_pair : $whole_domain_domain_interact_pair";
    print "\n\n";

    print $summary_fh "\n# Total pairing superfam number: $total_superfam_number in $PDBG_file";
    print $summary_fh "\n# Average interaction for all the possible pairs: $average_interaction";
    print $summary_fh "\n# Average interaction for all interacting superfam: $average_interaction_SF";
    print $summary_fh "\n# Total interaction pair number: $total_pairing";
    print $summary_fh "\n# \$total_possible_pairing : $total_possible_pairing\n";
    print $summary_fh "\n# \$whole_chain_domain_interact_pair : $whole_chain_domain_interact_pair";
    print $summary_fh "\n# \$whole_domain_domain_interact_pair : $whole_domain_domain_interact_pair";
    close($summary_fh);
}




#______________________________________________________________________________
# Title     : calculate_distance_for_2_points_in_3D
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub calculate_distance_for_2_points_in_3D{
    # Purpose : Euclidean distance between two points in 3D space.
    # Args    : $_[0], $_[1] - array references holding (x, y, z).
    # Returns : reference to the distance (scalar).
    my ($first_point, $second_point) = @_;
    my @from = @{$first_point};
    my @to   = @{$second_point};

    my $sum_of_squares = 0;
    for my $axis (0 .. 2) {
        my $delta = abs($from[$axis] - $to[$axis]);
        $sum_of_squares += $delta * $delta;
    }

    my $distance = sqrt($sum_of_squares);
    return(\$distance);
}


#______________________________________________________________________________
# Title     : calculate_ATGC_bases_ratio_window_scan
# Usage     :
# Function  :
# Example   :
# Keywords  : scan_DNA_to_calculate_ATGC_ratio
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.4  Aug. 1999
#------------------------------------------------------------------------------
sub calculate_ATGC_bases_ratio_window_scan{
    # Purpose : Walk through a (multi) FASTA file and hand every sequence to
    #           write_AT_GC_ratio_and_contents_ratio_files together with the
    #           window size and sampling interval.
    # Args    : $_[0] - FASTA file name
    #           $_[1] - window size
    #           $_[2] - sampling interval; defaults to 10 when missing or 0
    #           Each may be a plain value or a reference to a scalar.
    # Returns : the number of FASTA header lines seen.
    # Dies    : when the FASTA file can not be opened.
    # Note    : get_base_names and write_AT_GC_ratio_and_contents_ratio_files
    #           are defined elsewhere; the first is expected to return a
    #           scalar reference, the value returned by the second is stored
    #           as the 'previous name' for the next call.
    my ($input_FASTA_file, $window_size, $interval_arg)
        = map { ref($_) eq 'SCALAR' ? ${$_} : $_ } @_[0 .. 2];
    # Only a usable third argument overrides the default interval.
    my $sample_interval = $interval_arg || 10;
    my ($name, $strand, %strand, $orig_seq, $na_prv, $seq_counter);
    my $base = ${&get_base_names($input_FASTA_file)};

    print "\n INPUT: $input_FASTA_file, Winsize: $window_size, Sample_Int: $sample_interval \n";
    open(my $fasta_fh, '<', $input_FASTA_file) || die "\n can not open $input_FASTA_file \n\n";
    while (my $line = <$fasta_fh>) {
        if($line =~ /\>\s*(\S+)/){
            if($line =~ /\>\s*(\S+(\S)[\-\S]*)/){
                # $2 ends up as the last character of the name; it is kept as
                # the strand only when it is W or C.
                ($name, $strand) = ($1, $2);
                unless($strand =~ /[WC]/){ $strand = '_';  }
                $name =~ s/\W/\_/g;
                $strand{$strand} = $strand;
            }
            $seq_counter++;
            if($seq_counter == 1){
                $na_prv = $name;
                print "\n>$name Strand $strand. Seq. is processed. Wait!!! \n";
                next;
            }
            # A new header closes the previous record: write it out now.
            # NOTE(review): when the previous record had no sequence lines the
            # header line itself is passed on as the sequence - confirm that
            # this is intended.
            unless($orig_seq){  $orig_seq = $line;   }
            $na_prv = &write_AT_GC_ratio_and_contents_ratio_files($base, $na_prv,
                                                                 $sample_interval,
                                                                 $window_size, $orig_seq,
                                                                 \%strand);
            $orig_seq = '';
        }elsif($name and $line =~ /^\s*[ATGC]+/i){
            $orig_seq .= $line;
            $na_prv = $name;
        }
    }
    close($fasta_fh);

    # The last record has no following header, so it is written here.
    print "\n Doing the last calculation\n";
    $na_prv = &write_AT_GC_ratio_and_contents_ratio_files($base, $na_prv,
                                                         $sample_interval,
                                                         $window_size, $orig_seq,
                                                         \%strand);
    return($seq_counter);
}


#______________________________________________________________________________
# Title     : calculate_composition_prosix
# Usage     :
# Function  :
# Example   :
# Keywords  : calculate_composition_identity
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub calculate_composition_prosix{
    # Purpose : Update the running composition similarity between the query
    #           and match secondary structure compositions (H, E, C counts).
    # Args    : $_[1] - match seqlet name (plain value or reference); it is
    #           only used in the progress message. $_[0] is not read.
    # Globals : reads  %secture_compos_hash_query, %secture_compos_hash_match,
    #                  $seqlet_query_prev
    #           updates $Total_sum_match_occur, $rate_diff_sum, $counter,
    #                  $composition_similarity, $running_av_compos_simil and
    #                  the intermediate $sum_*, $ratio_* and $rate_diff_*
    #           empties both composition hashes before returning.
    local($seqlet_match, $num_of_secture_type);
    $num_of_secture_type=3;
    #$running_av_compos_simil=${$_[0]} || $_[0];
    $seqlet_match=${$_[1]} || $_[1];

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Total number of H, E and C occurrences on each side
    #__________________________________________________________________________
    my ($query_H, $query_E, $query_C) = @secture_compos_hash_query{'H', 'E', 'C'};
    my ($match_H, $match_E, $match_C) = @secture_compos_hash_match{'H', 'E', 'C'};
    $sum_query_occur = $query_H + $query_E + $query_C;
    $sum_match_occur = $match_H + $match_E + $match_C;
    $Total_sum_match_occur += $sum_match_occur;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Both sides must have counts, as only the self-self reverse sectures
    # are taken
    #__________________________________________________________________________
    if($sum_query_occur && $sum_match_occur){
       print "      (5) Q=", %secture_compos_hash_query, "  M=", %secture_compos_hash_match , "\n";
       print "\n       (6) $Total_sum_match_occur \$seqlet_query_prev : $seqlet_query_prev $sum_query_occur,  \$seqlet_match : $seqlet_match $sum_match_occur\n";

       # Fraction of each structure type on both sides
       ($ratio_query_H, $ratio_query_E, $ratio_query_C)
           = map { $secture_compos_hash_query{$_} / $sum_query_occur } 'H', 'E', 'C';
       ($ratio_match_H, $ratio_match_E, $ratio_match_C)
           = map { $secture_compos_hash_match{$_} / $sum_match_occur } 'H', 'E', 'C';

       ($rate_diff_H, $rate_diff_E, $rate_diff_C)
           = ( abs($ratio_query_H - $ratio_match_H),
               abs($ratio_query_E - $ratio_match_E),
               abs($ratio_query_C - $ratio_match_C) );

       # Half of the summed differences is accumulated over all calls and
       # turned into an average similarity.
       $rate_diff_sum += ($rate_diff_H + $rate_diff_E + $rate_diff_C)/2;
       $counter++;
       $composition_similarity  = ($counter - ($rate_diff_sum))/$counter;
       $running_av_compos_simil = $composition_similarity;
       print "\n, $composition_similarity $running_av_compos_simil \n";
    }
    %secture_compos_hash_query=%secture_compos_hash_match=();
}


#______________________________________________________________________________
# Title     : calculate_scop_domain_alignment_ratios
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub calculate_scop_domain_alignment_ratios{
    # Purpose : For every entry of a FASTA file that has a non-empty
    #           "<entry>.mspa" file in the current directory, collect the
    #           ratio (aligned length / query domain length) of its matches,
    #           write all ratios sorted ascending to
    #           domain_seq_alignment_ratio.list and return them.
    # Args    : $_[0] - FASTA file name (plain value or reference to it).
    # Returns : reference to the numerically sorted list of ratios.
    # Dies    : when the FASTA file, an .mspa file or the output file can not
    #           be opened.
    # Note    : In each .mspa line the 2nd column is compared with the cut-offs
    #           (presumably an E-value - confirm) and the 4th/5th columns are
    #           the start and stop of the aligned range. The first usable line
    #           is taken as the query itself and defines the domain length.
    #           trim_number is defined elsewhere.
    my $pdb40d_fasta_file = ref($_[0]) eq 'SCALAR' ? ${$_[0]} : $_[0];
    my (@pdb40d, @alignment_ratio);
    my ($pdb40d_file_count, $total_pair_count) = (0, 0);

    open(my $fasta_fh, '<', $pdb40d_fasta_file) || die "\n Can not open $pdb40d_fasta_file \n";
    while (my $line = <$fasta_fh>) {
        push(@pdb40d, $1) if $line =~ /\>(\S+)/;
    }
    close($fasta_fh);
    my $pdb40d_entry_num = @pdb40d;

    for my $i (0 .. $#pdb40d) {
        my $mspa_file = "$pdb40d[$i].mspa";
        next unless -s $mspa_file;
        $pdb40d_file_count++;

        my ($line_count, $query_domain_size) = (0);
        open(my $msp_fh, '<', $mspa_file) || die "\n Can not open $mspa_file \n";
        print "\n $i/$pdb40d_entry_num: parsing $mspa_file";
        LOOP: while (my $line = <$msp_fh>) {
            next LOOP unless $line =~ /^\S+\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)/;
            my ($match_value, $start, $stop) = ($1, $2, $3);
            $line_count++;
            if($line_count == 1){
                if($match_value > 0.00005){ print "\n Strange, $match_value is too high\n"; next LOOP; }
                $query_domain_size = $stop - $start + 1;
                push(@alignment_ratio, 1);
            }elsif($match_value <= 0.0005){
                # Without a usable query line there is no length to divide by.
                next LOOP unless $query_domain_size;
                my $alignment_size = $stop - $start + 1;
                my $align_ratio    = $alignment_size/$query_domain_size;
                $align_ratio = ${&trim_number(\$align_ratio, 2)};
                if($align_ratio > 1){
                    print "\n$align_ratio: $mspa_file \$alignment_size $alignment_size, \$query_domain_size $query_domain_size\n";
                    $align_ratio = 1;
                }
                push(@alignment_ratio, $align_ratio);
            }
        }
        close($msp_fh);
        # Add this file's parsed lines once the file has been read.
        $total_pair_count += $line_count;
    }
    print "\n Parsing finished, Writing now\n";
    my @sorted_ratios = sort {$a<=>$b} @alignment_ratio;

    open(my $ratio_fh, '>', 'domain_seq_alignment_ratio.list')
        || die "\n Can not create domain_seq_alignment_ratio.list \n";
    for my $ratio (@sorted_ratios) {
        print $ratio_fh "$ratio\n";
    }
    close($ratio_fh);
    print "\n \$pdb40d_file_count : $pdb40d_file_count, \$total_pair_count : $total_pair_count\n\n";
    return(\@sorted_ratios);
}


#________________________________________________________________________________
# Title     : calculate_single_linkage_match_with_parf_files
# Usage     :
# Function  : 43 => 1/100,000, 433=> 1/10,000 error rate (RFP) from 432680 possible
#              matches.
# Example   :
#    PARF file looks like this>
#   d1nsca_   d3nn9__   Homolog -664.92 2.43.1.1.3  2.43.1.1.2
#   d1dppa_   d2olba_   Homolog -617.41 3.68.1.1.6  3.68.1.1.1
#   d2ach.1a1 d9api.1a1 Homolog -556.38 5.2.1.1.3   5.2.1.1.4
#
# Keywords  : single_linkage
# Options   :
#       $number_of_Nomologs_to_read= by N=
# Author    :
# Returns   :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub calculate_single_linkage_match_with_parf_files{
    # Purpose : Read PARF files, group the sequences of 'Homolog' pairs into
    #           clusters (a pair joins the first cluster of the current file
    #           that already holds one of its two names, otherwise it starts
    #           a new cluster) and print every cluster with all of its
    #           possible member pairs.
    # Args    : passed through handle_arguments (defined elsewhere); of its
    #           result only the file list (@file) and the option hash (%vars)
    #           are used below, the rest appears only in the debug print.
    #           $vars{'l'} overrides the line limit, $vars{'N'} the Nomolog
    #           limit.
    # Output  : STDOUT only.
    # Globals : %linkage_hash and %matched_pairs are not declared in this sub,
    #           so they keep their content between calls. $debug is read.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my (@parf_file, $number_of_lines_read,  %all_parf_file_hash,
        %parf_hash_homology_info, %parf_hash_score, $possible_combination,
        %parf_hash_classification_column_1, %parf_hash_classification_column_2,
        %parf_hash_Homologous_rank,  %parf_hash_Nomologous_rank, $homolog_counter, $nomolog_counter);

    # Reading stops as soon as one of these three limits is reached.
    # Only the first and the last can be changed by the caller (l= and N=).
    my $number_of_lines_to_read   = 50000;
    my $number_of_Homologs_to_read= 7000; # 699 is for 1% error in 935 PDB40D
    my $number_of_Nomologs_to_read= 86; # 9 is for 1% error in 935 PDB40D

    if($vars{'l'}=~/\S+/){ $number_of_lines_to_read=$vars{'l'} }
    if($vars{'N'}=~/\S+/){ $number_of_Nomologs_to_read=$vars{'N'} }

    # Select the PARF files: by name (.parf) or, failing that, by finding a
    # PARF-looking line near the top of the file.
    for($i=0; $i< @file; $i++){
        my ($counter);
        if($file[$i]=~/\.parf/i){
            push(@parf_file, $file[$i]);
        }else{
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Check if it is parf file inside the file
            #__________________________________________________________
            # NOTE(review): the result of this open is not checked.
            open(INPUT_FILE, "<$file[$i]");
            while(<INPUT_FILE>){
                 $counter++;
                 if(/^\s*\S+\s+\S+\s+[NFUH]omolog\s*\S*\s*\S*\s*\S*/){
                     push(@parf_file, $file[$i]);
                     last;
                 }else{
                     if($counter > 10){  ## giving up, it is not PARF file!
                         print "\n# $0 needs to have PARF files, others are ignored";
                         last;
                     }else{
                         next;
                     }
                 }
            }
            close(INPUT_FILE);
        }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Processing the parf file contents
    #__________________________________________________________
    for($i=0; $i< @parf_file; $i++){
        # @hash_index_array (the cluster keys to search) is fresh for every
        # file, while %linkage_hash and the counters span all files, so
        # clusters made from an earlier file are not extended by a later one.
        my(@hash_index_array, $single_linkage_made_with_linkage_hash, $sorted_pair, @sorted_seq_name_pairs);
        # NOTE(review): the result of this open is not checked.
        open(PARF_FILE, "<$parf_file[$i]");
        while(<PARF_FILE>){
             # Columns: name1 name2 homology-tag [score [class1 [class2]]]
             if(/^\s*(\S+)\s+(\S+)\s+(\S+)\s*(\S*)\s*(\S*)\s*(\S*)/){
                  $number_of_lines_read++;
                  $seq_name1=$1;
                  $seq_name2=$2;
                  $sorted_pair=join('', sort($1, $2));
                  $homology_info=$3;
                  $score=$4;
                  $classification_column_1=$5;
                  $classification_column_2=$6;
                  if($classification_column_1 =~/^(\d+\.\d+\.\d+)\.*\d*\.*\d*/){
                       $superfamily_class=$1;
                  }

                  $single_linkage_made_with_linkage_hash=0; ## this should be reset

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Count the NO-HOmology
                  #____________________________________
                  if($homology_info =~/Nomolog/i){
                      $nomolog_counter++;
                  }elsif($homology_info =~/Homolog/i){

                      $homolog_counter++;       #<----- number hash key is crucial !!!

                      #print "$_ $homolog_counter\n";

                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      # checks if there were previous entries
                      #_______________________________________________
                      # Only the first cluster that holds either name is
                      # extended; clusters are never merged with each other.
                      for $j (@hash_index_array){

                          if( $linkage_hash{$j}{$seq_name1} or $linkage_hash{$j}{$seq_name2} ){
                               #print "\n $j : $linkage_hash{$j}{$seq_name1} ";
                               $linkage_hash{$j}{$seq_name1}=$seq_name1;
                               $linkage_hash{$j}{$seq_name2}=$seq_name2;
                               $single_linkage_made_with_linkage_hash=1;
                               last;
                          }
                      }
                      # No cluster matched: start a new one, keyed by the
                      # running Homolog count.
                      unless($single_linkage_made_with_linkage_hash){
                          $linkage_hash{$homolog_counter}{$seq_name1}=$seq_name1;
                          $linkage_hash{$homolog_counter}{$seq_name2}=$seq_name2;
                          push(@hash_index_array, $homolog_counter);
                      }

                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      # Storing matched pairs
                      #__________________________________________
                      $matched_pairs{"$seq_name1 $seq_name2"}="$seq_name1 $seq_name2";
                  }

             }
             # NOTE(review): the counters are cumulative over all files and
             # compared with ==, so a limit reached in one file may not stop
             # the reading of the next file - confirm this is intended.
             if($number_of_lines_read == $number_of_lines_to_read){  last;     }
             if($nomolog_counter == $number_of_Nomologs_to_read){    last;     }
             if($homolog_counter == $number_of_Homologs_to_read){    last;     }
        }
        close(PARF_FILE);

    }
    print "\n";


    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # producing result
    #_____________________________________________________
    @keys=sort {$a <=> $b } keys %linkage_hash;

    for($i=0; $i< @keys; $i++){
        # n members give n*(n-1)/2 possible pairs; $possible_combination is
        # the running total over all clusters.
        $superfamily_member_count =@members=keys %{$linkage_hash{$keys[$i]}};
        $superfamily_member_pairs =$superfamily_member_count*($superfamily_member_count-1)/2;
        $possible_combination += $superfamily_member_pairs;
        # get_all_possible_pairs_from_array is defined elsewhere; it is used
        # here as returning a hash reference whose keys are the pairs.
        # NOTE(review): a pair is only recognised below when its key is
        # spelled exactly like "name1 name2" as stored in %matched_pairs -
        # verify the key format (separator and order) of the helper.
        %all_pair_hash=%{&get_all_possible_pairs_from_array(\@members)};
        @all_pairs=keys %all_pair_hash;
        print "\n>$keys[$i] $superfamily_member_pairs : @members << $possible_combination";
        for($j=0 ; $j< @all_pairs; $j++){
           if($matched_pairs{$all_pairs[$j]}){
                # NOTE(review): "\s" is not an escape of double quoted
                # strings, so this prints a literal "s*" in front of the
                # pair; presumably "*" alone was meant - confirm.
                print "\n      \s*$all_pairs[$j]";
           }else{
                print "\n        $all_pairs[$j]";
           }
        }
        print "\n";
    }
    print "\n";
    print "\n \$homolog_counter : $homolog_counter / $possible_combination\n";


}




#______________________________________________________________________________
# Title     : calc_factorial
# Usage     :
# Function  :
# Example   :
# Keywords  : calculate_factorial, get_factorial
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub calc_factorial{
    # Purpose : Factorial of a number (1 for anything below 1).
    # Args    : $_[0] - the number, or a reference to a scalar holding it.
    # Returns : reference to the result (scalar).
    my $limit   = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
    my $product = 1;
    my $term    = 1;

    # Multiply in ascending order: 1 * 1 * 2 * ... * $limit
    while ($term <= $limit) {
        $product = $product * $term;
        $term++;
    }
    return(\$product);
}


#______________________________________________________________
# Title     : make_swiss_index
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub make_swiss_index{
	 # Purpose : Find the SwissProt flat file seq.dat, print for every entry
	 #           its ID and a file offset, and return the collected index.
	 # Lookup  : $ENV{SWDIR}seq.dat, $ENV{SWISS}seq.dat, ./seq.dat,
	 #           $ENV{swiss}seq.dat (the variables are prefixed as they are,
	 #           so they need a trailing path separator); if none exists the
	 #           user is asked for the location until an existing path is
	 #           given.
	 # Returns : reference to a hash  entry ID -> offset, where the offset is
	 #           the position reported by tell() right after the ID line has
	 #           been read, i.e. the start of the line following it.
	 # Dies    : when the input ends while asking for the location, or when
	 #           the file can not be opened.
	 my ($swiss, %index);
	 if(-e "$ENV{'SWDIR'}seq.dat" ){
	  $swiss="$ENV{'SWDIR'}seq.dat";
	 }elsif( -f "$ENV{'SWISS'}seq.dat" ){
	  $swiss="$ENV{'SWISS'}seq.dat";
	 }elsif( -e 'seq.dat'){
	  $swiss="seq.dat";
	 }elsif( -f "$ENV{'swiss'}seq.dat"){
	  $swiss="$ENV{'swiss'}seq.dat";
	 }else{
	  # Keep asking until the answer names something that exists.
	  until(defined $swiss and -e $swiss){
		 print "\n Where is your swissprot seq.dat file?\n";
		 $swiss=<>;
		 unless(defined $swiss){
			die "\n No location for seq.dat was given <- make_swiss_index\n";
		 }
		 chomp($swiss);
	  }
	 }

	 open(my $db_fh, '<', $swiss) || die "\n Can not open $swiss: $! <- make_swiss_index\n";
	 while(my $line = <$db_fh>){
		if($line =~ /^ID\s+(\w+)\s+/){
		   $index{$1}=tell($db_fh);
		   print "\n$1 $index{$1}";
		}
	 }
	 close($db_fh);
	 return(\%index);
}

#______________________________________________________________________________
# Title     : fetch_seqlet_from_sequence
# Usage     : ($region_substr)=${&fetch_seqlet_from_sequence(\$ori_string, \$range)};
#              $range='10-13 30-33 34-35 36-36';
# Function  :
# Example   :
# Keywords  : get_string_from_string, get_sequence_from_sequence
#             fetch_substring_from_sequence, fetch_substring
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.2
#------------------------------------------------------------------------------
sub fetch_seqlet_from_sequence{
    # Purpose : Cut one or more regions out of a sequence and return them
    #           joined together in the order given.
    # Args    : $_[0] - the sequence, or a reference to it
    #           $_[1] - the regions: a string such as '10-13 30-33 36-36'
    #                   (or a reference to it), or a reference to an array of
    #                   regions. Positions are 1-based and inclusive, start
    #                   and stop are separated by '-' or '_'. A single number
    #                   means that one position.
    # Returns : reference to the fetched string (undef inside when nothing
    #           was fetched).
    # Warns   : on regions that are not 'start-stop' with 1 <= start <= stop
    #           (they are skipped) and on a region that starts at or before
    #           the stop of the previous one.
    my $original_string = ref($_[0]) eq 'SCALAR' ? ${$_[0]} : $_[0];
    my ($fetched_region, $prev_stop, @regions);

    if(ref($_[1]) eq 'ARRAY'){
       @regions = @{$_[1]};
    }else{
       my $range = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : $_[1];
       # split ' ' ignores leading and trailing blanks, so ' 10-13 ' is one
       # region and not an empty one followed by 10-13.
       @regions = split(' ', $range) if defined $range;
    }

    for my $region (@regions) {
       my ($start, $stop) = split(/[\-\_]/, $region);
       $stop = $start unless defined $stop;      ## single position, e.g. '36'
       unless(defined $start and $start =~ /^\d+$/ and $stop =~ /^\d+$/
              and $start >= 1 and $stop >= $start){
          warn "\n Skipping malformed region '$region' <- fetch_seqlet_from_sequence\n";
          next;
       }
       if(defined $prev_stop and $prev_stop >= $start){
          warn "\n Previous region stop is overlaping with new start point @regions\n";
       }
       $fetched_region .= substr($original_string, $start-1, $stop-$start+1);
       $prev_stop = $stop;
    }
    return(\$fetched_region);
}




#______________________________________________________________________________
# Title     : fetch_cd2list
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub fetch_cd2list {
        # Print, on the currently selected output handle, an HTML <FORM> with
        # one checkbox line per entry found in the summary part of an FSSP
        # file (the lines between the first '##' line and the next '//',
        # '## ALIGNMENTS', '## FOOTER' or word-free line).
        #
        # Arguments : ($cd1, $fsspfile, $stereo); $cd1 is accepted but unused.
        # Reads     : package variables $qz3, $query (must provide ->h1),
        #             $idecut, $FSSPDIR, $domain and $check.
        # Writes    : package variables $x, @x, $long, $cd2, $name, $domfssp2,
        #             $q, $idom; $_ is clobbered by the read loops.
        # Dies      : when $fsspfile cannot be opened.
        local($cd1, $fsspfile, $stereo) = @_;

        open(IN, $fsspfile) or die "FATAL: bad file name $fsspfile\n";

        # skip the file header up to (and including) the first '##' line
        while (<IN>) {
                last if /^##/;
        }

        print qq{<FORM METHOD="GET" ACTION="$qz3">\n\n};
        print $query->h1('Select (check) structural neighbours to display');
        print q{<INPUT TYPE="submit" Value="3D superimposition" Name=3D>};
        print q{<INPUT TYPE="submit" Value="Multiple alignment (wide)" Name=1D>};
        print q{<INPUT TYPE="submit" Value="Multiple alignment (narrow)" Name=1Dshort>};
        print q{<INPUT TYPE="submit" Value="Multiple families (wide)" Name=belvu>};
        print q{<INPUT TYPE="submit" Value="Multiple families (narrow)" Name=short>};
        print qq{<INPUT TYPE="reset" Value="Reset selection">\n};
        # print "<A HREF=$qz3?mode=options&filename=$fsspfile>(Set display options)</A>";
        print qq{<INPUT TYPE="hidden" NAME="filename" Value=$fsspfile>\n};
        print qq{<INPUT TYPE="hidden" NAME="idecut" Value=$idecut>\n};
        print qq{<INPUT TYPE="hidden" NAME="stereo" Value=$stereo>\n};

        # the line right after the '##' marker is the column title line
        $x = <IN>;
        print '<PRE><B>   ', substr($x, 13, 33), substr($x, 70, 50), "</B></PRE>\n";
        print "<PRE>\n";
        undef(@x);

        while (<IN>) {
                last if m{^//} or /^## ALIGNMENTS/ or /^## FOOTER/ or !/\w/;
                chop;

                # columns 13-18 carry the neighbour code; dash and blanks are
                # stripped to build the file name of its own FSSP file
                $long = substr($_, 13, 6);
                $cd2  = $long;
                $cd2  =~ s/-//;
                $cd2  =~ s/ //g;

                ($x, $name) = / 0\s+0\s+\d+[\sS]+(\d+) [\sS]*(.*)$/;
                $domfssp2 = $FSSPDIR . $cd2 . ($x ? '_' . $x : '') . '.fssp';

                my $have_file = -e $domfssp2;
                my $value     = $long;
                if ($have_file && $domain) {
#                       ($q,$idom)=/^\s+\d+\: \S+\s+(\S+) .* S \s+(\d+) /; $q.="\:$idom";
                        ($q, $idom) = /^\s+\d+\: \S+\s+(\S+) .* 0\s+0\s+\d+[\sS]+(\d+) /;
                        $q .= ":$idom";
                        $value = $q;
                }
                # link to the neighbour's own FSSP file only when that file exists
                my $label = $have_file
                          ? "<A HREF=${qz3}&filename=${domfssp2}>${long}</A>"
                          : "$long";

                print qq{<INPUT TYPE="checkbox" $check Name='cd2list' Value="}, $value, q{"> },
                      $label, substr($_, 19, 27), $x, ' ', $name, "\n";
        }
        print "</PRE>\n";
        close(IN);
        print "</FORM>\n";
}

#______________________________________________________________________________
# Title     : put_text_page_header
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub put_text_page_header {
        # Print the HTML <HEAD>/<TITLE>, an <H1> with the same text and the
        # citation line on the currently selected output handle.
        # The title (first argument) is stored in the package variable $header.
        $header = $_[0];
        my @page_lines = (
            "<HEAD><TITLE>EMBL: $header</TITLE></HEAD>",
            "<H1>$header</H1>",
            '<B>Please cite:</B> L. Holm and C. Sander (1996) Science 273(5275):595-60.',
        );
        # one single print argument, every line newline-terminated
        print join("\n", @page_lines) . "\n";
}

#______________________________________________________________________________
# Title     : put_text_page_footer
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : holm@ebi.ac.uk,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub put_text_page_footer {
        # Print the closing part of the HTML page on the currently selected
        # output handle: a rule, the contents of the package variable $back,
        # the copyright line and the closing </BODY></HTML> tags.
        print join("\n",
            '<HR>',
            "$back",
            '<P>',
            '<P><EM>(C) L. Holm, EMBL-EBI, Hinxton, May 1996</EM>',
            '</BODY>',
            '</HTML>',
        ) . "\n";
}


#_____________________________________________________________________________
# Title     : fetch_sequence_from_db
# Usage     : %sequence=%{&fetch_sequence_from_db($input_file, \@string)};
# Function  : accept seq names (with or without ranges like _10-111 )
#              and produces hash ref.
#             As an option, you can write(xxxx.fasta) the sequences in pwd
#              with the file names with sequence names.
#             The default database used is FASTA format OWL database.
#              You can change this by S (for Swissprot either fasta
#              or full format), P for PDB fasta format data.
#             If you give the path name of DB, it will look for the
#              DB given.
#
#             This automatically checks sequence family number as
#               in >d1bpi___7.6.1
#               and attaches the number in final %sequence output
#
# Example   : %seq=%{&fetch_sequence_from_db(\@input, seq.fasta, seq.fasta.idx)};
#              while @input=qw( 11S3_HELAN_11-31 A1AB_CANFA A1AT_PIG )
# Keywords  : fetch_seq_from_db, fetch_sequence_from_database
# Options   : _  or #  for debugging.
#     w       for write fasta file
#     s=      for putting source DB file name manually
#     d=p100  for PDB100 fasta database from ENV
#     d=p40   for PDB40  fasta database from ENV
#     d=p     for PDB database (usually p100) from ENV
#     d=s     for Swissprot database from ENV
#     d=o     for OWL database from ENV
#     i=      for index filename. If not specified, this looks for it in the same dir as the fasta DB
#     t=      for mspa_threshold
#  mspa_threshold=0.0005  # when MSP file is given as input for getting seq names
#
# Returns   : ref of hash
# Argument  : gets names of sequences
#             eg) \@array, \%hash, \$seq, while @array=(seq1, seq2), $seq='seq1 seq1'
#                                               %hash=(seq1, xxxx, seq2, yyyy);
#
# Version   : 3.8
#------------------------------------------------------------------------------
sub fetch_sequence_from_db{
	# Purpose  : Fetch the named sequences from indexed FASTA database file(s).
	# Arguments: whatever handle_arguments() accepts; used here are
	#              - hash refs (keys are taken as sequence names),
	#              - strings with whitespace separated sequence names,
	#              - file names: *.mspa (MSP hits below the e-value threshold
	#                become sequence names), FASTA / .seq / .dat databases,
	#                and *.idx index files,
	#              - key=value options: s= / DB= (database), d= (database from
	#                %ENV: p, p100, pNNd, p40, p90, s, o, n, or a file name),
	#                i= (index file), mspa_threshold= / mt= / t= (e-value).
	# Returns  : ref to a hash: sequence name (as indexed, possibly with
	#            _start-stop or SCOP number attached) => sequence string.
	# Globals  : reads $debug and $verbose.
	# Calls    : handle_arguments, make_seq_index_file, if_file_older_than_x_days.
	# Dies     : when no database can be determined or no index can be made.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

    my(@DATABASE, @INDEX_FILE, %sequence, %seq_with_index, @input_seq_names,
       %long_index, @Keys, $eval_blastpgp_thresh, $index_file, $source_DB_file,
       $matching_seq, $match_start, $match_stop, $evalue);

    $eval_blastpgp_thresh=0.001; # default

    if($vars{'mspa_threshold'}=~/(\S+)/ or $vars{'mt'}=~/(\S+)/ or $vars{'t'}=~/(\S+)/ ){
        $eval_blastpgp_thresh=$1;
        print "\n# (INFO) YOU have set the \$eval_blastpgp_thresh $eval_blastpgp_thresh\n\n";
    }
    if($vars{'s'}=~/(\S+)/ or $vars{'DB'}=~/(\S+)/ ){
        $source_DB_file=$1; push(@DATABASE, $source_DB_file);
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # getting input seq names from all sources
    #________________________________________________________
    # (the former 'for(0..@hash)' loops ran one index past the end)
    foreach my $hash_ref (@hash){         # from the given hash (keys names)
        push(@input_seq_names, keys %{$hash_ref} );
    }
    foreach my $names (@raw_string){      ## from given sequence names
        push(@input_seq_names, split(/\s+/, $names) );
    }
    for($i=0; $i<@file; $i++){  ## From MSP file input (get only MATCHED sequences)
        next unless $file[$i]=~/\.mspa/;
        print "\n# (INFO) MSP file input is detected !\n";
        my ($seq_with_range);
        if(open(MSP, '<', $file[$i])){
            while(<MSP>){
                 if(/^\s*\S+\s+(\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(nr_\S+_DROME_\S+)\s+/){
                      push(@input_seq_names, $2) if $1 < $eval_blastpgp_thresh;
                 }elsif(/^\s*\S+\s+(\S+)\s+\S*\s*(\d+)\s+(\d+)\s+\S+\s+(\d+)\s+(\d+)\s+(\S+)\s*/){
                      $matching_seq=$6; $match_start=$4; $match_stop=$5;
                      $evalue=$1;
                      if($matching_seq=~/^(\S+_\d+\-\d+)/){
                          $seq_with_range=$1;
                      }else{
                          $seq_with_range="$matching_seq\_$match_start\-$match_stop";
                      }
                      push(@input_seq_names, $seq_with_range) if $evalue < $eval_blastpgp_thresh;
                      print "\n# (INFO) pushing $seq_with_range" if $verbose;
                 }
            }
            close(MSP);
        }else{
            warn "\n# (W) fetch_sequence_from_db: can not open MSP file $file[$i]: $!\n";
        }
        # an MSP file is never a database, take it out of @file
        splice(@file, $i, 1);
        $i--;
    }

    print "\n# (1) fetch_sequence_from_db: \@raw_string has: ", scalar(@raw_string), " elements";
    print "\n# (2) fetch_sequence_from_db: No. of seq to fetch is:",scalar(@input_seq_names);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Choose the DBs and INDEX for fetching sequences. All files input must be DATABASE or INDEX file
    #___________________________________________
    if(@file > 0){   print "\n# (i) GOOD! Input \@file has \"@file\"";
        for($i=0; $i< @file; $i++){
            # The former pattern \.[smp]+fa[sta]?$ demanded at least one of
            # s/m/p before 'fa' and therefore rejected plain .fasta/.fa files.
            # Everything it accepted is still accepted.
            if(-T $file[$i] and $file[$i]=~/\.[smp]*fa(?:sta|[sta])?$/){
                push(@DATABASE, $file[$i]);
                if(-T "$file[$i]\.idx"){  push(@INDEX_FILE, "$file[$i]\.idx"); next; }
            }elsif((-T $file[$i]) and ($file[$i]=~/\.seq$/)){    push(@DATABASE, $file[$i]);
            }elsif((-T $file[$i]) and ($file[$i]=~/\.dat$/)){    push(@DATABASE, $file[$i]);
            }elsif(-T $file[$i] and $file[$i]=~/(\S+)\.idx$/){   push(@INDEX_FILE, $file[$i]);
               push(@DATABASE, $1) if -s $1; next;
            }else{
               print "\n#  WARN:  fetch_sequence_from_db:
               You put a file-name-like which is not a fasta DB. Error. I am removing $file[$i]";
               splice(@file, $i, 1);
               $i--;
            }
        }
    }elsif(@file < 1 and !@DATABASE and $vars{'d'}!~/\S/){
        # With a d= option the database is still to be looked up in %ENV
        # below; dying here made d= unusable without a file argument.
        # The "no database at all" case is caught further down.
        print "\n# (E)fetch_sequence_from_db: \@file is <= 0, ERROR\n"; die;
    }

    # NOTE(review): [100]* is a character class, so p, p1, p0, p10, p100 ...
    # all select this branch; left as is for backward compatibility.
    if($vars{'d'}=~/^p[100]*$/){
       if( -T  $ENV{'PDB_FASTA'} ){             push(@DATABASE,   $ENV{'PDB_FASTA'} );     }
       elsif(  -T $ENV{'PDB_SEQ_FASTA'} ){      push(@DATABASE,   $ENV{'PDB_SEQ_FASTA'}  ); }
       elsif(  -T $ENV{'PDB100_FASTA'} ){       push(@DATABASE,   $ENV{'PDB100_FASTA'} ); }
       if(  -T $ENV{'PDB_FASTA_INDEX'} ){       push(@INDEX_FILE, $ENV{'PDB_FASTA_INDEX'} ); }
    }elsif( $vars{'d'}=~/^p\d+d$/ ){
       if(  -T $ENV{'PDB100D_FASTA'} ){         push(@DATABASE,   $ENV{'PDB100D_FASTA'});     }
       elsif(  -T $ENV{'PDBD100_FASTA'}  ){     push(@DATABASE,   $ENV{'PDBD100_FASTA'}); }
       elsif(  -T $ENV{'PDB100D_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDB100D_SEQ_FASTA'}); }
       elsif(  -T $ENV{'PDBD100_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDBD100_SEQ_FASTA'}); }
       if(  -T $ENV{'PDB100D_SEQ_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB100D_SEQ_FASTA_INDEX'}); }
       elsif(  -T $ENV{'PDBD100_SEQ_FASTA_INDEX'} ){ push(@INDEX_FILE, $ENV{'PDBD100_SEQ_FASTA_INDEX'}); }
    }elsif( $vars{'d'}=~/^p40/ ){
       if(  -T $ENV{'PDB40_FASTA'} ){          push(@DATABASE,   $ENV{'PDB40_FASTA'});     }
       elsif(  -T $ENV{'PDB40_SEQ_FASTA'}  ){  push(@DATABASE,   $ENV{'PDB40_SEQ_FASTA'}); }
       if(  -T $ENV{'PDB40_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB40_FASTA_INDEX'}); }
    }elsif( $vars{'d'}=~/^p90/ ){
       if(  -T $ENV{'PDB90_FASTA'}  ){         push(@DATABASE,   $ENV{'PDB90_FASTA'}    ); }
       elsif(  -T $ENV{'PDB90_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'PDB90_SEQ_FASTA'}); }
       if(  -T $ENV{'PDB90_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB90_FASTA_INDEX'}); }
    }
    if( $vars{'d'}=~/^s\s*$/){
       if(  -T $ENV{'SWISS_FASTA'} ){          push(@DATABASE,   $ENV{'SWISS_FASTA'});     }
       elsif(  -T $ENV{'SWISS_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'SWISS_SEQ_FASTA'}); }
       # was $ENV{"SWISS_DIR\/seq.fasta"}: an env. variable NAMED 'SWISS_DIR/seq.fasta'
       elsif( defined($ENV{'SWISS_DIR'}) and -T "$ENV{'SWISS_DIR'}/seq.fasta" ){
                                               push(@DATABASE,   "$ENV{'SWISS_DIR'}/seq.fasta"); }
       if(  -T $ENV{'SWISS_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'SWISS_FASTA_INDEX'}); }
       elsif(  -T $ENV{'SWINDEX'} ){           push(@INDEX_FILE, $ENV{'SWINDEX'}); }
    }elsif( $vars{'d'}=~/^o\s*$/){
       if(  -T $ENV{'OWL_FASTA'} ){            push(@DATABASE,   $ENV{'OWL_FASTA'});     }
       elsif(  -T $ENV{'OWL_SEQ_FASTA'} ){     push(@DATABASE,   $ENV{'OWL_SEQ_FASTA'}); }
       # was $ENV{"OWL_DIR\/owl.fasta"}, same mistake as for SWISS_DIR
       elsif( defined($ENV{'OWL_DIR'}) and -T "$ENV{'OWL_DIR'}/owl.fasta" ){
                                               push(@DATABASE,   "$ENV{'OWL_DIR'}/owl.fasta"); }
       if(  -T $ENV{'OWL_FASTA_INDEX'} ){      push(@INDEX_FILE, $ENV{'OWL_FASTA_INDEX'}); }
       print "\n# Fetching sequences from OWL\n";
    }elsif( $vars{'d'}=~/^n\s*$/){
       if(  -T $ENV{'NRDB_FASTA'} ){            push(@DATABASE,   $ENV{'NRDB_FASTA'});     }
       elsif(  -T $ENV{'NRDB_SEQ_FASTA'} ){     push(@DATABASE,   $ENV{'NRDB_SEQ_FASTA'}); }
       if(  -T $ENV{'NRDB_FASTA_INDEX'} ){      push(@INDEX_FILE, $ENV{'NRDB_FASTA_INDEX'}); }
       elsif(  -T $ENV{'NRDB_FASTA_IDX'} ){     push(@INDEX_FILE, $ENV{'NRDB_FASTA_IDX'}); }
       print "\n# Fetching sequences from NRDB\n";   # message used to say OWL
    }elsif( $vars{'d'}=~/^\S+\.\S+$/ and -T $vars{'d'} ){ push(@DATABASE, $vars{'d'} );     }
    if( $vars{'i'}=~/\S+\.\S+$/ and -T $vars{'i'} ){ push(@INDEX_FILE, $vars{'i'} );   }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Each given source DB file has to have INDEX file. Unless, make it here
    #_____________________________________________________________________
    if(@INDEX_FILE < 1 and @DATABASE > 0){
         for($i=0; $i< @DATABASE; $i++){
             if(-s "$DATABASE[$i]\.idx"){ push(@INDEX_FILE, "$DATABASE[$i]\.idx") }
         }
         unless(@INDEX_FILE){
             print "\n# (E) fetch_sequence_from_db: \@INDEX_FILE has less than 1 elem. Trying to make it\n";
             print "     fetch_sequence_from_db called make_seq_index_file to make @INDEX_FILE\n";
             # build the index ONCE (it used to be built twice: once for the
             # test and once more for the push)
             $index_file=${&make_seq_index_file(@DATABASE)};
             if($index_file){
                  push(@INDEX_FILE, $index_file);
             }else{ die "\n $0 : make_seq_index_file failed to make INDEX file(.idx) \n\n"; }
         }
    }elsif(@INDEX_FILE > 0 and @DATABASE > 0){
         if( ${&if_file_older_than_x_days("$DATABASE[0]\.idx", 5)} > 0 ){
             print "\n# (i) fetch_sequence_from_db: $DATABASE[0]\.idx is old, rerunning make_seq_index_file\n";
             $index_file=${&make_seq_index_file(\@DATABASE)};
             push(@INDEX_FILE, $index_file);
         }elsif((-s "$DATABASE[0]\.idx") > 20){
             print "\n# (i) fetch_sequence_from_db: $DATABASE[0]\.idx is larger than 20, USING IT";
             push(@INDEX_FILE, "$DATABASE[0]\.idx");
         }else{
             print "\n# (ERROR) fetch_sequence_from_db: Some weird error in pushing \$index_file to \@INDEX_FILE\n"; die;
         }
    }elsif(@DATABASE < 1){
         print "\n\n# (W) \@INDEX_FILE \"@INDEX_FILE\", and \@DATABASE \"@DATABASE\" are not big enough(0)\n";
         die;
    }

    # The same file is routinely collected more than once above (e.g. x.fasta
    # together with x.fasta.idx); keep the first occurrence only so that no
    # index is read twice and no database is scanned twice.
    { my %seen; @DATABASE   = grep { defined($_) and length($_) and !$seen{$_}++ } @DATABASE;   }
    { my %seen; @INDEX_FILE = grep { defined($_) and length($_) and !$seen{$_}++ } @INDEX_FILE; }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # To skip a seq name bug (from Sarah)
    # (only reports and pauses; execution continues)
    #________________________________________________________
    if($input_seq_names[0]=~/\S\-\S+\-\S/){
         print  "\n# (W) NO good having more than 2 dashes in seq. name: $input_seq_names[0], dying \n";
         sleep(2);
    }

    ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ##  Now I have @DATABASE, @INDEX_FILE, @input_seq_names
    ##_______________________________________________________________

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    #  Reading in index file to get 'seq' 'seek pos' to make %seq_with_index
    #__________________________________________________________________________
    print "\n#  fetch_sequence_from_db: \@INDEX_FILE @INDEX_FILE, \@DATABASE :@DATABASE\n";
    for($i=0; $i< @INDEX_FILE; $i++){
       unless(open(INDEX, '<', $INDEX_FILE[$i])){
           warn "\n# (W) fetch_sequence_from_db: can not open index file $INDEX_FILE[$i]: $!\n";
           next;
       }
       while(<INDEX>){ if(/(\S+)\s+(\S+)/){  $long_index{$1}=$2;  }  }

       for($j =0; $j < @input_seq_names; $j++){

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
           #  If DATABASE has sequence names with ranges already index the seq with ranges
           #____________________________________________________________________________________
           if($input_seq_names[$j]=~/(\S+\_\d+\-\d+)$/ and $long_index{$1}){
                   $seq_with_index{$1}=$long_index{$1};
           }elsif( ($input_seq_names[$j]=~/pdb_(\S+\_\d+\-\d+)/ or $input_seq_names[$j]=~/nr_(\S+\_\d+\-\d+)/ )
               and $long_index{$1}){
               $seq_with_index{$1}=$long_index{$1};
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
           #  If DATABASE has sequence names without ranges index the seq without ranges
           #____________________________________________________________________________________
           }elsif($input_seq_names[$j]=~/^(\S+)\_\d+\-\d+/ and $long_index{$1}){
                   $seq_with_index{$input_seq_names[$j]}=$long_index{$1}; # !!!! <--- This line is critical
           }elsif($input_seq_names[$j]=~/(\S+)\_\d+\-\d+/ and $long_index{"$1\_"}){ # to handle Tim's new pdb100.fasta files
                   $seq_with_index{$input_seq_names[$j]}=$long_index{"$1\_"};
                   print "\n# Warning: $1 (from $input_seq_names[$j]) matched with $1\_ in $INDEX_FILE[$i],
                                               I hope this is correct!!\n";
           }elsif($input_seq_names[$j]=~/nr_(\S+)\_\d+\-\d+/ and $long_index{"$1"}){ # to handle Tim's new pdb100.fasta files
                   print "           (W) $input_seq_names[$j] matched nr_XXXX_ddd-ddd format removing nr_\n";
                   $seq_with_index{$1}=$long_index{$1};
           }elsif($input_seq_names[$j]=~/pdb_(\S+)\_\d+\-\d+/ and $long_index{$1}){
                   print "           (W) $input_seq_names[$j] matched pdb_XXXX_ddd-ddd format removing pdb_\n";
                   $seq_with_index{$1}=$long_index{$1}; # !!!! <--- This line is critical
           }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
           #  If input_seq_name has SCOP superfamily numbers
           #____________________________________________________________________________________
           elsif($input_seq_names[$j]=~/^(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/ and $long_index{$1}){
                   $seq_with_index{"$1\_$2"}=$long_index{$1}; # !!!! <--- This line is critical
           }elsif($input_seq_names[$j]=~/\S/ and $long_index{$input_seq_names[$j]}){
                   $seq_with_index{$input_seq_names[$j]}=$long_index{$input_seq_names[$j]}
           }else{
               print chr(7);
               print "\n# (E) Wanted Intermediate, $input_seq_names[$j], has NO corrspndng indx in $INDEX_FILE[$i]";
           }

       }
       close INDEX;

       if ( scalar(keys %seq_with_index) < 1){
            print "\n# fetch_sequence_from_db: \%seq_with_index is too small, ERROR?\n";
       }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
    #  Fetching sequences from DATABASE
    #_______________________________________________________________
    print "\n# fetch_sequence_from_db: Fetching seqs from @DATABASE with  @INDEX_FILE ";
    # visit the entries in file order (sorted by seek position)
    @Keys= sort {$seq_with_index{$a} <=> $seq_with_index{$b} } keys %seq_with_index;        ## <<< NOTE it is @Keys, not @keys
    print "\n# (3) fetch_sequence_from_db: No. of seq indexed is:", scalar(@Keys);

    for($f=0; $f< @DATABASE; $f++){

       unless(open(DB_FASTA, '<', $DATABASE[$f])){
           warn "\n# (W) fetch_sequence_from_db: can not open database $DATABASE[$f]: $!\n";
           next;
       }

       F0: for($e=0; $e< @Keys; $e++){
          my ($seq_found1, $super_fam_class, $NAME, $R_leng, $R_start, $sequence);
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # When seq name has range attachment, it handles.
          # The range+superfamily form is tested FIRST: the plain range
          # pattern also matches it, which made that branch unreachable.
          #________________________________________________
          if($Keys[$e]=~/(\S+)_(\d+)\-(\d+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
              $NAME=$1;
              $R_start=$2-1;      ## to fit in substr function
              $R_leng =$3-$2+1;   ## to fit in substr
              $super_fam_class=$4;
              print "\n# (4) fetch_sequence_from_db: Sequences have ranges and superfamily numb.\n";
          }
          elsif($Keys[$e]=~/(\S+)_(\d+)\-(\d+)/){
              $NAME=$1;
              $R_start=$2-1;      ## to fit in substr function
              $R_leng =$3-$2+1;   ## to fit in substr
              print "\n# (4) fetch_sequence_from_db: Sequences have ranges ($R_start-$R_leng) only (not superfamily numb.) \n";
          }
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # When superfamily (scop) number is attached
          #___________________________________________________
          elsif($Keys[$e]=~/(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
              $NAME=$1;
              $super_fam_class=$2;
              print "\n# (4) fetch_sequence_from_db: Sequences have SCOP superfamily numbers only \n";
          }elsif($Keys[$e]=~/^\s*(\S+)[\,]*$/){
              print "\n# (4) fetch_sequence_from_db: Sequences DON't have ranges or SCOP superfam numb.\n";
              $NAME=$1;
          }

          print "\n# BEFORE reading in DB file. \$NAME is  $NAME";

          if($seq_with_index{$NAME}=~/(\d+)/        # It is importnt having $seq_with_index{$Keys[$e]}
               or $seq_with_index{$Keys[$e]}=~/(\d+)/
               or $seq_with_index{"$NAME\,"}=~/(\d+)/    # this is for overcoming '>xxxx,'  entry(the comma)
               or $seq_with_index{"$NAME\_"}=~/(\d+)/    # to handle Tim's  >c1eru_ 3.30.1.1.4
               or $seq_with_index{"pdb\_$NAME"}=~/(\d+)/ # to handle Sarah's pdb_xxxxx
               or $seq_with_index{"nr\_$NAME"}=~/(\d+)/  # to handle Sarah's nr_xxxxx
               ){
               # Start reading 300 bytes before the indexed position ( -300 is
               # necessary ), but never before the start of the file.
               my $finding_position= $1-300;
               $finding_position=0 if $finding_position < 0;      ## This is essential !!!
               seek(DB_FASTA, $finding_position, 0);

               # \Q..\E: the name is data, not a pattern (names may hold | . + ...)
               my $header_re=qr/\>\s*\Q$NAME\E[\,_]?\s*\d*/;

               LINE: while(<DB_FASTA>){
                   unless($seq_found1){
                       if(/$header_re/){
                           $seq_found1=1;
                           print "\n# $NAME  is found in DB, Good ";
                       }
                       next LINE;
                   }
                   if(/^\s*(\w+)\s*$/ ){
                       $sequence .=$1;  ## you should use $1 to avoid including NEW line
                       # keep reading unless this was the very last line of the DB
                       next LINE unless eof DB_FASTA;
                   }elsif( !( (/^\s*\>\s*\S+/) or (eof DB_FASTA) ) ){
                       next LINE;       # neither sequence nor next header: ignore the line
                   }
                   # The entry is complete (next header or end of file reached).
                   # This replaces the former 'goto PUT_SEQ' into the elsif
                   # block and the bare 'next' that kept reading (and appending)
                   # the rest of the database for every sequence with a range.
                   if(defined $R_start){     #======= When range is defined =====
                       $sequence{$Keys[$e]}=substr($sequence, $R_start, $R_leng);
                   }else{                    #======= When range is NOT defined ==
                       $sequence{$Keys[$e]}=$sequence;
                   }
                   next F0;
               }

          }else{
               print "\n# Error, the sequence pos for $NAME (from $Keys[$e]) in DB doesnt exist in xxxx.idx file?\n";
          }
       }
       close DB_FASTA;
    }
    #print "\n# (6) fetch_sequence_from_db: counted fetched seqs: $found_seq_count, $acquired_seq_count";
    #print "\n# (7) fetch_sequence_from_db: Fetching seq has finished \n";

    return(\%sequence);
}




#______________________________________________________________
# Title     : fetch_seq
# Usage     : &fetch_seq(@ARGV);
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
#             -a is for ALL matched seq. (same as using glob=> *YEAST)
#             -c is for Creating seq.idx file
#             -h is for HELP!
#             -g is for GDF file format output
#             -l is for list of match entries(in 1 column)
#             -s is for species option (input name must be a species: YEAST, RAT, HUMAN..)
#             n= is for Number of seq you want to get from swissprot
#             s= is for Size limit. Min seq size in swiss, s=10  -> minimum 11 aa seq.
#             S= is for Size limit. Max seq size in swiss, S=1000 -> get less than 1000
#
# Argument  : swissprot seqname
# Category  :
# Version   : 1.7
#--------------------------------------------------------------
sub fetch_seq{
	 my @in=@_;
	 my ($FASTA_index, $FASTA, $where_index, %index, $question, $i,
	    $s,$t,$fasta,$index_file, $all, $species,$target, $matched, $seq, $gdf, $list, $count, $create);
	 my $SEQ_size_max=100000000;

	 if(@_ < 1){	  &HELP_fetch_seq;
	 }else{
	 F: for($t=0; $t<@in; $t++){ #'''''''''''' PROMPT ARGV processing ''''''''''''''''''
		if($in[$t]=~/^\-c$/i){
		   $create=1; splice(@in, $t, 1); $t--;
		   print "\n You should provide database\(e.g, seq.dat\) file with this opt, I guess you did\n";
		   print "\n If you wanted to make an index with any fasta db, you also have to\n";
		   print "  give the file name. e.g:\n     $0 -c /DB/swiss/seq.dat\n";
		   print "  or $0 -c my_db.fasta\n\n";
		   next; }
		if($in[$t]=~/^\-af$/){ $fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-afs$/){ $species=$fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-ag$/){ $gdf=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-g$/){    $gdf=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-f$/i){   $fasta=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-a$/i){   $all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-l$/i){   $list=$all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-s$/i){  $species=$all=1; splice(@in, $t, 1); $t--; next; }
		if( ($in[$t]=~/seq\.dat/)&&(-f $in[$t])){ ## if the path for swiss prot is given
		   $DB=$in[$t];  splice(@in, $t, 1); $t--; next;        }
		if( ($in[$t]=~/seq\.idx/)&&(-e $in[$t])){ ## if the path for swiss index is given
		   $index_file=$in[$t];	splice(@in, $t, 1); $t--; next;	}

		#''''''' SWiss prompt input file check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^ID\s+\w+/){$DB=$in[$t]; splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' FASTA prmpt input file check '''''''''''''''''''
		if( (-f $in[$t]) && !(defined($FASTA))){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^\> {0,4}\S+/){$FASTA=$in[$t]; $fasta=1;
			 if(-s "$FASTA\.idx"){ $FASTA_index="$FASTA\.idx"; }
		     splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' INDEX file automatic check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP2, "$in[$t]");
		   my ($first_pos, $Count, @splited);
		   while(<TEMP2>){
			 $Count++;
			 if( $Count>3 ){
				if(/^ {0,2}\S+\s+(\d+)/){
				   if(defined($first_pos) && ($1-$first_pos ) > 1000 ){
					  $index_file=$in[$t];
					  splice(@in, $t, 1);$t--;next F;
				   }elsif( defined($first_pos) && ($1-$first_pos)<1000 ){
					  $FASTA_index=$in[$t]; $fasta=1;
					  if($FASTA_index=~/^(\S+)\.\w+$/){
					     if(-s $1){ $FASTA= $1; }
					  }
					  splice(@in, $t, 1);$t--;next F;
				   }
				   $first_pos=$1;
				}
			 }
		   }
		   close TEMP2;
		}
		if($in[$t]=~/^\-h$/i){ &HELP_fetch_seq; die;}
		if($in[$t]=~/^n=(\d+)$/i){ $SEQ_num_to_fetch=$1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^s=(\d+)$/){ $SEQ_size_min=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^S=(\d+)$/){ $SEQ_size_max=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
	 }

	 if(($create==1)&&(defined($DB)) ){ goto CREATE; }
	 elsif(($create==1) && (defined($FASTA)) ){ goto CREATE; }
	 elsif($create==1){
	    print "\n You must give db filename (e.g. seq.dat) with path to make an index";
	    print "\n  I can handle fasta db file to make an index\n";
	    die;
	 }
	 }

	 if($SEQ_size_max < $SEQ_size_min){ print "\n Seq size Max is smaller than min\n"; die; }

	 ##""""""""""""""""""""""" DB file if not defined """"""""""""""""""""""""""""""""""""""""""""
	 if (!defined($DB)){
	  if((!defined($FASTA))&&($fasta==1)&&(-T "$ENV{'FASTADB'}")){
		 $FASTA=$ENV{'FASTADB'};
	  }elsif(defined($FASTA) && ($fasta==1) &&($create !=1) ){
		 goto SW_INDEX;
	  }elsif(!defined($FASTA) && (defined($FASTA_index))&& !(-e "$ENV{'FASTADB'}") ){
		 print "\n NO fasta db is defined\n";
		 goto ASK;
	  }elsif(-e "$ENV{'SWDIR'}seq.dat" ){
		 $DB="$ENV{'SWDIR'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}seq.dat" ){
		 $DB="$ENV{'FETCHSWISS'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}" ){
		 $DB="$ENV{'FETCHSWISS'}";
	  }elsif(-e "$ENV{'SWDIR'}\/seq.dat" ){
		 $DB="$ENV{'SWDIR'}\/seq.dat";
	  }elsif( -f "$ENV{'SWISS'}seq.dat" ){
		 $DB="$ENV{'SWISS'}seq.dat";
	  }elsif( -f "$ENV{'SWISS'}\/seq.dat" ){
		 $DB="$ENV{'SWISS'}\/seq.dat";
	  }elsif( -e 'seq.dat'){
		 $DB="seq.dat";
	  }elsif( -f "$ENV{'swiss'}seq.dat"){
		 $DB="$ENV{'swiss'}seq.dat";
	  }elsif(-f "ENV{'HOME'}seq.dat"){
		 $DB="ENV{'HOME'}seq.dat";
	  }elsif(-f "ENV{'SWDIR'}\/seq.dat"){
		 $DB="ENV{'SWDIR'}\/seq.dat";
	  }else{
		ASK: print "\n Where is your swissprot seq.dat(or fasta db) file?\n";
			 print "  I recommand you to set the path for them in ENV vars\n";
			 print "  e.g. export SWDIR=/DB/Swiss/  to where you put seq.dat\n";
			 print "  e.g. export FASTADB=/DB/Swiss/my_swiss.fasta  for fasta database\n";
		 $swiss=<STDIN>;
		 chomp($swiss);
		 if( -f $swiss){
			open(TEMP, "$swiss");
			while(<TEMP>){
			   if(/^ID\s+\w+/){ $DB=$swiss; goto SW_INDEX; }
			   elsif(/^\> {0,3}\S+/){ $FASTA=$swiss; goto SW_INDEX;}
			}
			close TEMP;
		 }else{
			goto ASK;
		 }
	  }
	 }
	 ##""""""""""""""""""""""""""""" INDEX file """"""""""""""""""""""""""""""""""""""""
	 if( !defined($index_file)){
	  SW_INDEX:
	  if((!defined($FASTA_index))&&($fasta==1)&&(-T "$ENV{'FASTAINDEX'}")){
		 $FASTA_index=$ENV{'FASTAINDEX'};
	  }elsif(!defined($FASTA_index)&&(-T $FASTA)){
		 goto W;
	  }elsif(defined($FASTA_index)&&(-T $FASTA)){
		 goto MAIN_SEARCH;
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}seq.idx";
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}\/seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}\/seq.idx";
	  }elsif(-e "$ENV{'SWDIR'}seq.idx" ){
		 $index_file="$ENV{'SWDIR'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}seq.idx" ){
		 $index_file="$ENV{'SWISS'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}\/seq.idx" ){
		 $index_file="$ENV{'SWISS'}\/seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}" ){
		 $index_file="$ENV{'SWINDEX'}";
	  }elsif( -e 'seq.idx'){
		 $index_file="seq.idx";
	  }elsif( -f "$ENV{'swiss'}seq.idx"){
		 $index_file= "$ENV{'swiss'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file= "$ENV{'SWINDEX'}seq.idx";
	  }elsif( -f "$ENV{'HOME'}seq.idx"){
		 $index_file= "$ENV{'HOME'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file="$ENV{'SWINDEX'}\/seq.idx";
	  }elsif( -f "$ENV{'swindex'}seq.idx"){
		 $index_file="$ENV{'swindex'}seq.idx";
	  }elsif(defined($DB)|| defined($FASTA) ){
		 print "\n Your swissprot is in $DB, but no seq.idx file for it.\n";
		 W: print "\n Where is seq.idx(or fasta idx file eg. $FASTA\.idx), type path and filename?\n";
		    print "  I recommand you to set the path for them in ENV vars later\n";
			print "  e.g. export SWINDEX=/DB/Swiss/  to where you put seq.dat index\n";
			print "  e.g. export FASTAINDEX=/DB/Swiss/my.fasta.idx  for fasta db index\n";
			print "  Asking where 3 times. After, will ask creation of seq.idx or $FASTA.idx\n";
		 $question++;
		 $where_index=<STDIN>;
		 chomp($where_index);
		 if(-f $where_index){
			open(TMP, "$where_index");
		    while(<TMP>){
				if($_=~/^ {0,2}\S+\s+\d+/){
				   $index_file=$where_index;
				   print "\n Your index file seems to be right \($index_file\) \n";
				   goto MAIN_SEARCH;
				}elsif($count > 4){ # read at least 4 lines and see if they are index
				   print "\n $where_index doesn't seem to be index file\n";
				   print "\n Terminate(t) or go on (g) trying\n";
				   $try=getc;
				   if($try=~/t/i){  die; }
				   else{ goto W; }
				}else{
				   $count++;
				}
			}
			close TMP;
		 }else{
			if($question > 2){
			   print "\n I can create the index in pwd for you run $0 and \n";
			   print "\n you can copy seq.idx(or $FASTA\.idx) into your swissprot dir later\n";
			   goto CREATE;
			}
			goto W;
		 }

		 #""""""""""""""" CREATION of INDEX file """""""""""""""""""""""""""""""""""""""""""""
		 CREATE:
		 if(defined($DB)){ print "\n Can I create seq.idx in pwd? (y+return or return)\n" }
		 if(defined($FASTA)){ print "\n Can I create $FASTA\.idx in pwd? (y+return or return)\n" }
		 $yes_no=getc;
		 if($yes_no=~/y/i){
			if(defined($DB)){
			   print "\n seq.idx being created...\(1 min in my Linux\)\n";
			   open(DB, "$DB");
			   open(IDX, ">seq.idx");
			   print IDX "# swiss_index\n";
			   while(<DB>){
				 if(/^ID\s+(\w+)\s+/){
					$index{$1}=tell(DB);
					print IDX "\n$1 $index{$1}";
				 }
			   }
			   close(DB);
			   close(IDX);
			   if(-s "seq.idx"){
				   print "\nGood. seq\.idx is created.";
				   print "\n Copy seq.idx to SWISSPROT dir or you can set\n";
				   print "absolute path ENV var \'SWINDEX\' to your seq.idx path\n";
				   print "e.g. #bash\> export SWINDEX=\/DB\/Swiss\/seq.idx\n\n";
				   if($create==1){ die;  }
			   }else{
				   print "\n Creation of seq.dat seems to have gone wrong";
			   }

			}elsif(defined($FASTA)){
			   $F_idx="$FASTA\.idx";
			   print "\n $F_idx being created...\n";
			   open(FASTADB, "$FASTA");
			   open(FASTAIDX, ">$F_idx");
			   print FASTAIDX "# fasta_index\n";
			   while(<FASTADB>){
				 if(/^\> {0,4}(\S+)\s*/){
					$index{$1}=tell(FASTADB);
					print FASTAIDX "\n$1 $index{$1}";
				 }
			   }
			   close(FASTADB);
			   close(FASTAIDX);
			   if(-s $F_idx){
				   print "\nGood! Copy $F_idx to your DB dir and set two ENV vars\n";
				   print "absolute path ENV var \'FASTADB\' to your fastadb path\n";
				   print "absolute path ENV var \'FASTAINDEX\' to your $F_idx path\n";
				   print "e.g. #bash\> export FASTADB   =\/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #bash\> export FASTAINDEX=\/DB\/mySwiss\/$F_idx\n";
				   print "e.g. #tcsh\> setenv FASTADB    \/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #tcsh\> setenv FASTAINDEX \/DB\/mySwiss\/$F_idx\n";
				   print "Unless, you can specify the database each time at prompt\n\n";
				   if($create==1){ die;  }
			   }else{
				   print "\n Creation of seq.dat or $F_idx seems to have gone wrong";
			   }
			}
		 }else{
			die;
		 }
	  }
	 }

	 #""""""""""""""""""""""""""" MAIN SERACH """""""""""""""""""""""""""""""""""""""""""""""
	 MAIN_SEARCH:
	 for($i=0; $i<@in; $i++){
	  my (@possible, @pos, %possible); my $target=$in[$i];
	  if($target=~/\*/){
		 $target=~s/\*/\\\w\{0,6\}/; # to handle glob input
		 $all=1;
	  }
	  if(defined($index_file)){
		 open(INDEX, "$index_file");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target)\s+(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*)\s+(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto SWISS;
	  }elsif(($fasta==1) && (defined($FASTA_index)) ){
		 open(INDEX, "$FASTA_index");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target)\s+(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*)\s+(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto FASTA;
	  }

	  SWISS:
	  @poss = sort keys %possible;

	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 die;
	  }elsif($all !=1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){
			print "\n# You defined the number of sequence to fetch: $SEQ_num_to_fetch\n";
			$num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }

		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};   # %possible has the name and index num
			 seek(DB, ($matched-52), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID\s+$poss[$p]\s+\S+\s+\S+\s+(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }
			   elsif(/^ {0,2}\/\// and  $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(/^ {0,2}\/\//  and  $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq=''; next A;
				  }else{  $seq=''; $num_sequence++;  next A; }
			   }elsif( $fasta==1 and /^\s+\w+/){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID\s+$poss[$p]\s+\S+\s+\S+\s+(\d+)/){
			         print "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }elsif($all==1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){ $num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }
		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(DB, ($matched-51), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID\s+$poss[$p]\s+\S+\s+\S+\s+(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }elsif(/^ {0,2}\/\// and $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq='';  next A;
				  }else{  $seq=''; $num_sequence++; next A; }
			   }elsif(/^ {0,2}\/\// and $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(($fasta==1)&&(/^\s+\w+/)){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID\s+$poss[$p]\s+\S+\s+\S+\s+(\d+)/){
			         printf "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }

	  FASTA:
	  @poss = sort keys %possible;
	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 die;
	  }elsif($all !=1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B:for($p=0; $p < @poss; $p++){
		 if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
			if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B;
	 			}elsif(/^> {0,4}($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }elsif($all==1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B2:for($p=0; $p < @poss; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
				if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B2;
				}elsif(/^>\s*($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }
	 }
}



#______________________________________________________________
# Title     : fetch_swiss_seq
# Usage     :
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Warning   :
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence, fetch
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
# Returns   :
# Argument  : swissprot seqname
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub fetch_swiss_seq{
	 # Purpose : print Swiss-Prot entries (or, with -f, FASTA records) whose
	 #           names match the given patterns, using seq.dat and its seq.idx.
	 # Args    : list of entry names; '*' is translated to \w{0,4}. Any
	 #           argument ending in '-f' (case-insensitive) switches FASTA
	 #           output on and is removed from the name list.
	 # Output  : everything is printed to the currently selected handle.
	 # Returns : nothing useful.
	 # Files   : seq.dat / seq.idx are looked up in $ENV{SWDIR}, $ENV{SWISS},
	 #           the current directory and $ENV{swiss}, in that order. The env
	 #           value is used as a prefix, so it must end with '/'.
	 my @in=@_;
	 if(@in < 1){
		 print "\n Usage: $0 [-f] <any swissprot name entry>\n";
		 print "   -f is for fasta output format only\n";
		 print "\n You have to set ENV var, SWDIR to seq.dat path\n";
		 print chr(7);
		 return;      # nothing to search for
	 }

	 # The package global $fasta is still honoured as the default, but it is no
	 # longer modified here, so a -f in one call does not leak into the next.
	 my $fasta_out = (defined($fasta) && $fasta==1) ? 1 : 0;
	 if( grep { /\-f$/i } @in ){
		 $fasta_out=1;
		 @in = grep { !/\-f$/i } @in;   # removes every flag, none is skipped
	 }

	 my @db_dirs = map { defined($_) ? $_ : '' }
	               ($ENV{'SWDIR'}, $ENV{'SWISS'}, '', $ENV{'swiss'});
	 my ($db_file)    = grep { -f $_ } map { $_.'seq.dat' } @db_dirs;
	 my ($index_file) = grep { -f $_ } map { $_.'seq.idx' } @db_dirs;
	 if(!defined($db_file) or !defined($index_file)){
		 warn "\n# fetch_swiss_seq: can not find seq.dat and/or seq.idx, set ENV var SWDIR\n";
		 return;
	 }
	 my $db_fh;
	 if(!open($db_fh, '<', $db_file)){
		 warn "\n# fetch_swiss_seq: can not open $db_file: $!\n";
		 return;
	 }

	 for my $name (@in){
		 my $target=$name;
		 $target=~s/\*/\\w{0,4}/g; # to handle glob input

		 # One pass over the index: names that match at all, and (name, offset)
		 # pairs for lines that also carry a byte offset.
		 my (@possible, @hits);
		 my $idx_fh;
		 if(!open($idx_fh, '<', $index_file)){
			 warn "\n# fetch_swiss_seq: can not open $index_file: $!\n";
			 next;
		 }
		 while(my $line=<$idx_fh>){
			 push(@possible, $1)    if $line=~/(\w*$target\w*)/;
			 push(@hits, [$1, $2])  if $line=~/(\w*$target\w*)\s+(\d+)/;
		 }
		 close($idx_fh);

		 if(@possible >1){
			 print "\n@possible", "\n";
			 print chr(7);
			 print "\n There are more than a few seqs for $name, \n be more specific!\n\n";
			 next;
		 }

		 print "\n";
		 for my $hit (@hits){
			 my ($matched, $offset)=@{$hit};
			 # The index creation code elsewhere in this file stores tell()
			 # taken after the ID line was read; 51 steps back to the start of
			 # that line (assumes a 51 byte ID line -- TODO confirm).
			 my $start=$offset-51;
			 $start=0 if $start < 0;
			 seek($db_fh, $start, 0);
			 my $seq='';
			 while(my $line=<$db_fh>){
				 if($line=~/^\/\/$/){          # end of entry
					 if($fasta_out){
						 $seq=~s/ //g;
						 print "\>$matched\n$seq\n";
					 }else{
						 print "\n";
					 }
					 last;
				 }
				 if($fasta_out){
					 $seq.=$line if $line=~/^\s+\w+/;  # indented sequence lines only
				 }else{
					 print $line;
				 }
			 }
		 }
		 print "========= Search for $name was a success\n" if @in > 1;
	 }
	 close($db_fh);
	 return;
}



#______________________________________________________________
# Title     : get_sequence_number
# Usage     :
# Function  : reads database and tells how many sequences are there
#             fasta format db is only accepted for now.
# Example   :
# Warning   :
# Keywords  : count_number_of_sequence, get_number_of_sequence
#             get_sequence_number_in_fasta
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_sequence_number{
		# Purpose : count the sequence header lines in one or more FASTA files.
		# Args    : either a list of file names or a reference to an array of
		#           file names.
		# Returns : reference to a hash, file name => number of headers found
		#           (0 when the file has none).
		# Dies    : when a file can not be opened.
		#
		# The original "@{$_[0]} || @_" evaluated the array in scalar context,
		# so an array ref argument produced its element count, not its contents.
		my @file = (ref($_[0]) eq 'ARRAY') ? @{$_[0]} : @_;
		my %out;
		for my $db_file (@file){
			 my $seq_number_in_db = 0;
			 open(my $db_fh, '<', $db_file) or die "\n# (E) Can not open $db_file";
			 while(my $line = <$db_fh>){
					 # '>' plus up to 6 blanks, followed by either a word
					 # character (standard FASTA db) or '/:' (the "Liisa's
					 # NRDB90" style named in the original comments)
					 $seq_number_in_db++ if $line =~ /^\> {0,6}(?:\w|\/\:)/;
			 }
			 close($db_fh);
			 $out{$db_file} = $seq_number_in_db;
		}
		return(\%out);
}


#________________________________________________________________________________
# Title     : write_mspa_files
# Usage     : &write_mspa_files(\%in1, \%in2, ['s'], [$filename, o=$output_MSP_file_name],)
# Function  : Writes input which is already in mspa file format to
#              files either the name is given or generated
#              If more than one ref of hash is given, this will
#              concatenate all the hashes to one big one to
#              make one file.
#             When NO output xxx.mspa file name is given, it creates
#              with the query sequence name.
# Example   :  &write_mspa_files(@sso, 's', $out_file);
# Warning   : When NO output xxx.mspa file name is given, it creates
#              with the query sequence name.
# Keywords  : write_mspa, write_MSP, write_MSP_file, write_mspa_file
# Options   : _  for debugging.
#             #  for debugging.
#             s  for each single file output for each hash input
#  filename  for putting output to the specified filename, should be xxx.mspa
#  $output_MSP_file_name= by o=  # same as filename, but passed explicitly.
#
# Returns   : if 's' option is set, it will make say,
#               HI001.mspa HI002.mspa HI003.mspa  rather than
#
#               HI001HI002HI003.mspa
#  eg of one output(single file case)
#
#   1027     0.0     1     154   HI0004     1     154   HI0004
#   40       0.0     84    132   HI0004     63    108   HI0001
#   31       0.0     79    84    HI0004     98    103   HI0003
#
# Category  :
# Version   : 3.3
#----------------------------------------------------------------------------------
sub write_mspa_files{
    # Purpose : write hash values that are already MSP-format lines to file(s).
    # Args    : parsed by handle_arguments():
    #             hash refs    -> @hash (each value is one output line)
    #             a file name  -> $file[0], used as the output file name
    #             'o=NAME'     -> $vars{'o'}, overrides the output file name
    #             's'          -> single-output option
    # Returns : when 's' is set and no file name is known, a ref to an array
    #           holding 'default_single_out.mspa'; otherwise a ref to a scalar
    #           when exactly one name was collected, else a ref to an array.
    # Fixes over the previous version:
    #   - the "self match on top" swap in the hash-input branch assigned an
    #     undefined value to $keys[0] and lost a record;
    #   - the size checks ran before close(), i.e. before buffered output had
    #     reached the file;
    #   - $output_MSP_file_name was declared twice in one my().
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my ($add_range, @final_out, $mspa_file_out, $output_MSP_file_name,
        @keys, $N, %hash, $query_seq_name, $single_out_opt);

    if($char_opt=~/r/){ $add_range      ='r' };
    if($char_opt=~/s/){ $single_out_opt ='s' };
    if(@file == 1){ $output_MSP_file_name=$file[0]; $single_out_opt='' } # s is for single file output
    if($vars{'o'}){ $output_MSP_file_name=$vars{'o'} }
    if(@hash < 1){ warn "\n WARNING: write_mspa_files, \@hash is empty. This can be fatal \n\n"; }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # (0) When outfilename is not given
    #___________________________________________________________
    if($single_out_opt eq 's' and !$output_MSP_file_name){
        $mspa_file_out='default_single_out.mspa';
        for($i=0; $i< @hash; $i++){
            my %hash=%{$hash[$i]};
            my @keys =sort keys %hash;
            #------------------ Writing the first line ---------------------------
            # The file is (re)created only when a self match line is found;
            # that line is written first and its key removed from @keys.
            for($j=0; $j< @keys; $j++){
                if($keys[$j]=~/(\S+)_\d+\-\d+/){ $N = $1 }else{ $N = $keys[$j] }
                if($hash{$keys[$j]}=~/\s+$N[\_\d+\-\d+]*\s+\d+\s+\d+\s+$N[\_\d+\-\d+]*/){
                    open(MSP_FILE, '>', $mspa_file_out) ||
                            die "# write_mspa_files: I can not create $mspa_file_out, check permission\n";
                    chomp( $hash{$keys[$j]} ); ## precaution
                    print MSP_FILE "# (H) $0 write_mspa_files: $keys[$j]\n";
                    print MSP_FILE $hash{$keys[$j]}, "\n";
                    splice(@keys, $j, 1);
                    $j--; last;
                }
            }
            #------------- Writing the rest of the lines ____________________
            for($j=0; $j< @keys; $j++){
                chomp( $hash{$keys[$j]} );
                print MSP_FILE $hash{$keys[$j]}, "\n";
            }
            print MSP_FILE "\n";
        }
        close(MSP_FILE);   # flush before the size is tested
        if(-s $mspa_file_out){
            print "\n# write_mspa_files: $mspa_file_out is written \n";
        }else{
            print "\n# Error, write_mspa_files\n"; die
        }
        push(@final_out, $mspa_file_out);
        return(\@final_out);
    #~~~~~~~~~~~~~ DEfault ~~~~~~~~~~~~~~~~~~
    #  (1) When output file name was given!
    #________________________________________
    }elsif($output_MSP_file_name){
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # When FILE input is given (NOT hash )
        #___________________________________________________________
        if(@file==1){
            open(MSP_FILE, '>', $output_MSP_file_name) ||  die "# write_mspa_files: I can not create $output_MSP_file_name, check permission\n";
            print MSP_FILE "# (H) $0 write_mspa_files: @file\n";
            for($i=0; $i< @hash; $i++){
                 %hash=%{$hash[$i]};
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # Sorting %hash values by the second column(Evalue)
                 #_______________________________________________________
                 @keys= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { $hash{$_}=~/^\s*\S+\s+(\S+)\s+/ and [$_, $1] } keys %hash;

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # for Final output
                 #_____________________________
                 push(@final_out, $output_MSP_file_name);

                 #--------- Writing the first line only --------------
                 for($j=0; $j< @keys; $j++){
                     if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }

                     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                     # Following is to put the self match on top of the list
                     #________________________________________________________
                     if($hash{$keys[$j]}=~/\s+$N[\_\d+\-\d+]*\s+\d+\s+\d+\s+$N[\_\d+\-\d+]*/){
                          @keys[0, $j] = @keys[$j, 0];
                     }
                 }
                 for($j=0; $j< @keys; $j++){
                     chomp($hash{$keys[$j]});
                     print MSP_FILE $hash{$keys[$j]}, "\n";
                 }
                 print MSP_FILE "\n";
            }
            print MSP_FILE "\n";
            close(MSP_FILE);
            if(-s $output_MSP_file_name and $output_MSP_file_name !~/^\s*\.mspa$/){
                    print "\n# write_mspa_files: $output_MSP_file_name is written\n" if(-s $output_MSP_file_name);
            }else{
                    print "\n# write_mspa_files: ERROR. Either $output_MSP_file_name is empty or \".msp\" is written\n";
            }
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # When HASH INPUT IS given
        #__________________________________________
        }else{
            for($i=0; $i< @hash; $i++){
                my %hash=%{$hash[$i]};
                my @keys =sort keys %hash;
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # If s option with $output_MSP_file_name is given
                #____________________________________________________
                if($single_out_opt eq 's' ){      $mspa_file_out=$output_MSP_file_name;
                }elsif($hash{$keys[0]}){
                    # file name comes from the 4th column of the first line
                    ($query_seq_name)=$hash{$keys[0]}=~/\S+\s+\d+\s+\d+\s+(\S+)\s+\d*\s+\d*\s+\S+\s*\d*$/;
                    $mspa_file_out="$query_seq_name\.msp";
                }else{ die "\n :-<  \$query_seq_name is not defined. Error\n\n\n"; }

                # opening again closes the handle used for the previous hash
                open(MSP_FILE, '>', $mspa_file_out) or die "\n# write_mspa_files: Failed to open $mspa_file_out\n";
                print MSP_FILE "# (H) $0 write_mspa_files: $query_seq_name\n";
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # for Final output
                #_____________________________
                push(@final_out, $mspa_file_out);
                #~~~~~~~ Writing the first line only ~~~~~~~~~~~~~~~~~~
                for($j=0; $j< @keys; $j++){
                    if($keys[$j]=~/^(\S+)_\d+\-\d+/){
                       $N = $1
                    }elsif($keys[$j]=~/^(\S+)\s+(\S+)/){
                       $N = $1;
                    }else{ $N = $keys[$j] }
                    if($hash{$keys[$j]}=~/\s+$N[\_\d+\-\d+]*\s+\d+\s+\d+\s+$N[\_\d+\-\d+]*/){
                       # put the self match on top (same swap as the FILE branch)
                       @keys[0, $j] = @keys[$j, 0];
                    }
                }
                for($j=0; $j< @keys; $j++){
                    chomp($hash{$keys[$j]});
                    print MSP_FILE $hash{$keys[$j]}, "\n";

                }
                print MSP_FILE "\n";
            }
            print MSP_FILE "\n";
            close MSP_FILE;   # flush before the size is tested
            if(-s $mspa_file_out and $mspa_file_out=~/\S+\.msp/){
                 print "\n :-) write_mspa_files: $mspa_file_out is written\n";
            }else{
                 print "\n\n :-(  write_mspa_files: Either $output_MSP_file_name empty or only \".msp\" is written\n\n\n";
            }
         }
    }
    if(@final_out ==1){ return( \$final_out[0] );
    }else{    return(\@final_out);    }
}




#______________________________________________________________________
# Title    : write_aln_files
# Function : writes multiple seqs. in CLUSTAL W (aln) format (takes one or more seqs.)
# Usage    : two arguments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has names as keys and sequences as values.
#             uses Perl5 pointers(references).
# Example  : &write_aln_files(\%hash, \$out_file_name);
#  CLUSTAL W (1.74) multiple sequence alignment
#
#
#  MMAF6040_1           -----MATDD--SIIVLDD----DDEDEA-AAQP-GPSNLPPN-PASTGPGPGLSQQATG
#  AF015956_1           -----MATAN--SIIVLDD----DDEDEA-AAQP-GPSHPLPN-AASPGAG---------
#  HSAB2381_80-900      KQRLLSVTSDEGSMNAFTGRGSPDTEIKINIKQESADVNVIGNKDVVTEEDLDVFKQAQE
#                             .* :  *: .: .    * * :    *  .  :   *  . .  .
#
# Options  :
#     $first_sequence_name= by f=  # to put a certain seq at the first in writing
# Keywords :
# Version  : 1.1
#----------------------------------------------------------------------
sub write_aln_files{
	 # Purpose : write sequences to a file in CLUSTAL W style blocks of 60
	 #           residues and return the same block text.
	 # Args    : in any order
	 #             hash ref      name => sequence string
	 #             scalar ref    output file name
	 #             'f=PATTERN'   name (matched as a regex) to put first
	 #             other string  output file name
	 # Returns : ref to a hash, output file name => block text (the CLUSTAL
	 #           header line is written to the file only).
	 # Changes over the previous version:
	 #   - all working variables are lexical (the old code overwrote the
	 #     package globals $i, $names and $seq);
	 #   - a failing open is reported with warn instead of being ignored;
	 #   - newlines are removed before the length is measured, and the longest
	 #     sequence sets the number of blocks, so nothing is cut off.

	 $| =1;
	 my(%input, $output_file, $first_sequence_name, %final_output);
	 for my $arg (@_){
			 if(ref($arg) eq 'HASH'){
					 %input=%{$arg};
			 }elsif(ref($arg) eq 'SCALAR'){
					 $output_file=${$arg};
			 }elsif($arg=~/f=(\S+)/){
					 $first_sequence_name=$1;
			 }else{
					 $output_file=$arg;
			 }
	 }
	 my $out_key = defined($output_file) ? $output_file : '';

	 # An undefined name must not reach 3-arg open(): that would create an
	 # anonymous temporary file instead of failing.
	 my $aln_fh;
	 if($out_key eq '' or !open($aln_fh, '>', $out_key)){
			 warn "\n# write_aln_files: can not write to \"$out_key\": $!\n";
			 undef $aln_fh;
	 }
	 print {$aln_fh} 'CLUSTAL W (1.74) multiple sequence alignment', "\n\n" if $aln_fh;

	 my(@names) = sort keys %input;
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Putting the wanted top first seq name at the top
	 #____________________________________________________
	 if($first_sequence_name){
			 for my $k (0 .. $#names){
					 if($names[$k]=~/$first_sequence_name/){
							 @names[0, $k] = @names[$k, 0];
					 }
			 }
	 }

	 # %input is a copy, so the caller's strings are not modified.
	 my $larg = 0;
	 for my $name (@names){
			 $input{$name} = '' unless defined $input{$name};
			 $input{$name} =~ s/\n//g; ## this is to remove MANY new lines in the input !!
			 $larg = length($input{$name}) if length($input{$name}) > $larg;
	 }

	 for(my $start=0; $start < $larg; $start+= 60){    # 60 residues interval
			 for my $name (@names){                        # number of sequences
					 my $chunk = ($start < length($input{$name}))
					           ? substr($input{$name}, $start, 60) : '';
					 my $line  = sprintf("%-18s %-60s\n", $name, $chunk);
					 print {$aln_fh} $line if $aln_fh;
					 $final_output{$out_key} .= $line;
			 }
			 print {$aln_fh} "\n" if $aln_fh;
			 $final_output{$out_key} .= "\n";
	 }
	 close($aln_fh) if $aln_fh;
	 return(\%final_output);
}

#______________________________________________________________________
# Title    : write_msf
# Function : writes multiple seqs. in msf format (takes one or more than one seq.!!)
# Usage    : two arguments:  $seq_hash_reference  and $output_file_name
#            takes a hash which has got names keys and sequences values.
#            uses Perl5 pointers(references).
# Example  : &write_msf(\%hash, \$out_file_name, ["o=$seq_order"]);
#             eg) $seq_order='asdf seq2 seq3 seq5';
#    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#    PileUp
#
#       MSF: 1205  Type: P    Check:  9937   ..
#
#     Name: PYC1_YEAST      oo  Len: 1205  Check:  7954  Weight:  1.00
#     Name: PYC2_YEAST      oo  Len: 1205  Check:  5807  Weight:  1.00
#     Name: PYC_MOUSE       oo  Len: 1205  Check:  6176  Weight:  1.00
#
#    //
#
#    PYC1_YEAST      MSQ.RKFAGL RDNFNLLGEK N......... .......... .KILVANRGE
#    PYC2_YEAST      MSSSKKLAGL RDNFSLLGEK N......... .......... .KILVANRGE
#    PYC_MOUSE       ...MLKFQTV RGGLRLLGVR RSSSAPVASP NVRRLEYKPI KKVMVANRGE
#
#    PYC1_YEAST      IPIRIFRTAH ELSMQTVAIY SHEDRLSTHK QKADEAYVIG EVGQYTPVGA
#    PYC2_YEAST      IPIRIFRSAH ELSMRTIAIY SHEDRLSMHR LKADEAYVIG EEGQYTPVGA
#    PYC_MOUSE       IAIRVFRACT ELGIRTVAVY SEQDTGQMHR QKADEAYLIG R..GLAPVQA
#
#    PYC1_YEAST      YLAIDEIISI AQKHQVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC2_YEAST      YLAMDEIIEI AKKHKVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC_MOUSE       YLHIPDIIKV AKENGVDAVH PGYGFLSERA DFAQACQDAG VRFIGPSPEV
#
# Keywords : write_msf_files, save_msf_files
# Version  : 2.2
#------------------------------------------------------------
sub write_msf{
		 # Writes the sequences of a hash (name => sequence string) to a file:
		 # one header line, one " Name:" line per sequence, a "//" line, then
		 # blocks of 50 residues per sequence with a blank after every 10.
		 #   $_[0] : reference to the hash of sequences
		 #   $_[1] : output file name, plain string or reference to a string
		 #   $_[2] : optional order of names, "o=name1 name2 ..." or an array ref
		 # Returns a reference to an array that holds the output file name.
		 # Dies when fewer than 2 arguments are given or a sequence is empty.
		 # NOTE(review): @seq_order, $seq_order_item, $names_item, $j, $len,
		 #  $seq_leng and $names are not declared with my here, so they are
		 #  package variables; the format below reads $names and $seq.
		 my($seq, $string, $name, $k, $i, $longest_seq_leng, @seq_order_final,
				$seq_order, @files_created, @names, $fill_seq_to_the_end);
		 $| =1;
		 if(@_ < 2){
					print "\n# write_msf: I need 2 arguments(hash and filename). Look at the header box\n";
					print chr(7); die;
		 }
		 my($gap_char)='-';
		 my(%input)=%{$_[0]};
		 # accepts a reference to the name or the name itself
		 my($output_file)=${$_[1]} || $_[1];

		 if($_[2]=~/o=(.+)/){
				 $seq_order=$1;
				 @seq_order=split(/\s+/, $seq_order);
		 }elsif(ref($_[2]) eq 'ARRAY'){
				 @seq_order=@{$_[2]};
		 }
		 @names = sort keys %input;

		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # If seq order information is given, sort the out output
		 #________________________________________________________________
		 if(@seq_order > 0){ # when seq names were given in an order, you just use it
				 print "\n# (INFO) \@seq_order is given \n";
				 for($i=0; $i< @seq_order; $i++){
						 # names are compared with a trailing "_rv" removed on both sides
						 $seq_order_item=$seq_order[$i];
						 if($seq_order_item=~/_rv$/){ $seq_order_item=~s/_rv$// }
						 for($j=0; $j<@names; $j++){
								$names_item=$names[$j];
								if($names_item=~/_rv$/){ $names_item=~s/_rv$// }
								if($seq_order_item eq $names_item){
										push(@seq_order_final, $names[$j]);
										splice(@names, $j, 1); $j--;
								}else{ next }
						 }
				 }
				 # names that were not listed in @seq_order are dropped here
				 @names=@seq_order_final;
		 }
		 $longest_seq_leng=length($input{$names[0]});
		 for $name (@names){
				 $len = length($input{$name});
				 if($len< 1){ print "\n# (ERROR) The length of seq. in \%input with $name key is 0, error!\n";
						 die;  }
				 $longest_seq_leng=$len if $len > $longest_seq_leng;
		 }

		 push(@files_created, $output_file);
		 # the result of this open is not checked
		 open (MSF_FILE_OUT,">$output_file");        # $string is the seq string.

		 print MSF_FILE_OUT " $output_file  MSF: $longest_seq_leng",' Type: P    Check:  9937   .. '; ## This is dummy
		 print MSF_FILE_OUT "\n\n";


		 # every Name line reports the longest length and a fixed Check value
		 for $name (@names){
					 $len = length($input{$name});
					 printf MSF_FILE_OUT (" Name: %-15s   Len: %-5s Check:  9999  Weight:  1.00\n", $name, $longest_seq_leng);
		 }
		 print MSF_FILE_OUT "\n";
		 print MSF_FILE_OUT "\/\/\n\n";
#""""""""""""""""""""""""""""""""""""""""""""""""""
#             MSF file form
#==================================================
format MSF_FILE_OUT =
@<<<<<<<<<<<<<<<<<<<<  @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$names,         $seq
.

		 for ($k=0; $k < $longest_seq_leng; $k+=50){    # 50 residues interval
					 for($i=0; $i < @names; $i++){    # number of sequences
									$names = $names[$i];
									$input{$names[$i]}=~ s/\n//g;
									#$input{$names[$i]}=~ s/_/$gap_char/g; # automatically changes '_' to '-'

									$seq = substr($input{$names[$i]}, $k, 50);
									$seq_leng=length($seq);

									#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									# This is to fill the empty space to make complete block of seq
									#________________________________________________________________
									# $fill_seq_to_the_end is never assigned in this sub,
									# so this branch does not run as written
									if($fill_seq_to_the_end){
												$seq .="$gap_char"x($longest_seq_leng-$seq_leng); # putting '---' at the blank line end
												# Above option will coredump 'seaview' seq editor
									}
									$seq=~s/[\-_ ]/$gap_char/g; # setting the final gap_char you like

									$seq = &put_gaps_every_x_position_in_string_special($seq, 10, ' ');
									# Named sub declared inside the loop body; named subs
									# are package-level, so it is defined once, not per pass.
									# Inserts $_[2] after every $_[1] characters of the
									# string (or referenced string) in $_[0].
									sub put_gaps_every_x_position_in_string_special{
											my($string); if(ref($_[0])){ $string = ${$_[0]};
																	 }else{ $string = $_[0]; }
											my($interval) = $_[1];     my($gap_char) = $_[2];
											$string =~ s/(.{$interval,$interval})/$1$gap_char/g;
											return($string);
									}

									# write uses the MSF_FILE_OUT format declared above
									select (MSF_FILE_OUT); ## to print out to a FILE
									write MSF_FILE_OUT;
					 }
					 # MSF_FILE_OUT is the selected handle here, so this goes to the file
					 print "\n";                       # next block starts.
		 }
		 close(MSF_FILE_OUT);
		 select STDOUT;  # <- this is necessary to normalize output for other sub
		 return(\@files_created);
}




#______________________________________________________________
# Title     : get_seqblock
# Usage     :
# Function  :
# Example   : @blocks_in_hash=@{&get_seqblock(\%msf, 30)};
# Warning   :
# Keywords  : find_sequence_block, get_sequence_block, BLOCK
#             make_seq_block, make_seqblock, find_seqblock, sequence_block
# Options   : _  for debugging.
#             #  for debugging.
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  :
# Category  :
# Authors   : jong@mrc-lmb.cam.ac.uk
# Version   : 1.4
#--------------------------------------------------------------
sub get_seqblock{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	# Pipeline: convert_char_to_0_or_1_hash -> add_columns ->
	# get_high_score_blocks -> get_seq_fragments.  Returns a reference to
	# the array returned by get_seq_fragments.
	#
	# Options read here:
	#   m=  margin (default 3)        t=  threshold (default 0.8)
	#   l=  min seqlet size (25)      c=  connect gap (default 5)
	#   b, r, c, v  single-char flags forwarded to get_high_score_blocks
	#               ('r' is also forwarded to get_seq_fragments)
	my ($connect_gap, @seq_frag, %digitized, $verbose, %hash, $best_block_opt);

	# Everything below used to live in package globals.  @RANGE,
	# $range_in_name and $connect_opt were never reset, so ranges and option
	# flags from an earlier call leaked into every later call.
	my ($margin_ori, $range_in_name, $connect_opt, @RANGE, %added, %blocks);

	my $margin=3;
	my $threshold=0.8;
	my $min_seqlet_size=25;
	$connect_gap=5;
	for my $opt_name (keys %vars){
	   if($opt_name eq 'm'){
		  $margin=$margin_ori=$vars{$opt_name};
	   }elsif($opt_name eq 't'){
		  $threshold=$vars{$opt_name};
	   }elsif($opt_name eq 'l'){
		  $min_seqlet_size=$vars{$opt_name};
	   }elsif($opt_name eq 'c'){
		  $connect_gap=$vars{$opt_name} if( defined($vars{$opt_name}) );
	   }
	}
	if($char_opt=~/b/){ $best_block_opt='b' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/c/){ $connect_opt ='c' }
	if($char_opt=~/v/){ $verbose='v' }

	# When several hashes are passed, only the last one survives this loop
	# (behaviour kept from the original).
	for($o=0; $o<@hash; $o++){
	  %hash=%{$hash[$o]};
	  %digitized=%{&convert_char_to_0_or_1_hash($hash[$o])};
	}

	%added=%{&add_columns(\%digitized)}; # 11111 + 1010101 => 2121211

	&show_hash(\%added) if ($debug==1);

	%blocks=%{&get_high_score_blocks(\%added,
			   "m=$margin", "t=$threshold", "l=$min_seqlet_size", $verbose,
			   "c=$connect_gap", $connect_opt, $best_block_opt, $range_in_name)};

	# %blocks maps block start => block end; keep only the blocks that are
	# at least $min_seqlet_size long and hand them over as "start-end".
	for my $block_start (keys %blocks){
	   my $block_end   =$blocks{$block_start};
	   my $seq_let_leng=$block_end - $block_start + 1;
	   next if $seq_let_leng < $min_seqlet_size;
	   push(@RANGE, $block_start . '-' . $block_end);
	}
	@seq_frag=&get_seq_fragments(\%hash, @RANGE,
	    "l=$min_seqlet_size", "$range_in_name");
	return(\@seq_frag);
}


#______________________________________________________________
# Title     : add_columns
# Usage     :
# Function  :
# Example   :
# Warning   : if the attached name is too long(over 12 char),
#             it changes to 'Added_upX' while X is a numb.
# Keywords  : add_seq_columns, add_sequence_columns,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub add_columns{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	# For every input hash (name => number string) the number strings are
	# added column by column and a one-entry hash
	#     { <name> => "n n n ..." }      (numbers joined by a single space)
	# is produced.  <name> is 'Add_<name1>_<name2>...' or, when that is
	# longer than 12 characters, 'Added_up<index of the input hash>'.
	#
	# Returns: list context   -> one hash ref per input hash
	#          scalar context -> the hash ref for the first input hash
	my $gap_char=' ';

	# Must be lexical.  As a package global it was never emptied, so a
	# scalar-context call returned the result of the very first call ever
	# made to this sub.
	my @OUT_HASH;

	for($i=0; $i<@hash; $i++){
	  my %hash=%{$hash[$i]};
	  my @names=keys %hash;
	  my %final_hash_out;
	  my @final_added;
	  my $out_seq_name='Add';
	  for($j=0; $j<@names; $j++){
	     $out_seq_name.= "_$names[$j]";
		 my $string =$hash{$names[$j]};
		 my @ar_string;
		 if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			# numbers separated by blanks or commas
			@ar_string =split(/$gap_char|\,/, $string );
		 }elsif($string=~/^\d{5,}$/){
			# one unbroken run of single digits
			@ar_string =split(//, $string );
		 }
		 # A string matching neither layout contributes nothing.
		 for($s=0; $s < @ar_string; $s++){
			$final_added[$s]=$ar_string[$s]+$final_added[$s];
		 }
	  }
	  if(length($out_seq_name) > 12){ $out_seq_name="Added_up${i}"; }
	  $final_hash_out{$out_seq_name}=join("$gap_char", @final_added);
	  push(@OUT_HASH, \%final_hash_out);
	}
	return wantarray ? @OUT_HASH : $OUT_HASH[0];
}


#____________________________________________________________________
# Title     : get_high_score_blocks
# Usage     : get_high_score_blocks(<ref. of hash for number string>)
# Function  : gets hash of key and number string and filters out the
#              number string region which is below certain threshold
#              determined inside this sub and returns a selected high
#              number regions
# Example   : %block_start_end=%{&get_high_score_blocks(\%input_numb_block)};
#             %out=%{&get_high_score_blocks(\%inp_numbs, 'v', 'b')};
# Warning   : This assumes that the inputs are multiply aligned seq
# Keywords  : high_scoring_regions
#             get_high_scoring_blocks, find_blocks, get_blocks
# Options   : _  for debugging.
#             #  for debugging.
#             b  for best_block_opt, returns best block only
#             v  for showing the final range hash output
#             c  for connect close blocks
#             c= for connect close blocks with specific closing gap size
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  : accepts one single ref. of hash
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------
sub get_high_score_blocks{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	# Scans a number string and returns a reference to a hash of
	#     block start => block end
	# for the stretches whose values stay above  largest value * threshold.
	# A hash with several entries is first collapsed with add_columns.
	#
	# Options: m= margin (2)   t= threshold (0.8)   l= min block length (25)
	#          c / c=N  connect blocks whose gap is smaller than N (5)
	#          b  keep only the longest block     v  verbose printing
	my $min_seqblock_leng=25;
	my $threshold=0.8;
	my (%block_start_end, @possible_block, %hash, $range_in_name,$connect_gap);
	my $margin_ori=2;          # was a package global; only used inside this sub
	my $margin=$margin_ori;    # $margin is m in .....mmm111111111111mmm.....
	my $gap_char=' ';
	my @vars=keys %vars;
	my $connection_gap=5;
	my $connect_opt=1;

	# These two were package globals.  @block_s_e was never emptied, so the
	# blocks found by earlier calls came back whenever blocks were connected;
	# @ar_string could carry the previous call's numbers into this one.
	my (@ar_string, @block_s_e);

	for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'm'){
		  $margin=$margin_ori=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 't'){
		  $threshold=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'l'){
		  $min_seqblock_leng=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'c'){
		  $connect_opt='c';
		  $connection_gap=$vars{$vars[$i]};
		  #print "\n get_high_score_blocks: \$connection_gap is $connection_gap\n";
	   }
	}
	if($char_opt=~/c/){ $connect_opt='c' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/v/){
		print "\n  \$threshold         is $threshold    ";
		print "\n  \$margin            is $margin       ";
		print "\n  \$min_seqblock_leng is $min_seqblock_leng \n\n";
		print "\n  \$connection_gap   is $connection_gap \n\n";
		print "\n  \$connect_opt      is $connect_opt \n\n";
	}

	for($i=0; $i<@hash; $i++){
	   my @range;
	   my %hash_ori=%{$hash[$i]};
	   my @names=keys %hash_ori;
	   if(@names>1){ # If the hash has multi entry, make one added up hash
	      %hash=%{&add_columns(\%hash_ori)};
	      @names=keys %hash;
	   }else{  %hash=%hash_ori;    }
	   for($j=0; $j< @names; $j++){
		  my $string=$hash{$names[$j]};
		  if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			 @ar_string =split(/$gap_char|\,/, $string );
		  }elsif($string=~/^\d{4,}$/){ ## the string should be minimum 4 length
			 @ar_string =split(//, $string );
		  }
		  my $largest = ${&get_largest_element(\@ar_string)};
		  my $cut_line=$largest*$threshold;
		  #print "\n \$cutline in get_high_score_blocks is $cut_line \n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
		  # @possible_block collects (margin-adjusted) positions of the current
		  # run above the cut line.  Once the run holds $min_seqblock_leng
		  # positions, the inner while loop follows it to its end and the
		  # block is recorded as  first position => last position.
		  for($s=0; $s< @ar_string; $s++){
			 if($ar_string[$s] > $cut_line){ # possible_block is the increasing seqlet
				if(@possible_block == $min_seqblock_leng){
				   while( $ar_string[$s] > $cut_line){
					  $ar_string[$s]=1;
					  # shrink the margin so the end never passes the array end
					  while($s+1+$margin > @ar_string){ $margin-- }
					  push(@possible_block, ($s+1+$margin));
					  $margin=$margin_ori;
					  $s++;
				   }
				   $ar_string[$s]=0;  #<--- Should be 0 than 1
				   $block_start_end{$possible_block[0]}=$possible_block[$#possible_block];
				   @possible_block=();
				}else{
				   $ar_string[$s]=1;
				   # shrink the margin so the start never drops below 0
				   while(($s+1-$margin) < 0){ $margin-- };
				   push(@possible_block, ($s+1-$margin) );
				   $margin=$margin_ori;
				}
			 }elsif($ar_string[$s] <= $cut_line){
				$ar_string[$s]=0;
				@possible_block=();
			 }
		  }
		  #print "\n", @ar_string,"\n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
	   }
	}
	#print "\n@ar_string\n";
	#&show_hash(\%block_start_end);
	&show_hash(\%block_start_end) if($char_opt=~/v/);

	#~~~~~~~~~~~~  Connecting blocks ~~~~~~~~~~~~~~~~~~~~~~
	if($connect_opt=~/c/){
	   my @keys=sort numerically keys %block_start_end;
	   # Named comparator kept as in the original: a named sub is package
	   # wide, so other code in the file may rely on it being defined here.
	   sub numerically{  $a <=> $b;  }

	   ### sorting the %block_start_end
	   # flatten to (start1, end1, start2, end2, ...) in ascending start order
	   for($i=0; $i< @keys; $i++){
		  push(@block_s_e, $keys[$i], $block_start_end{$keys[$i]});
	   }

	   # $i always sits on an END entry, $i+1 on the next START entry.
	   for($i=1; $i< $#block_s_e; $i++){  ## must be $#block_s_e to stop
		  my $first_end   =$block_s_e[$i];   ## before it removes everything
		  my $second_start=$block_s_e[$i+1];

		  #""""""" if gap is smaller than connection_gap given """"""""
		  if($connection_gap > ($second_start-$first_end) ){
			 # drop "end, next start": the two blocks become one
			 splice(@block_s_e, $i, 2);
			 $i--;
		  }else{
		     $i++; # to skip to the next start correctly
		  }
	   }
	   %block_start_end=@block_s_e;
	}
	print "\n# Blocks start and end after connection(gap was $connection_gap)\n"  if($char_opt=~/v/);

	&show_hash(\%block_start_end)  if($char_opt=~/v/);

	#~~~~~~~~~~~~ Getting the largest ~~~~~~~~~~~~~~~~~~~~~~~
	if($char_opt=~/b/){
	   print "\n# Getting the largest block only to get all the blocks use a opt\n";
	   my @keys=keys %block_start_end;
	   my ($largest, %largest,$range_size, $largest_key);
	   for($i=0; $i< @keys; $i++){
		  $range_size = $block_start_end{$keys[$i]}-$keys[$i];
		  if(!defined($largest) or $range_size > $largest){
			 $largest=$range_size;
			 $largest_key=$keys[$i];
			 #print "\n $largest_key \n";
		  }
	   }
	   # With no blocks at all the original stored a bogus '' => undef entry;
	   # now the result simply stays empty.
	   $largest{$largest_key}=$block_start_end{$largest_key} if defined($largest_key);
	   %block_start_end=%largest;
	   print "\n# The best block chosen (from to) \n"  if($char_opt=~/v/);
	   &show_hash(\%block_start_end) if($char_opt=~/v/);
	}
	return(\%block_start_end);
}






#______________________________________________________________
# Title     : delbut
# Usage     : delbut *.zip  (delete files except xxxx.zip)
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub delbut{
		# Deletes everything in the current directory EXCEPT the named files.
		#
		# Call forms:
		#   delbut(@files_to_keep)           list of names; a bare 's' that is
		#                                    not an existing file switches on
		#                                    sub-directory removal
		#   delbut(\@files_to_keep, $flag)   $flag (string or scalar ref)
		#                                    containing 's' does the same
		#
		# Dies (after printing a message) when a file to keep does not exist,
		# before anything has been deleted.
		my ($remove_subdir, @save_files);

		# The original wrote  @{$_[0]} || @_ : '||' puts its left side in
		# scalar context, so an array ref argument yielded the element COUNT
		# instead of the file names.  It also fell back to whatever the global
		# $_ held for the sub-directory flag.
		if(ref($_[0]) eq 'ARRAY'){
			 @save_files=@{$_[0]};
			 $remove_subdir=ref($_[1]) eq 'SCALAR' ? ${$_[1]} : $_[1];
		}else{
			 @save_files=@_;
		}
		$remove_subdir='' unless defined($remove_subdir);

		for my $keep (@save_files){
			 if($keep=~/^s\s*$/ and !(-e $keep)){ $remove_subdir='s'; next }
			 unless(-e $keep){
				 print "\n\n \"$keep\" does not exist, so nothing is deleted\n\n";
				 print chr(7);
				 die;
			 }
		}
		my @files=@{&read_dir_and_file_names_only('.')};
		my @del_files=@{&subtract_array(\@files, \@save_files)};

		for my $target (@del_files){
			 if(-d $target){
			    if( $remove_subdir=~/s/i){
						 # list form: no shell, so names with blanks or shell
						 # metacharacters cannot delete anything else
						 system('rm', '-fr', '--', $target);
					}else{
					   print "\n# subdir $target has not been deleted\n";
					}
			 }else{
			    unlink($target) or warn "# could not delete $target: $!\n";
			 }
		}
}

#______________________________________________________________________________
# Title     : subtract_hash
# Usage     :
# Function  :
# Example   :
# Keywords  : subtract_hash_by_keys, get_diff_between_2_hashes, subtract_hashes
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub subtract_hash{
    # Returns a reference to a new hash holding every key/value pair of the
    # first hash whose key is absent from the second hash.
    #   $_[0]  ref. of the hash to take entries from
    #   $_[1]  ref. of the hash whose keys are to be removed
    # Both inputs are copied first and are never modified.
    my ($take_from_ref, $remove_ref) = @_;
    my %File_1 = %{$take_from_ref};
    my %File_2 = %{$remove_ref};
    my %Hash1_minus_Hash2;
    for my $key (sort keys %File_1){
       next if exists $File_2{$key};
       $Hash1_minus_Hash2{$key} = $File_1{$key};
    }
    return \%Hash1_minus_Hash2;
}

#________________________________________________________________________
# Title     : subtract_array
# Usage     : @subs = @{&subtract_array(\@array1, \@array2)};
# Function  : removes from the first input array every element that
#             also occurs in the second input array.
# Example   : Following will produce (A K C);
#		@array1= qw( A B K B B C);
#  		@array2= qw( B E D);
#  		@subs = @{&subtract_array(\@array1, \@array2)};
# Keywords  : array_subtract, substract_array, ary1_minus_ary2
# Options   :
# Returns   :
# Argument  :
# Authors   : jong@biosophy.org
# Version   : 1.4
#--------------------------------------------------------------------
sub subtract_array{
    # Returns a reference to a new array with the elements of the first
    # array, in their original order, that do not occur in the second array.
    #   $_[0]  ref. of the array to filter
    #   $_[1]  ref. of the array of elements to drop
    my ($keep_from_ref, $drop_ref) = @_;
    my %is_dropped;
    $is_dropped{$_} = 1 for @{$drop_ref};
    my @kept;
    for my $item (@{$keep_from_ref}){
        push(@kept, $item) unless $is_dropped{$item};
    }
    return \@kept;
}


#______________________________________________________________
# Title     : get_mspa_range
# Usage     : @range=@{&get_mspa_range($seqlet)};
#             @temp=&get_mspa_range($seqlet);
#
# Function  :
# Example   :
# Warning   :
# Keywords  : get_mspa_file_ranges
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#--------------------------------------------------------------
sub get_mspa_range{
	 # Parses one MSP line of the form
	 #    <int> <number> <start1> <end1> <name1> <start2> <end2> <name2> ...
	 # Argument: the line, or a reference to it.
	 # Returns : scalar context -> ref. of (start1, end1, start2, end2)
	 #           list context   -> (that ref., ref. of name1, ref. of name2)
	 # The range array is empty when the line does not match or when
	 # name1 equals name2 (self match).
	 #
	 # ref() is tested explicitly: the former  ${$_[0]} || $_[0]  dereferenced
	 # a plain string as a symbolic reference (fatal under 'strict refs').
	 my $lines1=ref($_[0]) ? ${$_[0]} : $_[0];
	 my ($SEQ, $matched_SEQ, @Ranges);
	 if(defined($lines1) and
	    $lines1 =~/^\s*\d+\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
	  unless($SEQ eq $matched_SEQ){ ## skipping self match
		  @Ranges=($1, $2, $4, $5);  ## <-- example. (10-20, 30-45)
	  }
	 }
	 return wantarray ? (\@Ranges, \$SEQ, \$matched_SEQ): \@Ranges;
}
#______________________________________________________________
# Title     : get_mspa_enquiry_sequence
# Usage     :
# Function  : gets the name of sequence used as enquiry(target)
# Example   :
# Warning   :
# Keywords  : get_mspa_target_sequence, get_mspa_enquiry_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_mspa_enquiry_sequence{
	 # Argument: one MSP line, or a reference to it.
	 # Returns : ref. of the first sequence name on the line (the enquiry
	 #           sequence); the referenced value is undef when the line does
	 #           not look like an MSP line.
	 #
	 # ref() is tested explicitly: the former  ${$_[0]} || $_[0]  dereferenced
	 # a plain string as a symbolic reference (fatal under 'strict refs').
	 my $lines1=ref($_[0]) ? ${$_[0]} : $_[0];
	 my $SEQ;
	 if(defined($lines1) and
	    $lines1 =~/^\s*\d+\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
	  $SEQ=$3;
	 }
	 return \$SEQ;
}

#______________________________________________________________
# Title     : get_mspa_matched_sequence
# Usage     :
# Function  : gets the name of the matched sequence (the second sequence name on an MSP line)
# Example   :
# Warning   :
# Keywords  : get_mspa_matched_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_mspa_matched_sequence{
	 # Argument: one MSP line, or a reference to it.
	 # Returns : ref. of the second sequence name on the line (the matched
	 #           sequence); the referenced value is undef when the line does
	 #           not look like an MSP line.
	 #
	 # ref() is tested explicitly: the former  ${$_[0]} || $_[0]  dereferenced
	 # a plain string as a symbolic reference (fatal under 'strict refs').
	 my $lines1=ref($_[0]) ? ${$_[0]} : $_[0];
	 my $matched_SEQ;
	 if(defined($lines1) and
	    $lines1 =~/^\s*\d+\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/){
	  $matched_SEQ=$6;
	 }
	 return \$matched_SEQ;
}


#______________________________________________________________________________
# Title     : get_line_number
# Usage     :
# Function  :
# Example   :
# Keywords  : get_line_number_from_cursor, line_number, get_line_position
# Options   :
# Author    : jong@biosophy.org, from Andrew E. Page, aep@world.std.com
# Category  : Tk
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_line_number{
    # Argument: an object that answers ->index('insert') (presumably a Tk
    #           text widget, going by the 'Tk' category in the header).
    # Returns : a reference to the integer part of that index, i.e. the
    #           text before the first '.'.
    my ($self) = @_ ;
    my ($line_num_info) ;

    $line_num_info = $self->index('insert') ; # get the location for the insertion point
    $line_num_info =~ s/\..*$/\.0/ ;          # "12.34" -> "12.0"

    # The original returned  int $info , a variable that is never assigned,
    # so the answer was always 0.
    my $line_number = int $line_num_info;
    return \$line_number; # return a reference
} # end of get_line_number


#______________________________________________________________
# Title     : get_linked_sequence
# Function  : opens msp file and links the sequences according
#             to the matches.
# Usage     :
# Example   : seq1 ------------------------------
#                            |||||||||||
#             seq2        --------------------------------
#             OUT  000000000011111111111000000000000000000
#
# Warning   :
# Keywords  : link_sequence_from_mspa_file, linked_sequenced_length
#             get_clustered_sequence_length, get_annexed_sequence_length
#             connect_sequences, merge_sequences, combine_sequences
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : A ref. of an array
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_linked_sequence{
	# Reads each MSP file given as argument and returns a reference to the
	# array @LINKED built at the end of this sub.
	#
	# Outline of what the code does:
	#  1. open_mspa_files(\$file, '-s') supplies a hash of sequence name =>
	#     size; for every name a PACKAGE-GLOBAL array with that very name is
	#     created through a symbolic reference and filled with zeros.
	#  2. First pass over the file lines: for hits scoring >= $Threshold (40)
	#     between two different sequences, per-position counters in those
	#     global arrays are incremented.
	#  3. Second pass: head and tail overhangs of the matched sequence
	#     relative to the query are computed and summed into @head_array and
	#     @tail_array.
	#  4. @LINKED = head slice of the last match's array + the query's
	#     array + @tail_array.
	#
	# NOTE(review): most working variables here ($query_name, $match_name,
	# @matched_members, @tail_array, @head_array, ...) are package globals and
	# therefore keep their values between calls.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 # NOTE(review): $final_leng is listed twice in the declaration below.
	 my ($final_leng, $start_diff,@MSP, %seq_sizes, $final_leng);
				 my ($max_head_overhang, $head_diff, $tail_diff,
		 $off_set, $max_tail_overhang, @LINKED, $LINKED);
	 # Minimum score (first column of an MSP line) for a hit to be used.
	 my $Threshold=40;
	 for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];
	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  # NOTE(review): a missing file is only reported; execution continues.
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_mspa_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  my %seq_sizes=%{&open_mspa_files(\$input_file, '-s')};
	  my @NAmes=keys %seq_sizes;
	  for($s=0; $s< @NAmes; $s++){ # making '000000000000000.....';
		 my $len=$seq_sizes{$NAmes[$s]};
		 for($t=0; $t< $len; $t++){
			# symbolic reference: writes the global array named after the sequence
			${"$NAmes[$s]"}[$t]=0;
		 }
	  }
	  # NOTE(review): the result of open() is not checked.
	  open(FILE_1,"$input_file");
	  my @MSP=<FILE_1>;
	  close(FILE_1);
	  # ---- First pass: count, per position, how often it is covered by a hit
	  for($j=0; $j<@MSP; $j++){
		 if($MSP[$j]=~/^\s*(\d+)\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\w+)\s+(\d+)\s+(\d+)\s+(\w+)\s+(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   # file positions are converted to 0-based array indices
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			   # NOTE(review): the query name was stored in $query_seq just
			   # above, yet $query_name is used here; $query_name is assigned
			   # only in the second pass below -- confirm this is intended.
			   for($x=$query_start; $x<= $query_end; $x++){
				  ${"$query_name"}[$x]++;
			   }
			   for($y=$match_start; $y<= $match_end; $y++){
				  ${"$match_name"}[$y]++;
			   }
			}
		 }
	  }
	  # ---- Second pass: work out head/tail overhangs of the matched sequences
	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                 $1                $2     $3    $4      $5     $6    $7     $8
		 #                 171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^\s*(\d+)\s+\d+\.?[e\-\d]*\s+(\d+)\s+(\d+)\s+(\w+)\s+(\d+)\s+(\d+)\s+(\w+)\s+(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   # self match: remember the query name; $query_leng is taken
			   # from the end position ($3) of the self match
			   $query_name=$4;   $query_leng=$3;
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			}
			# The statements below run for EVERY line that matched the pattern,
			# including self matches and hits below $Threshold; in those cases
			# they reuse the values left over from an earlier line.
			@matched_seq_array= @{"$match_name"};
			$matched_seq_array=join('', @matched_seq_array);
			my $start_diff= $query_start - $match_start;
			if($start_diff >= 0){
			   # the match starts inside the query: it may stick out at the tail
			   my $tail_diff= $start_diff + $seq_sizes{$match_name} - $seq_sizes{$query_name};
			   if($tail_diff > 0){
				  $max_tail_overhang = $tail_diff if $tail_diff > $max_tail_overhang;
				  for($x=0; $x< $tail_diff; $x++){
					 $tail_start=$seq_sizes{$match_name}-$tail_diff + $x;
					 @matched_seq_array=split(//,$matched_seq_array);
					 $tail_array[$x] +=$matched_seq_array[$tail_start];
				  }
			   }
			}elsif($start_diff < 0){
			   # the match starts before the query: it sticks out at the head
			   $head_diff = abs($start_diff);
			   $max_head_overhang=$head_diff if $head_diff > $max_head_overhang;
			   for($z=0; $z< $head_diff; $z++){
				  $head_array[$z] += ${"$match_name"}[$z];
			   }
			}
		 }
	  }
	 }
	 # NOTE(review): the head part is sliced from the array of the LAST
	 # $match_name seen; @head_array, filled above, is not used here.
	 @LINKED=( @{"$match_name"}[0..($max_head_overhang-1)], @{"$query_name"}, @tail_array);
	 $LINKED=join('', @LINKED);
	 if($debug eq 1){
	 print __LINE__, " In open_mspa_files \%sequence is", %sequence ,"\n";
	 }
	 # NOTE(review): $extened_number_line is not assigned anywhere in this
	 # sub, and $final_offset is not used afterwards.
	 $final_offset=$extened_number_line - $query_leng;
	 return(\@LINKED);
}

#______________________________________________________________________________
# Title     : get_averaged_prediction
# Usage     : %av_of_forw_backw_pred=%{&get_averaged_prediction(\%sec1, \%sec1_rv)};
# Function  : The content of out %average is
#               $averaged{$position}=[$residue1, $sec_str2, $dif_reliability];
# Example   :
# Keywords  : get_average_predator_prediction, average_predator_prediction
#             get_averaged_sec_prediction get_average_prediction, give_weights
# Options   :
#   $ignore_non_matching_residues=i by i  ## do not predict non-matching for backward pred.
#   $reverse_order_of_one_hash=r by r
#   $give_weight_with_good_match=w by w # this is to give preference to well
#   $weight_factor= by w=
#                                        matching sec. str. I add '0.1'
# Author    : jong@biosophy.org sat@mrc-lmb.cam.ac.uk
# Version   : 2.1
#------------------------------------------------------------------------------
sub get_averaged_prediction{
		 # Combines secondary structure prediction hashes of the form
		 #     $hash{$position} = [ $residue, $sec_str, $reliability ]
		 # and returns a reference to %averaged, which has the same layout plus
		 # four summary entries added at the very end of this sub:
		 #     mismatched_prediction_count, matched_prediction_count,
		 #     mismatch_stats (hash ref), num_of_residues
		 #
		 # Options: r  look the second hash up at the mirrored position
		 #          w  give weight to matching predictions (factor 1.8)
		 #          w= the same, with an explicit factor
		 #          i  write '_' where the two predictions disagree
		 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		 if($debug==1){print "\n\t\@hash=\"@hash\"
		 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

		 my($position, $residue1, $residue2, %averaged, %hash1, %hash2, $sec_str1,
				$sec_str2, $sum_reliability, $average_reliability, $dif_reliability,
				$reverse_order_of_one_hash, $give_weight_with_good_match, $weight_factor,
				$mismatched_prediction_count, $matched_prediction_count,
				%mismatch_stats, $num_of_residues, $ignore_non_matching_residues);
		 # NOTE(review): the value set here is 1.8, although the trailing
		 # comment and the "usually 0.1" remarks further down suggest a
		 # small number.
		 $weight_factor=1.8; # default positive weight factor which is really small.

		 if($char_opt=~/r/){  $reverse_order_of_one_hash='r';
				 print "\n# (i) get_averaged_prediction : \$reverse_order_of_one_hash is $reverse_order_of_one_hash";
		 }
		 if($char_opt=~/w/){  $give_weight_with_good_match='w';     $weight_factor=1.8; }
		 if($char_opt=~/i/){  $ignore_non_matching_residues='i';    }
		 if($vars{'w'}=~/(\S+)/){ $give_weight_with_good_match='w'; $weight_factor=$1 }
		 # $input_hash_num, @keys and $rev_posi below are package globals.
		 $input_hash_num=@hash;

		 print "\n#    (i) get_averaged_prediction: The number of hashes to get_averaged_prediction was $input_hash_num\n";
		 for($i=0; $i< @hash; $i++){
				 # While %averaged is still (nearly) empty the first two input
				 # hashes are paired; afterwards each further hash is combined
				 # with the running %averaged.
				 # NOTE(review): the test uses a hash in numeric context, whose
				 # value differs between perl versions -- confirm.
				 unless(%averaged > 2){
							%hash1=%{$hash[$i]};
							%hash2=%{$hash[$i+1]};
							$i++;
				 }elsif( %averaged ){
							%hash1=%averaged;
							%hash2=%{$hash[$i]};
							print "\n   # (i) Iteratively averaging hash $i (counted from 0) with previous average\n";
				 }

				 $num_of_residues=@keys=sort { $a <=> $b} keys %hash1;
				 for($k=0; $k< @keys; $k++){
							$position      =$keys[$k]; ## in case of predator prediction, key is the position of residue
							# mirrored position: number of keys - position + 1
							$rev_posi      =@keys-$position+1;

							$residue1      =$hash1{$position}->[0];
							if($reverse_order_of_one_hash){
								 $residue2      =$hash2{$rev_posi}->[0];
							}else{
								 $residue2      =$hash2{$position}->[0];
							}

							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``~
							# (1)This is for Identical sequence averaging with REVERSING option (for and backward forward perhaps)
							#____________________________________________________________________________________________
							# $residue2 is used as a case-insensitive PATTERN here, so this
							# is a substring/regex match rather than a string comparison.
							if($residue1=~/$residue2/i){ # if they are the same aa
									 $sec_str1      =$hash1{$position}->[1]; ## usually one of 'c h e'
									 if($reverse_order_of_one_hash){
											$sec_str2      =$hash2{$rev_posi}->[1];
									 }else{
											$sec_str2      =$hash2{$position}->[1];
									 }

									 #print "# ($position) $sec_str1 $sec_str2, $residue1 $residue2 ";

									 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									 # If both predictions are matching  H->H and E->E cases
									 #________________________________________________________
									 if($sec_str1=~/$sec_str2/i){
											 $matched_prediction_count++;
											 # NOTE(review): both branches below read the second
											 # hash at $rev_posi even when the 'r' option is off,
											 # unlike the residue/sec_str lookups above -- confirm.
											 if($sec_str1=~/^[eEhHcC]$/i){
													 $sum_reliability = ($hash1{$position}->[2] + $hash2{$rev_posi}->[2] + $weight_factor/3);
													 $average_reliability=($sum_reliability/2) * $weight_factor;  # usually 0.1
													 $averaged{$position}=[$residue1, $sec_str1, $average_reliability];
													 #print " $sum_reliability, $average_reliability\n";
													 $average_reliability=$sum_reliability='';
											 }else{
													 $sum_reliability = ($hash1{$position}->[2] + $hash2{$rev_posi}->[2]);
													 $average_reliability=($sum_reliability/2) * $weight_factor;
													 $averaged{$position}=[$residue1, $sec_str1, $average_reliability];
													 $average_reliability=$sum_reliability='';
											 }
									 }
									 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
									 # If both predictions are NOT matching eg)  E->H, C-H, E-C,,,
									 #________________________________________________________________
									 else{
											 # tally the kind of disagreement: <first>_<second>_mismatch
											 if ($sec_str1 =~/h/i){
													 if($sec_str2 =~/e/i){     $mismatch_stats{'H_E_mismatch'}++;
													 }elsif($sec_str2 =~/c/i){ $mismatch_stats{'H_C_mismatch'}++;
													 }
											 }elsif($sec_str1 =~/e/i){
													 if($sec_str2 =~/h/i){     $mismatch_stats{'E_H_mismatch'}++;
													 }elsif($sec_str2 =~/c/i){ $mismatch_stats{'E_C_mismatch'}++;
													 }
											 }elsif($sec_str1 =~/c/i){
													 if($sec_str2 =~/h/i){     $mismatch_stats{'C_H_mismatch'}++;
													 }elsif($sec_str2 =~/e/i){ $mismatch_stats{'C_E_mismatch'}++;
													 }
											 }
											 $mismatched_prediction_count++;
											 # on disagreement the first hash's reliability is kept,
											 # paired with '_' or with the SECOND hash's structure
											 $dif_reliability =$hash1{$position}->[2];
											 if($ignore_non_matching_residues){
													 $averaged{$position}=[$residue1, '_', $dif_reliability];
											 }else{
													 $averaged{$position}=[$residue1, $sec_str2 , $dif_reliability];
											 }
											 #$averaged{$position}=[$residue1, $sec_str1, $dif_reliability];
											 #print "  << $dif_reliability, $dif_reliability\n";
											 $dif_reliability=$sum_reliability='';
									 }
							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							# (2) This is for NOT identical sequences averaging. From multi alignment perhaps
							#_________________________________________________________________________________
							}else{
									 #print "   # (i) $position: $residue1: Not reversing the order here, From multi alignment perhaps\n";
									 # Rings the bell, prints a warning and pauses 4 seconds for
									 # EVERY position whose residues differ.
									 print chr(7), "\n# (W) Somehow the residues are not identical: $residue1 $residue2\n\n";
									 sleep (4);
									 $sec_str1      =$hash1{$position}->[1]; ## usually one of 'c h e'
									 if($reverse_order_of_one_hash){
											$sec_str2      =$hash2{$rev_posi}->[1];
									 }else{
											$sec_str2      =$hash2{$position}->[1];
									 }

									 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									 # If both predictions are matching eg)  H->H
									 #________________________________________________
									 if($sec_str1=~/$sec_str2/i){
											 $sum_reliability = ($hash1{$position}->[2] + $hash2{$rev_posi}->[2]);
											 # NOTE(review): both branches of this if/else are identical.
											 if($residue1 ne $residue2){
													 $average_reliability=($sum_reliability/2) * $weight_factor;  # usually 0.1
											 }else{
													 $average_reliability=($sum_reliability/2) * $weight_factor;  # usually 0.1
											 }
											 $averaged{$position}=[$residue1, $sec_str1, $average_reliability];
											 print "# ($position) $sec_str1 $sec_str2, $residue1 $residue2, $sum_reliability, $average_reliability\n";
											 $average_reliability=$sum_reliability='';

									 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									 # If both predictions are NOT matching eg)  E->H
									 #________________________________________________
									 }else{

											 $average_reliability = ($hash1{$position}->[2]);
											 if($ignore_non_matching_residues){
													 $averaged{$position}=[$residue1, '_', $average_reliability];
											 }else{
													 $averaged{$position}=[$residue1, $sec_str2 , $average_reliability]; ##<--- Watch out
											 }
											 $average_reliability=$sum_reliability='';
									 }

							}
				 }
		 }
		 # Summary entries live in the same hash as the per-position entries,
		 # under non-numeric keys.
		 $averaged{'mismatched_prediction_count'}=$mismatched_prediction_count;
		 $averaged{'matched_prediction_count'}   =$matched_prediction_count;
		 $averaged{'mismatch_stats'}={%mismatch_stats};
		 $averaged{'num_of_residues'}=$num_of_residues;
		 return(\%averaged);
}



#______________________________________________________________
# Title     : get_average_sequence_size
# Usage     : $av      = ${&get_average_sequence_size(\%seqs)};
#             ($av_ref) = &get_average_sequence_size(\%seqs1, \%seqs2);
# Function  : For every hash handed over by handle_arguments, computes
#             the integer-truncated mean length of its values.
# Example   : %s=('a','ACGT','b','AC');  -> average is int(6/2) = 3
# Warning   : An empty hash yields 0 (it used to die with a division
#             by zero). The return shape depends on calling context.
# Keywords  : get_av_sequence_size, get_average_seq_size
#             get_av_seq_size, average_seq_size, av_seq_size
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : list context  : ref. of an array with one average per hash
#             scalar context: ref. of a scalar holding the first average
# Argument  : one or more refs. of hash (name => sequence string)
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_average_sequence_size{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my @OUT_AV;
	for my $seq_hash (@hash){
		my @sequences = values %{$seq_hash};

		# The running total is local to this hash. Previously the sizes and
		# the sum leaked from one hash into the next, inflating every
		# average after the first one.
		my $total = 0;
		for my $sequence (@sequences){
			$total += length($sequence) if defined($sequence);
		}

		# Guard the division: a hash without entries has an average of 0.
		my $average = @sequences ? int($total / @sequences) : 0;
		push(@OUT_AV, $average);
	}
	wantarray ? \@OUT_AV : \$OUT_AV[0];
}


#______________________________________________________________
# Title     : get_linux_kernel_version
# Usage     : $ver = ${&get_linux_kernel_version('/boot/vmlinuz')};
#             $ver = ${&get_linux_kernel_version(\$image)};
#             $ver = ${&get_linux_kernel_version()};  # probes defaults
# Function  : Scans each given kernel image line by line and takes the
#             first thing that looks like 'N.N.N' as its version.
# Example   :
# Warning   : Returns undef (not a reference) as soon as one image can
#             not be opened. Prints the chosen image name to STDOUT.
# Keywords  : get_kernel_version, kernel_version,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : ref. of a scalar (zero or one version found), or
#             ref. of an array when more than one version was found
# Argument  : image file names, or refs. of scalars holding them. An
#             undefined argument (or no argument) means: try /vmlinuz,
#             /boot/vmlinuz and /boot/bvmlinux in that order.
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub get_linux_kernel_version {
	# @versions is lexical now: as a package global it kept growing from
	# call to call, so a second call wrongly returned an array reference.
	my(@versions, $version);

	# Without any argument the loop body never ran, which made the default
	# image probing below unreachable. Treat "no argument" as one undef.
	my @candidates = @_ ? @_ : (undef);

	for my $candidate (@candidates){
		my $image = (ref($candidate) eq 'SCALAR') ? ${$candidate} : $candidate;
		unless(defined($image)){
			if(-e '/vmlinuz'){
				$image='/vmlinuz';
			}elsif(-e '/boot/vmlinuz'){
				$image='/boot/vmlinuz';
			}elsif(-f '/boot/bvmlinux' ){
				$image='/boot/bvmlinux';
			}
		}
		print "\n# The final chosn \$image is $image\n";

		# Nothing to open: same outcome as the failed open this used to cause.
		return(undef) unless defined($image);

		# Three-argument open with a lexical handle, so that odd characters
		# in the file name can not be taken for an open mode.
		open(my $image_fh, '<', $image) or return(undef);
		while(my $line = <$image_fh>){
			if($line =~ /(\d+\.\d+\.\d+)/){
				$version = $1;
				push(@versions, $version);
				last;   # only the first hit of every image counts
			}
		}
		close($image_fh);
	}
	if(@versions > 1){
		return(\@versions);
	}else{
		return(\$version)
	}
}



#______________________________________________________________
# Title     : load_mount_info
# Usage     : &load_mount_info;
# Function  : Rebuilds the package hashes %mounted and %fs_type from
#             the lines of /etc/mtab.
# Example   :
# Warning   : Dies when /etc/mtab can not be opened.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : the result of closing /etc/mtab
# Argument  : none
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub load_mount_info {
	# Both tables are thrown away and refilled on every call.
	undef %fs_type;
	undef %mounted;

	open(MTAB, "</etc/mtab") or die "Can't read /etc/mtab: $!\n";
	while (<MTAB>) {
		# First three whitespace separated fields of an mtab line.
		my($device, $mount_point, $fs_kind) = split;
		unless($device eq 'none'){
			# %mounted works in both directions: device -> mount point
			# and mount point -> device.
			$mounted{$device}      = $mount_point;
			$mounted{$mount_point} = $device;
			$fs_type{$device}      = $fs_kind;
		}
	}
	close(MTAB);
}



#______________________________________________________________
# Title     : plot_vertically
# Usage     : &plot_vertically(\@query);
# Function  : This is a sub used for plot_domains.pl for
#             genome_analysis. Prints one text row per array element:
#             the element index followed by as many '*' as its value.
# Example   : &plot_vertically([2,0,3]); prints
#                 |=====...=====>
#             0   |**
#             1   |
#             2   |***
#                 |=====...=====>
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : the result of the last print
# Argument  : ref. of an array of non-negative numbers
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub plot_vertically{
	# Lexical copies: this sub used to overwrite the package variables
	# @numbers and $i, which broke callers looping with their own $i.
	my @numbers = @{$_[0]};
	print "\n    |===================================================\>\n";
	for my $row (0 .. $#numbers){
		printf ("%-4d\|", $row);
		print "\*"x$numbers[$row], "\n";
	}
	print "    |===================================================\>\n";
}

#______________________________________________________________
# Title     : plot_histogram_horizontally
# Usage     : &plot_histogram_horizontally(\@query);
# Function  :
# Example   :
#  Input: $input= '00001111111113333333333444444444111111111111111';
#
#  Output:
#   00001111111113333333333444444444111111111111111
#   1-------------------------------------------47
#  |
#  |
#  |                       *********
#  |             *******************
#  |             *******************
#  |    *******************************************
#  |-----------------------------------------------
#
# Warning   :
# Keywords  : plot_horizontally, plot_numbers_horizontally, plot,
#             plot_numbers,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub plot_histogram_horizontally{
	# Draws the numbers of the referenced array as upright bars of '*',
	# one text column per element, the tallest bar being as many rows
	# high as the largest value. Everything goes to the selected handle.
	my @numbers = @{$_[0]};
	my $count   = @numbers;

	# Largest value; stays undefined when nothing is greater than zero.
	my $largest;
	foreach my $value (@numbers){
		if($largest < $value){ $largest = $value }
	}

	# Rows of blank space sitting on top of every bar.
	my @gap = map { abs($_ - $largest) } @numbers;

	# Two caption lines (the raw numbers and a 1..N ruler) and two spacers.
	print "\n ", @numbers;
	print "\n 1", "\-"x($count-4),$count;
	print "\n\|";
	print "\n\|";

	# From the top row downwards: a column is still blank while the row
	# number is smaller than the gap above its bar.
	my $row = 0;
	while($row < $largest){
		print "\n\|";
		foreach my $blank_rows (@gap){
			print( ($row < $blank_rows) ? " " : "\*" );
		}
		$row++;
	}
	print "\n\|", "\-"x$count;
	print "\n";
}




#______________________________________________________________
# Title     : condense_number_string
# Usage     :
# Function  : condenses the numbers by making an average with
#             given factor. If the factor is 2 on number seq
#              1334284425 , result will be 23543
#              133428442  ,                23541 <-- preserved end
#             Factor 3 =>
#              133428442  , (1+3+3)/3 = 2
#                           (4+2+8)/3 = 4,,,
# Example   : @output=@{&condense_number_string(\@input, $factor)};
#             with @input=qw(1 2 4 10 10 22 2 3 44 2 3); and $factor=3
# Warning   :
# Keywords  : compact_number_string, compact_digits, condense
#             condense_string
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub condense_number_string{
	# Averages consecutive groups of $factor numbers.
	#   $_[0] : ref. of the array of numbers
	#   $_[1] : group size, either a plain number or a ref. of a scalar;
	#           missing, zero or negative means 1
	# Returns a ref. of an array holding int(sum/$factor) of every group.
	# A short last group is still divided by $factor, as in the header
	# example ('133428442' with factor 2 ends in 1).
	my @ARRAY  = @{ $_[0] || [] };

	# Only dereference a real scalar reference. '${$_[1]}' on a plain
	# number is a symbolic reference to $1, $2 ... (left-over regex
	# captures) or to $0, which silently replaced the requested factor.
	my $factor = (ref($_[1]) eq 'SCALAR') ? ${$_[1]} : $_[1];

	# A step of zero or less would never leave the loop below.
	unless(defined($factor) && $factor > 0){ $factor=1 }

	my @out;
	for(my $start=0; $start < @ARRAY; $start+=$factor){
		my $group_sum = 0;
		for(my $pos=$start; $pos < ($factor+$start); $pos++){
			# Positions behind the end of the array count as nothing.
			$group_sum += $ARRAY[$pos] if defined($ARRAY[$pos]);
		}
		push(@out, int($group_sum/$factor) );
	}
	return(\@out);
}


#___________________________________________________________
# Title     : get_seq_fragments
# Usage     : @seq_frag=&get_seq_fragments(\%msf, @RANGE);
# Function  : gets sequence(string) segments with defined
#             ranges.
# Example   :
#  %test=('seq1', '1234AAAAAAAAAAAaaaaa', 'seq2', '1234BBBBBBB');
#  @range = ('1-4', '5-8');
#
#  %out = %{&get_seq_fragments(\%test, \@range)};
#  %out => (seq1_5-8   AAAAA
#           seq2_5-8   BBBBB
#           seq1_1-4    1234
#           seq2_1-4    1234 )
#
# Warning   :
# Keywords  : get_sequence_fragments,
# Options   : _  for debugging.
#             #  for debugging.
#             l=  for min seqlet length
#             r  for adding ranges in the seq names
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.8
#-------------------------------------------------------
sub get_seq_fragments{
	# Cuts the ranges ('start-end', 1-based, inclusive) out of every
	# sequence of every input hash.
	#   one sequence in a hash   : one output hash holding all its fragments
	#   several sequences        : one output hash per range
	# Options: l=N  minimum fragment length (default 10, shorter ranges
	#               are skipped)
	#          r    append '_start-end' to the fragment names
	#          n    plain sequence names (default)
	# Returns the list of output hash refs. when there are several, the
	# single hash ref. when there is one, and nothing otherwise.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my $min_seqlet_size=10;
	 my $no_range_in_name=1;

	 # Declared out here on purpose. It used to be a 'my' inside the loop
	 # below, so the test after the loop looked at an unrelated (empty)
	 # package array and the fragments were never returned.
	 my @out_hash;

	 if(exists($vars{'l'})){ $min_seqlet_size=$vars{'l'} }
	 if($char_opt=~/v/){ print "\n \$char_opt is $char_opt  @char_opt\n"; }
	 if($char_opt=~/n/){ $no_range_in_name = 1 }
	 if($char_opt=~/r/){ $no_range_in_name = 0 }

	 print "\nget_seq_fragments \$no_range_in_name is $no_range_in_name \n";
	 for my $seq_set (@hash){
		 my %seqs  = %{$seq_set};
		 my @names = keys %seqs;
		 if(@names==1){
			 my %out_frag;
			 my $seq_name = $names[0];
			 my $seq      = $seqs{$seq_name};
			 for my $range (@range){
				 my($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
				 next unless defined($range_end);   # not a 'start-end' range
				 my $frag_len = $range_end-$range_start+1;
				 next if($frag_len < $min_seqlet_size);
				 # Same guard as in the multi-sequence branch: a start of 0
				 # would make substr count from the end of the sequence.
				 if($range_start==0){ $range_start++; }
				 my $frag_name = ($no_range_in_name==1) ? "$seq_name" : "$seq_name\_$range";
				 $out_frag{$frag_name}=substr($seq, $range_start-1, $frag_len);
			 }
			 push(@out_hash, \%out_frag);
		 }elsif(@names > 1){
			 for my $range (@range){
				 my %out_frag=();
				 my($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
				 next unless defined($range_end);   # not a 'start-end' range
				 my $frag_len = $range_end-$range_start+1;
				 next if($frag_len < $min_seqlet_size);
				 if($range_start==0){ $range_start++; } ## This is a bugfix
				 for my $seq_name (@names){
					 my $frag_name = ($no_range_in_name==1) ? "$seq_name" : "$seq_name\_$range";
					 $out_frag{$frag_name}=substr($seqs{$seq_name}, $range_start-1, $frag_len);
				 }
				 push(@out_hash, \%out_frag);
			 }
		 }
	 }
	 if(@out_hash > 1){ return(@out_hash) }
	 elsif(@out_hash==1){ return($out_hash[0]) }
	 return;
}




#________________________________________________________________________
# Title     : make_standalone_subroutines
# Usage     : &make_standalone_subroutines(@ARGV);
# Example   : &make_standalone_subroutines(@ARGV);
# Function  : Creates each subroutine derived xxx.pl file from Bio.pl or any
#             given library file. If there is a file for a sub already, it
#             skips.
# Class     : Utility
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub make_standalone_subroutines{
	# Splits every given library file into one '<name>.pl' file per
	# documented subroutine. A subroutine is picked up when a head box
	# (a '#____' style rule followed by a '# Title : name' line) is met;
	# the box and the following 'sub name { ... }' up to the first line
	# starting with '}' are written to the current directory.
	# A sub is skipped when its file already exists with more than 200
	# bytes, or when a smaller file already looks like it has a head box.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	$|=1;
	for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		my @lib =<LIB_FILE>;
		# Closed per file now; only the last handle used to be closed.
		close(LIB_FILE);

		FOR: for($j=0; $j < @lib; $j++){
			my (%out_subs, $each_sub);
			my $title_found;

			#"""" Taking the headbox """""""""""""
			if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
				&&($lib[$j+1]=~/^(#\s*title\s*:\s*([\w\-]+))[^\.pl]/i) ){
				# Keep the captures right away, before any other match.
				my $title_line = $1;
				$each_sub      = $2;
				$title_found   = 1;

				my $target        = "$each_sub\.pl";
				my $existing_size = -s $target;   # undef when missing
				if( $existing_size > 200 ){
					# 'print (-s ...), "   "' printed the size only; the
					# rest of the list was silently thrown away.
					print( $existing_size, "   " );
					print "  $each_sub", " exists \n";
					next FOR;
				}

				# Small or missing file: count head box marks in it.
				my $temp = 0;
				if(open(TEMP, "<$target")){
					while(my $old_line = <TEMP>){
						if($old_line=~/^#[_\-\*]{10,120}\s*$/){ $temp++ }
						elsif($old_line=~/^#\s*title\s*:\s*[\w\-]+[^\.pl]/i ){
							$temp++;
						}elsif($old_line=~/^#\s*\w+/){
							$temp=$temp+0.5;
						}
					}
					close(TEMP);
				}
				if($temp >2){
					next FOR;
				}

				$out_subs{"$each_sub"}.="$lib[$j]$title_line\n";
				$j+=2;
				# Copy the head box up to its closing rule (or the sub line).
				# The index test keeps a box that is cut off by the end of
				# the file from looping forever.
				while( ($j < @lib) &&
					!( ($lib[$j]=~/^sub\s*\w+\s*\{/)||($lib[$j]=~/^#---+\s*$/) ||
					   ($lib[$j]=~/^#_____+\s*$/) || ($lib[$j]=~/^#\*\*+\s*$/) ) ){
					my $box_line = $lib[$j];
					# Trim trailing blanks but keep one line per line: the
					# old substitution also removed the newline and glued
					# the whole box into a single comment line.
					$box_line=~s/\s*$//;
					$out_subs{"$each_sub"}.="$box_line\n";
					$j++;
				}
				if($j < @lib){
					$out_subs{"$each_sub"}.="$lib[$j]";
				}
				$j++;    ## essential to remove #------------- line
			}

			#"""""""" Reading sub {  } """""""
			if( ($title_found==1)&&($j < @lib)&&($lib[$j]=~/^sub\s+([\w\-]+)\s*\{/) ){
				$each_sub=$1;
				$out_subs{"$each_sub"}.="$lib[$j]";

				# A sub written on a single line is complete already.
				unless($lib[$j]=~/^sub\s+([\w\-]+)\s*\{.+\}/){
					$j++;
					# Stop at the end of the file when '}' never comes.
					while( ($j < @lib) && ($lib[$j]!~/^\}/) ){
						$out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					}
					if($j < @lib){
						$out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
					}
					$j++;
				}

				if(open (EACH_FILE, ">$each_sub\.pl")){
					print EACH_FILE  "#\!\/perl\n";
					print EACH_FILE  "# Made by $0 at: ", `date`, "\n";
					print EACH_FILE $out_subs{$each_sub};
					close EACH_FILE;
				}else{
					# Report it and carry on with the remaining subs.
					warn "\n $each_sub\.pl  <- $! \n";
				}
				%out_subs=();
			}
		}
	}#""""""""""""" end of for (@file)
}



#___________________________________________________________
# Title     : is_html
# Usage     : $html=&is_html(\@lines);
#             $html=&is_html($file_name);
# Function  : Checks if it is an html file: the input has to contain
#             both an opening <html ...> and a closing </html> tag
#             (any letter case).
# Example   : $html=&is_html(\@test);
# Warning   : Only the first kind of input found is examined, in the
#             order: strings, files, arrays, hashes.
#             For files the result is 1 when at least one file has
#             both tags. For the other inputs the two tags may sit in
#             different elements.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : 1 or 0
# Argument  : strings, file names, refs. of array or refs. of hash
# Category  :
# Version   : 1.1
#-------------------------------------------------------
sub is_html{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $html=0;

	# The opening tag may carry attributes ('<html lang="en">').
	my $open_tag  = qr/\<\s*HTML\b[^\>]*\>/i;
	my $close_tag = qr/\<\s*\/\s*HTML\s*\>/i;

	# One test for all kinds of input. It replaces three copies which
	#  - compared two never reset package flags with 'eq', so that plain
	#    text (both flags undefined) was reported as html,
	#  - used '($html_end=1)', an assignment, as a condition,
	#  - used '[.\n]{0, 100}', which is a class of dot and newline and
	#    not 'any character'.
	my $has_both_tags = sub {
		my($seen_open, $seen_close) = (0, 0);
		for my $chunk (@_){
			next unless defined($chunk);
			$seen_open  = 1 if($chunk =~ $open_tag);
			$seen_close = 1 if($chunk =~ $close_tag);
		}
		return( ($seen_open && $seen_close) ? 1 : 0 );
	};

	if( @string >0 ){
		$html = $has_both_tags->(@string);
		if($debug==1){ print "\n \@string is @string\n"; }
	}elsif(@file>0){
		for my $path (@file){
			my $all_lines = '';
			my $in_fh;
			unless(open($in_fh, '<', $path)){
				warn "\n $path  <- $! \n";
				next;
			}
			while(my $line = <$in_fh>){
				$all_lines.=$line;
			}
			close($in_fh);
			print "\n All the lines of $path is $all_lines\n" if $debug==1;
			if($has_both_tags->($all_lines)){
				$html=1;
				print "\n html matched $html\n" if $debug ==1;
			}
		}
	}elsif( @array>0 ){
		# All arrays are looked at; only the first one used to be.
		$html = $has_both_tags->( map { @{$_} } @array );
	}elsif(@hash>0){
		# Keys and values alike, as before. This branch used to overwrite
		# its own input list and never set the result.
		$html = $has_both_tags->( map { %{$_} } @hash );
	}
	return($html);
}

#___________________________________________________________________
# Title     : get_sum_of_columns
# Usage     : &get_sum_of_columns(\@ar, 1,2 ,3);
#             &get_sum_of_columns(\%ha, 1,2 ,3);
#             &get_sum_of_columns(@ARGV);
#             # where prompt is like: column.pl temp.txt 1 2 3 4
# Function  : Prints any specified columns, can change order of them,
#             can filter values of columns to filter (max or min value)
#             Skips blank lines.
# Example   : For getting only necessary columns
#             Input: %Hash=(1, 'col1 col2 col3',
#                           2, 'col1 col2 col3',
#                           3, 'col1 col2 col3');
#             input format: &get_sum_of_columns(\%Hash, 3,2,1, 'k'); # k is opt
#             Ouput format: STDOUT as
#
#                1     col3 col2 col1
#                2     col3 col2 col1
#                3     col3 col2 col1
#
# Keywords  : columns, column.pl, column, get_sum_of_columns, take_columns,
# Options   : #  for debugging.
#             _  for debugging.
#             k  for Key print when hash input is given.
#             n  for no first line display(Handy when you have title line
#                                          and wanna remove it)
#             ?max?=xxx for filtering column numbers by maximum of xxx
#             ?min?=yyy for filtering column numbers by minimum of yyy
#                      (eg, min4=100000 means 4th column minimum is 100000)
#                      (eg, 1min4=10, 2min3=10, means get 4th column values
#                           below 10 as the first output column. Get 3rd
#                           column values below 10 as the second out column.
#
#  $combine           =1  by -c c   # c is for combining columns in different file
#  $ignore            =1  by -i i   # i is for ignoring leng diff in columns over
#  $reverse_line_order=r by r -r
#
# Returns   : Ref of
# Argument  : Ref of Hash, Array or just filename, and wanted column numbers.
# Category  :
# Version   : 1.7
#---------------------------------------------------------------
sub get_sum_of_columns{
    # Prints the wanted columns of every input line and, at the end, one
    # line with the sum of each of those columns.
    #   input   : file names, refs. of array of lines, or refs. of hash
    #             whose values are lines (tried in that order)
    #   columns : plain numbers (1-based), or maxN=.. / minN=.. options,
    #             which select column N and blank out values that are not
    #             below the max / above the min. Without any of them all
    #             columns of the first usable line are taken.
    #   options : i c r n k, see the header box above.
    # Lines from files contribute their numeric tokens only, lines from
    # arrays and hashes all whitespace separated tokens.
    # The sums also include values which a max/min filter blanked out.
    # NOTE(review): $ignore, $combine and $reverse_line_order are package
    # variables that are only ever switched on here, never reset; they may
    # also be set by the calling script - confirm before making them 'my'.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my (%sum_of_colmuns, $previous, @out, @columns);
    my (%max_of, %min_of);       # filter limits, keyed by 1-based column
    my $len =4;                  # print width, grows with the longest token
    my @v_keys= sort keys %vars; ## To be able to exchange order of column.
    if($char_opt=~/i/i){
        $ignore =1;
    }elsif($char_opt=~/c/i){
        $combine=1;
    }
    if($char_opt=~/r/){ $reverse_line_order='r' }

    if(@v_keys > 0){
       for my $v_key (@v_keys){
          if($v_key=~/\d*(max|min)(\d+)/i){
             # The limits live in lexical hashes under the column number
             # the user typed. They used to be stored in package variables
             # named after that 1-based number but read back with the
             # 0-based index, so a filter never hit its own column.
             if(lc($1) eq 'max'){ $max_of{$2}=$vars{$v_key} }
             else               { $min_of{$2}=$vars{$v_key} }
             push(@columns, $2);
          }
       }
    }else{
       @columns=@num_opt;
    }

    if((@num_opt==0)&&(@file>1)){ $combine=1 }; # when no column num. is given assume $combine

    #""""""""""" When combine option is set """"""""""""""""""""""
    if(($combine==1)&&(@file > 1)){
       for my $path (@file){
          my @all_lines;
          if(open(my $in_fh, '<', $path)){
             @all_lines=<$in_fh>;
             close($in_fh);
          }else{
             warn "\n $path  <- $! \n";
          }
          # Only compare with a file read before. The very first file used
          # to be checked against an undefined count, which made combining
          # impossible without the i option.
          if(defined($previous)&&(@all_lines != $previous)&&($ignore !=1)){
              print "\n The column lengths do not match in the inputs\n";
              print "\n you can use -i option \n";
              exit;
          }
          $previous=@all_lines;
          for(my $line_no=0; $line_no< @all_lines; $line_no++){
              if($all_lines[$line_no]=~/^[\t ]*$/){ next }  # skipping blank line
              chomp($all_lines[$line_no]);
              $out[$line_no].="$all_lines[$line_no] ";
          }
       }
       push(@array, \@out);   # handled by the array branch below
    }

    ###### File is given as input #######""""""""""""""""""""""""""""""""""""""
    if((@file >=1)&&(@array < 1)){
       for my $path (@file){
          my @all_lines;
          if(open(my $in_fh, '<', $path)){
             @all_lines=<$in_fh>;
             close($in_fh);
          }else{
             warn "\n $path  <- $! \n";
          }

          # First pass, only for the print width. It gives up once about
          # 50 lines per width change have been read.
          my $line_read=0;
          my $change   =0.1;
          for my $line (@all_lines){
              $line_read++;
              my @splited=split(/\s+/, $line);
              my $longest=${&get_longest_str_size(\@splited)};
              if($longest>$len){ $len=$longest; $change++ }
              if( ($line_read/$change) > 50 ){ last }
          }

          # Reverse LINE order if -r option is set
          if($reverse_line_order){
             @all_lines=reverse(@all_lines);
          }
          my $line_counter=0;
          for my $line (@all_lines){
             if($line=~/^[\s\#]*$/){ next }  # skipping blank line and comment lines
             if($line=~/^\s{1,3}#/){ next }  # skipping  comment lines
             $line_counter++;
             if(($char_opt=~/n/i)&&($line_counter==1)){ next } ## NO title #
             my @fields=$line=~/([\-\.\d]+)/g;   # numeric tokens only
             next unless(@fields);
             if(@columns < 1){ @columns=(1 .. scalar(@fields)) }
             _print_filtered_columns(\@fields, \@columns, $len, \%max_of, \%min_of, \%sum_of_colmuns);
          }
       }
    }    ###### Array is given as input #######""""""""""""""""""""""""""""""""""
    elsif(@array>0){  # if input is ('x y xx y','k t yy zz',,,,)
       for my $array_ref (@array){
          my @arr=@{$array_ref};
          for my $line (@arr){
             my @splited=split(/\s+/,$line);
             my $longest=${&get_longest_str_size(\@splited)};
             $len=$longest if $longest>$len;
          }

          # Reverse LINE order if r option is set
          if($reverse_line_order){
             @arr=reverse(@arr);
          }
          # Blank (or missing) lines do not count as lines at all.
          @arr=grep { defined($_) && ($_ !~ /^[\t ]*$/) } @arr;
          for(my $line_no=0; $line_no< @arr; $line_no++){
             if(($char_opt=~/n/i)&&($line_no==0)){ next } # skipping the first line
             my @fields=$arr[$line_no]=~/(\S+)/g;
             next unless(@fields);
             if(@columns < 1){ @columns=(1 .. scalar(@fields)) }
             _print_filtered_columns(\@fields, \@columns, $len, \%max_of, \%min_of, \%sum_of_colmuns);
          }
       }
    }  ##### Hash is given as input #######""""""""""""""""""""""""""""""""""
    elsif(@hash>0){
       for my $hash_ref (@hash){
          my %line_of=%{$hash_ref};
          my @keys   =keys %line_of;
          for my $key (@keys){ # getting the longest str size
             my @splited=split(/\s+/,$line_of{$key});
             my $longest=${&get_longest_str_size(\@splited)};
             $len=$longest if $longest>$len;
          }

          # Reverse LINE order if r option is set. The keys are reordered
          # and filtered, not the values, so that the key printed with the
          # k option always belongs to its line.
          if($reverse_line_order){
             @keys=reverse(@keys);
          }
          @keys=grep { defined($line_of{$_}) && ($line_of{$_} !~ /^[\t ]*$/) } @keys;
          for(my $line_no=0; $line_no< @keys; $line_no++){
             if(($char_opt=~/n/i)&&($line_no==0)){ next } #  skipping the first line
             printf "%-10s", $keys[$line_no] if($char_opt=~/k/i); ## Option for key printing
             my @fields=$line_of{$keys[$line_no]}=~/(\S+)/g;
             next unless(@fields);
             if(@columns < 1){ @columns=(1 .. scalar(@fields)) }
             _print_filtered_columns(\@fields, \@columns, $len, \%max_of, \%min_of, \%sum_of_colmuns);
          }
       }
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Writing down the SUM line
    #___________________________________________________
    print "The sum of the columns\' numbers only is:\n\n";
    for my $col_number (@columns){
       printf "%-${len}s ", $sum_of_colmuns{$col_number-1};
    }
    print "\n\n";
}

# Private helper of get_sum_of_columns: prints the wanted columns of one
# line, followed by a newline, and adds every value to the running sums.
#   $fields  ref. of the tokens of the line
#   $columns ref. of the wanted 1-based column numbers
#   $len     print width of one column
#   $max_of, $min_of  refs. of hash: 1-based column -> limit (both are
#            exclusive; a value outside is replaced by blank padding)
#   $sum_of  ref. of hash: 0-based column index -> running sum
sub _print_filtered_columns{
    my($fields, $columns, $len, $max_of, $min_of, $sum_of)=@_;
    for my $col_number (@{$columns}){
       my $col  =$col_number-1;
       my $value=$fields->[$col];
       my $max  =$max_of->{$col_number};
       my $min  =$min_of->{$col_number};

       my $shown=1;
       $shown=0 if(defined($max) && !($max > $value));
       $shown=0 if(defined($min) && !($min < $value));
       if($shown){
          # An empty or missing value prints nothing, not even padding.
          printf("%-${len}s ", $value) unless($value eq '');
       }else{
          printf("%-${len}s ", '');
       }
       $sum_of->{$col} +=$value;
    }
    print "\n";
}




#___________________________________________________________________
# Title     : get_column
# Usage     : &get_column(\@ar, 1,2 ,3);
#             &get_column(\%ha, 1,2 ,3);
#             &get_column(@ARGV);
#             # where prompt is like: column.pl temp.txt 1 2 3 4
# Function  : Prints any specified columns, can change order of them,
#             can filter values of columns to filter (max or min value)
#             Skips blank lines.
# Example   : For getting only necessary columns
#             Input: %Hash=(1, 'col1 col2 col3',
#                           2, 'col1 col2 col3',
#                           3, 'col1 col2 col3');
#             input format: &get_column(\%Hash, 3,2,1, 'k'); # k is opt
#             Output format: STDOUT as
#
#                1     col3 col2 col1
#                2     col3 col2 col1
#                3     col3 col2 col1
#
# Keywords  : columns, column.pl, column, get_columns, take_columns,
# Options   : #  for debugging.
#             _  for debugging.
#             k  for Key print when hash input is given.
#             n  for no first line display(Handy when you have title line
#                                          and wanna remove it)
#             ?max?=xxx for filtering column numbers by maximum of xxx
#             ?min?=yyy for filtering column numbers by minimum of yyy
#                      (eg, min4=100000 means 4th column minimum is 100000)
#                      (eg, 1min4=10, 2min3=10, means get 4th column values
#                           below 10 as the first output column. Get 3rd
#                           column values below 10 as the second out column.
#
#  $combine           =1  by -c c   # c is for combining columns in different file
#  $ignore            =1  by -i i   # i is for ignoring leng diff in columns over
#  $reverse_line_order=r by r -r
#  $get_SUM_of_columns=S by S
#
# Returns   : Ref of
# Argument  : Ref of Hash, Array or just filename, and wanted column numbers.
# Category  :
# Version   : 1.9
#---------------------------------------------------------------
sub get_column{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    # Prints the selected whitespace-separated columns of every input row to
    # STDOUT (one value per output line), optionally blanking values that fail
    # a max/min threshold, and finishes with a line holding the per-column sums.
    # Input is taken, in this priority, from: combined files (option c), plain
    # files, array refs, hash refs (hash values are the rows).
    #
    # Fixes relative to the previous revision:
    #   * combine mode no longer dies on the FIRST file (there is nothing to
    #     compare its line count with yet).
    #   * removing a too-short line no longer skips the line that follows it.
    #   * a threshold given as e.g. min4 is now applied to the 4th column; it was
    #     stored under "min4" but looked up under the 0-based name "min3".
    #     Thresholds now live in lexical hashes, so they no longer leak into
    #     later calls through package variables.
    #   * in hash mode the key stays attached to its own row when rows are
    #     reversed or blank rows are dropped.
    #   * a file that cannot be opened is reported instead of silently read as
    #     empty.
    my (%sum_of_colmuns, @columns, @out, $previous, $max_column_number,
        %max_of, %min_of);      # %max_of / %min_of are keyed by 0-based column index
    my $len = 4;                # printf field width; grows to the longest token seen

    # NOTE(review): $ignore, $combine and $reverse_line_order are package
    # variables here (never declared with my) and are only ever switched ON.
    # They are presumably also set by the calling script's option parser, so
    # they are deliberately left as globals -- confirm against callers.
    # The 'S' option is accepted by the header documentation, but the sum line
    # below is printed unconditionally, as before.
    if($char_opt=~/i/i){ $ignore =1; }
    if($char_opt=~/c/i){ $combine=1; }
    if($char_opt=~/r/){  $reverse_line_order='r' }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Column selection. When name=value options were given, every key that
    # looks like [N]maxC / [N]minC selects column C (sorted key order gives
    # the output order); otherwise the numeric options are the columns.
    #_____________________________________________________________________
    my @v_keys = sort keys %vars;
    if(@v_keys > 0){
       for my $key (@v_keys){
          if($key =~ /\d*(m..)(\d+)/i){
             my ($kind, $column) = (lc($1), $2);
             if(   $kind eq 'max'){ $max_of{$column-1} = $vars{$key}; }
             elsif($kind eq 'min'){ $min_of{$column-1} = $vars{$key}; }
             push(@columns, $column);
          }
       }
    }else{
       @columns=@num_opt;
    }

    $max_column_number=${&get_largest_element(\@columns)};

    if((@num_opt==0)&&(@file>1)){ $combine=1 }; # when no column num. is given assume $combine

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Prints one row. @columns defaults to "all columns of the first row
    # seen". A value is printed when it passes every threshold defined for
    # its column (max > value, min < value) and contains a non-blank char;
    # a value failing a threshold is replaced by a blank field. Every
    # selected value is added to the column sum whether printed or not.
    #_____________________________________________________________________
    my $emit_row = sub {
        my @fields = @_;
        if(@columns < 1){ @columns = (1 .. scalar(@fields)); }
        for my $column (@columns){
            my $idx   = $column - 1;
            my $value = $fields[$idx];
            my $pass  = 1;
            $pass = 0 if( defined($max_of{$idx}) and !($max_of{$idx} > $value) );
            $pass = 0 if( defined($min_of{$idx}) and !($min_of{$idx} < $value) );
            if($pass){
                printf "%-${len}s \n", $value if($value =~ /\S/);
            }else{
                printf "%-${len}s \n", '';
            }
            $sum_of_colmuns{$idx} += $value;
        }
    };

    #""""""""""" When combine option is set """"""""""""""""""""""
    # Combine means combining 2 file columns: line N of every file is
    # joined (space separated) into one row, then handled as array input.
    #_____________________________________________________________
    if($combine==1  and  @file > 1){
       for my $name (@file){
        open(my $in, '<', $name) or die "get_column: can not open $name: $!\n";
        my @all_lines=<$in>;
        close($in);
        # Only files after the first have a previous line count to match.
        if(defined($previous)&&(@all_lines != $previous)&&($ignore !=1)){
            print "\n The column lengths do not match in the inputs\n";
            print "\n you can use -i option \n";
            die;
        }
        $previous=@all_lines;
        for my $row (0 .. $#all_lines){
            if($all_lines[$row]=~/^\s*$/){ next }  # skipping blank line
            chomp($all_lines[$row]);
            $out[$row].="$all_lines[$row] ";
        }
       }
       push(@array, \@out);
    }
    ###### File is given as input #######""""""""""""""""""""""""""""""""""""""
    if((@file >=1)&&(@array < 1)){
       for my $name (@file){
         my ($line_read, $line_counter);
         my $change=0.1;
         open(my $in, '<', $name) or die "get_column: can not open $name: $!\n";
         my @all_lines=<$in>;
         close($in);

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # First pass: drop lines with fewer fields than the largest wanted
         # column and find the widest token. The pass stops once more than
         # 100 lines per width change have been read, so lines beyond that
         # point are neither measured nor dropped.
         #__________________________________________________________________
         my $at = 0;
         while($at < @all_lines){
             my @fields=split(/\s+/, $all_lines[$at]);
             if(@fields < $max_column_number){
                 splice(@all_lines, $at, 1);  # next line now sits at $at: do not advance
                 next;
             }
             my $longest=${&get_longest_str_size(\@fields)};
             $line_read++;
             if($longest>$len){ $len=$longest; $change++ }
             if( ($line_read/$change) > 100 ){ last }
             $at++;
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # Reverse LINE order if -r option is set
         #_____________________________________________
         if($reverse_line_order){    @all_lines=reverse(@all_lines);         }

         for my $line (@all_lines){
            if($line=~/^[\t \#]*$/){ next }  # skipping blank line and comment lines
            if($line=~/^ {1,2}#/){ next }    # skipping  comment lines
            $line_counter++;
            if(($char_opt=~/n/i)&&($line_counter==1)){ next } ## NO title #
            my @fields = ($line=~/(\S+)/g);
            $emit_row->(@fields) if(@fields);
         }
       }
    }    ###### Array is given as input #######""""""""""""""""""""""""""""""""""
    elsif(@array>0){  # if input is ('x y xx y','k t yy zz',,,,)
      for my $aref (@array){
         my @arr=@{$aref};
         for my $row (@arr){                    # widest token over all rows
            my @fields=split(/\s+/,$row);
            my $longest=${&get_longest_str_size(\@fields)};
            $len=$longest if $longest>$len;
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # Reverse LINE order if r option is set
         #_____________________________________________
         if($reverse_line_order){ @arr=reverse(@arr); }

         # Blank (or undefined) rows are dropped first, so with option n the
         # row skipped is the first non-blank one of the (possibly reversed) list.
         @arr = grep { $_ !~ /^\s*$/ } @arr;
         for my $row (0 .. $#arr){
            if(($char_opt=~/n/i)&&($row==0)){ next } # skipping the first line
            my @fields = ($arr[$row]=~/(\S+)/g);
            $emit_row->(@fields) if(@fields);
         }
      }
    }  ##### Hash is given as input #######""""""""""""""""""""""""""""""""""
    elsif(@hash>0){
      for my $href (@hash){
          # Keep every key paired with its value so option k prints the key
          # that belongs to the row even after reversing / dropping rows.
          my @pairs = map { [ $_, $href->{$_} ] } keys %{$href};
          for my $pair (@pairs){                # widest token over all rows
             my @fields=split(/\s+/,$pair->[1]);
             my $longest=${&get_longest_str_size(\@fields)};
             $len=$longest if $longest>$len;
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Reverse LINE order if r option is set
          #_____________________________________________
          if($reverse_line_order){ @pairs=reverse(@pairs); }

          @pairs = grep { $_->[1] !~ /^\s*$/ } @pairs;
          for my $row (0 .. $#pairs){
             if(($char_opt=~/n/i)&&($row==0)){ next } #  skipping the first line
             my ($key, $value) = @{$pairs[$row]};
             printf "%-10s", $key if($char_opt=~/k/i); ## Option for key printing
             my @fields = ($value=~/(\S+)/g);
             $emit_row->(@fields) if(@fields);
          }
      }
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Writing down the SUM line
    #___________________________________________________
    print "The sum of the columns\' numbers only is:\n\n";
    for my $column (@columns){
       printf "%-${len}s ", $sum_of_colmuns{$column-1};
    }
    print "\n\n";
}



#____________________________________________________________________
# Title    : write_sdb_file
# Function : gets a hash ref. and writes the SDB file with 'sprintf'
# Usage    : @out=@{&write_sdb_file(\%seq)};
# Example  : @out=@{&write_sdb_file(\%seq, 'v')};  ## for STDOUT as well
#    ___________________________________________________________________________
#    Title      : EST_YEAST.sdb
#    Full Name  : Telomerase_yeast_699aa
#    Nicknames  :
#    EMBL       :
#    PDB        :
#    Swissprot  :
#
# Argument : \%ref_of_seq
# Keywords : write_sdb
# Returns  :
# Options  : v  for verbose representation. This will print boxes on STDOUT
#            n  for no '#' leader.
#            e  for Endline( '-----------------------------..' )
# Version  : 1.1
#---------------------------------------------------------------
sub write_sdb_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Formats every hash given (by reference) as an SDB text box: a line of
	# underscores, then one "Key : value" line per entry (continuation lines
	# of a multi-line value carry no key), and -- with option e -- a closing
	# line of dashes. With option v the lines are also printed to STDOUT.
	#
	# Returns : one hash  -> a reference to the array of formatted lines
	#           >1 hashes -> the list of such references, one per hash
	#           no hash   -> a reference to an empty array
	#
	# Fixes relative to the previous revision:
	#   * lines are pushed onto the per-record array; the old shared counter
	#     ($k) was never reset, so every record after the first started with
	#     a run of undefined elements.
	#   * the value width is reset for every record; it used to stay at 80
	#     for all later records once one 'Enclosed' block had been printed.
	#   * the swap temporaries no longer leak as package variables.
	my($Commont_Symbol, $end_line, $temp, $num, @Final_out);
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($Entry_length) =9 ;  ## key column width; recomputed per record below
	my($VL);                ## value column width; (re)set per record below
	$num    =80;            ## length of the starting / ending rule lines
	if($char_opt =~ /n/i){
	  $Commont_Symbol=' '; ## Comment symbol. For help display, you can change into ' '
	}else{
	  $Commont_Symbol='';   #  Comment symbol. Default head_box display.
	}
	for($x=0; $x < @hash; $x++){
		my(%input) = %{$hash[$x]};  my(@keys)= sort (keys %input); my(@out);
		$VL = 60;

		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		##  PUTTING an order in the printout entries. To make 'Title' come first
		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		# Each recognised key is swapped into a fixed slot while the list is
		# being scanned; the resulting order therefore depends on scan order.
		# NOTE(review): the fixed slots (1, 2, 3, last-2, last-1) assume the
		# record has enough keys; short records may get odd ordering -- the
		# swap logic is kept exactly as it was.
		for($i=0; $i < @keys; $i++){
		  if($keys[$i]=~/^Title/i){
			  $temp=$keys[0];	   $keys[0]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Enclosed?/i){
			  $temp=$keys[$#keys];	   $keys[$#keys]=$keys[$i];   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Usage$/i){
			  $temp=$keys[1];	   $keys[1]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Function/i){
			  $temp=$keys[2];	   $keys[2]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Example/i){
			  $temp=$keys[3];	   $keys[3]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Version/i){
			  $temp=$keys[$#keys-2];  $keys[$#keys-2]=$keys[$i];   $keys[$i]=$temp;
			  #### To make null version value to '1.0'
			  if($input{$keys[$#keys-2]}=~/^\s*$/){ $input{$keys[$#keys-2]}='1.0'; }
		  }elsif($keys[$i]=~/^Warning/i){
			  $temp=$keys[$#keys-1]; $keys[$#keys-1]=$keys[$i];   $keys[$i]=$temp;
		  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		##       Writing starting line
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($start_line) = $Commont_Symbol . ('_' x $num) . "\n";
		if( $char_opt =~ /v/i){
			print $start_line;      } # Prints to STDOUT,

		my($Enclosed_came);  ## <<-- This should be HERE ! (per record)
		$Entry_length=${&get_longest_str_size(\@keys)};
		for( $i =0; $i < @keys; $i++){  #### @keys has been ordered above.
		  my $delimiter = ':';
		  my($entry) = ucfirst($keys[$i]);        ## capitalizing word
		  if($entry=~/^Enclosed?$/i){ $Enclosed_came = 1; }
		  my(@input) = split(/\n+/, $input{$keys[$i]});
		  if(@input > 0){
			  for($j =0; $j < @input; $j++){
				 ## Continuation lines of a multi line value carry no key ##
				 if($j > 0){
					  $entry = '';   $delimiter=' ';    }
				 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				 ###  This is to reduce the entry length of Enclosed content lines   ##
				 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				 if(($Enclosed_came==1)&&($entry eq '')){ $Entry_length=2; $VL=80; }
				 my $format = "$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n";
				 if( $char_opt =~ /v/i){ printf($format, $entry, $input[$j]); }
				 push(@out, sprintf($format, $entry, $input[$j]));
				 if($entry=~/^Enclosed?/){ $Enclosed_came = 1; }
			  }
		  }
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  ##   If the entries have null descriptions, just print entries  ######
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  else{
				 my $format = "$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n";
				 if( $char_opt =~ /v/i){ printf($format, $entry, ' '); }
				 push(@out, sprintf($format, $entry, ' '));
		  }
		}
		############################################################
		##       Writing  Ending  line                            ##
		############################################################
		$end_line = $Commont_Symbol . ('-' x $num) . "\n";
		if( ($char_opt =~ /v/i)&&($char_opt =~ /e/i) ){  print $end_line;  }
		if( $char_opt =~ /e/i){  push(@out, $end_line)   }
		unshift(@out, $start_line);
		push(@Final_out, \@out);
	}
	if(@Final_out > 1){ return(@Final_out); }
	elsif( @Final_out==1){ return($Final_out[0]); }
	return([]);   # no hash given: keep @{&write_sdb_file(...)} usable
} #<--- END of write_sdb_file
#________________________________________________________________________
# Title     : push_if_not_already
# Usage     : @out=@{&push_if_not_already(\@mother_array, \@adding_array )};
#             @out=@{&push_if_not_already(\@mother_array, $adding_scalar)};
# Function  : returns ref. of an array for a list of non-repetitive entry.
# Example   :
# Warning   :
# Keywords  : add_if_not_already, add_element_if_not_already, if_not_already
#             add_element_if_not_already, push_element_if_not_already,
#             if_no_already_push, put_element_if_not_already, add_new_element
#             add_new_items_only, push_new_items_only, push_new_elements_only
#             put_if_not_already,
# Options   :
# Returns   : a ref. of an array.
# Argument  : two references. The first should be an array ref. The 2nd can be either
#             scalar or array reference.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------
sub push_if_not_already{
	# Returns a reference to a NEW array holding every element of the first
	# (array ref) argument followed by those items of the second argument
	# that are not already present. The caller's arrays are not modified.
	#
	# Argument 1 : array reference (anything else is fatal).
	# Argument 2 : array reference, scalar reference, or a plain scalar.
	#
	# Items are compared as strings (undef counts as ''). An item occurring
	# several times among the new items is added only once. Elements that are
	# already repeated inside the first array are left as they are.
	if(ref($_[0]) ne 'ARRAY'){ print "\n push_if_not_array need ref\n"; die; }
	my @out_array = @{$_[0]};

	my @push_items_given;
	if(ref($_[1]) eq 'ARRAY'){
		@push_items_given = @{$_[1]};
	}elsif(ref($_[1]) eq 'SCALAR'){
		@push_items_given = (${$_[1]});
	}elsif(defined($_[1]) and !ref($_[1])){
		# Plain scalar, as in the documented usage with $adding_scalar.
		@push_items_given = ($_[1]);
	}

	# One hash lookup per item instead of rescanning the list for each one.
	my %already_in;
	for my $present (@out_array){
		$already_in{ defined($present) ? $present : '' } = 1;
	}
	for my $item (@push_items_given){
		my $as_string = defined($item) ? $item : '';
		next if $already_in{$as_string}++;
		push(@out_array, $item);
	}
	return(\@out_array);
}

#______________________________________________________________________________
# Title     : compare_sec_str_predictions
# Usage     : &compare_sec_str_predictions(\@files, \$use_universal_seq_hash_format);
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub compare_sec_str_predictions{
    # Reads the given secondary structure files (type chosen by file name:
    # predator .pr*/.pre/.prdl, .phd, .dssp, .stride, .msf), then compares the
    # parsed results pairwise in the order they were read (1st with 2nd, 3rd
    # with 4th, ...) and prints, per pair, both names, sequences, reduced
    # secondary structure strings, a '*' per identical position and the rate
    # of identity.
    #
    # Argument 1 : reference to an array of file names.
    # Argument 2 : reference to a scalar passed through to the file openers.
    # Returns    : nothing useful; all results go to STDOUT.
    #
    # Fixes relative to the previous revision:
    #   * the reverse option raised by one '_rv.' file name is no longer kept
    #     for every file that follows it.
    #   * the identity count is per pair; it used to accumulate across pairs
    #     while being divided by the current pair's length only.
    #   * an empty second structure string no longer dies with a division by
    #     zero (the rate is reported as 0).
    #   * lower case i/g/b/t are reduced to coil like their upper case forms;
    #     the old single tr mapped them to I/G/B/T because the a-z range
    #     matched first.
    my(@files, $use_universal_seq_hash_format, %predator_sec1,
       %predator_sec2, @final_hashes, %DSSP, %STRIDE, %PHD_sec, %Aln);
    @files=@{$_[0]};
    $use_universal_seq_hash_format=${$_[1]};

    foreach my $in (@files){
        # Start from the package-level $put_reverse_seq_option (it may have
        # been set by the calling script -- TODO confirm) and raise it for
        # THIS file only when its name carries the _rv. marker.
        my $reverse_option = $put_reverse_seq_option;
        if(!%predator_sec1 and $in=~/\S\.pr[edl]+$/){
            print "\n# (i) I found the first predator file \"$in\"";
            if($in=~/\S\_rv\./){
               print "\n# (i) _rv. string is detected. I am reversing the seq!! \"$in\"";
               $reverse_option='r';
            }
            %predator_sec1  = %{&open_predator_files(\$in, $reverse_option,
                                                     $use_universal_seq_hash_format)};
            push(@final_hashes, \%predator_sec1);
        }elsif(%predator_sec1 and $in=~/\S\.pr[edl]+$/){
            print "\n# (i) I found the second predator file \"$in\"";
            if($in=~/\S\_rv\./){
               $reverse_option='r' ;
               print "\n# (i) _rv. string is detected. I am reversing the seq!! \"$in\"";
            }
            %predator_sec2  = %{&open_predator_files(\$in, $reverse_option,
                                                      $use_universal_seq_hash_format)};
            push(@final_hashes, \%predator_sec2);
        }elsif($in=~/\.phd/){
            print "\n# (i) I found a PHD sec str pred. file \"$in\"";
            %PHD_sec  = %{&open_phd_files(\$in, $use_universal_seq_hash_format)};
            push(@final_hashes, {%PHD_sec});
        }elsif($in=~/\.dssp/){
            print "\n# (i) I found a DSSP file \"$in\"";
            %DSSP = %{&open_dssp_files(\$in, $use_universal_seq_hash_format)};
            push(@final_hashes, \%DSSP);
        }elsif($in=~/\.stride/){
            print "\n# (i) I found a Stride file \"$in\"";
            %STRIDE = %{&open_stride_files(\$in, $use_universal_seq_hash_format)};
            push(@final_hashes, \%STRIDE);
        }elsif($in=~/\S\.msf/){
            # The alignment is read but does not take part in the comparison.
            print "\n# (i) I found a MSF file \n";
            %Aln = %{&open_msf_files($in, $use_universal_seq_hash_format)};
        }else{
            print "\n# (E) Is $in  a sec. struc. prediction file??\n";
        }
     }
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # The final data set is  @final_hashes, %Aln, %DSSP
     #_____________________________________________________
     for(my $pair=0; $pair<@final_hashes; $pair+=2){
        my %seq1=%{$final_hashes[$pair]};
        # An unpaired last entry is compared with an empty record.
        my %seq2=%{ $final_hashes[$pair+1] || {} };
        my $seq_name1=$seq1{name};
        my $seq_name2=$seq2{name};
        my $sequence1=$seq1{sequence};
        my $sequence2=$seq2{sequence};

        # Upper case everything, then reduce I, G, B and T to coil (C).
        my $sec_str1=uc($seq1{sec_str_string});
        my $sec_str2=uc($seq2{sec_str_string});
        $sec_str1=~tr/IGBT/C/;
        $sec_str2=~tr/IGBT/C/;

        if(length($sec_str1) != length($sec_str2)){  print "\n# (E) The lengths are different!!! error??\n"; }
        print "\n# 1 $seq_name1  $seq_name2 \n";
        print "\n$sequence1\n$sequence2\n";
        print "\n$sec_str1\n$sec_str2\n";

        # Positions are walked over the FIRST string; the rate is relative to
        # the length of the SECOND one, as before.
        my @sec1=split(//, $sec_str1);
        my @sec2=split(//, $sec_str2);
        my $hits=0;
        for my $pos (0 .. $#sec1){
           if($sec1[$pos] eq $sec2[$pos]){
              print '*';
              $hits++;
           }else{
              print ' ';
           }
        }
        my $compared_length=length($sec_str2);
        print "\n\n Rate of identity : ", ($compared_length ? $hits/$compared_length : 0), "\n\n";
     }
     return;
}


#______________________________________________________________________________
# Title     : compare_PDBD_structural_domain_hashes
# Usage     :
# Function  :
# Example   :
# Keywords  : compare_PDBD_scop_domain_hashes
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub compare_PDBD_structural_domain_hashes{
    # Compares two 'domain id => classification' hashes.
    #
    # Argument 1 : reference to the first  hash (PDBD1).
    # Argument 2 : reference to the second hash (PDBD2).
    # Returns    : reference to a hash of hashes with up to three keys:
    #     'DIF_CLASSIFICATION' => { id => "class1 : class2" }  in both, different
    #     'MISSING_IN_PDBD2'   => { id => class1 }              only in the first
    #     'MISSING_IN_PDBD1'   => { id => class2 }              only in the second
    #   A key is absent when it has no entries.
    #
    # Presence is decided with exists(). The previous revision tested the
    # classification for truth, so a domain whose class was '0' or '' was
    # reported as missing from both sides. It also leaked $pdbd1_domain as a
    # package variable.
    my %PDBD1_hash=%{$_[0]};
    my %PDBD2_hash=%{$_[1]};
    my %DIFF_classification_domains;

    for my $domain (keys %PDBD1_hash){
       my $class_in_1=$PDBD1_hash{$domain};
       if(!exists($PDBD2_hash{$domain})){
          $DIFF_classification_domains{'MISSING_IN_PDBD2'}{$domain}=$class_in_1;
          next;
       }
       my $class_in_2=$PDBD2_hash{$domain};
       if($class_in_1 ne $class_in_2){
          $DIFF_classification_domains{'DIF_CLASSIFICATION'}{$domain}="$class_in_1 : $class_in_2";
       }
    }
    for my $domain (keys %PDBD2_hash){
       next if exists($PDBD1_hash{$domain});
       $DIFF_classification_domains{'MISSING_IN_PDBD1'}{$domain}=$PDBD2_hash{$domain};
    }
    return(\%DIFF_classification_domains);
}

#___________________________________________________________________
# Title     : compare_proline_residue_prediction
# Usage     : compare_proline_residue_sec_str_prediction stx_fb.prdl stx.prdl [stx.stride, stx.dssp]
# Function  :
# Example   : assess_sec_str_pred.pl bpip.pre bpip.phd $DSSP/1bll.dssp 1bllE_bpip.msf
#
#           H        Alpha helix
#           G        3-10 helix
#           I        PI-helix
#           E        Extended conformation
#           B or b   Isolated bridge
#           T        Turn
#           C        Coil (none of the above)
#       ONE example run result>>
#             $total_proline_count 13746, $total_identical 12247, $total_non_identical 1499
#             $total_num_correct_identical 9657, $total_num_wrong_identical 2590
#
# Class     : Perl::Utility::Arg_handling
# Keywords  :
# Options   : #  for debugging.
#             _  for debugging.
#    $use_universal_seq_hash_format=u by u -u
#
# Reference : http://cyrah.med.harvard.edu/perl_for_bio.html
# Returns   : Ref of
# Version   : 1.4
#---------------------------------------------------------------
sub compare_proline_residue_prediction{
    # For every position where BOTH predicted sequences have a proline,
    # counts whether the two predictions agree and, when they agree, whether
    # the common prediction matches the reference assignment.
    #
    # Argument 1 : ref of hash with 'sec_str_array' (reference assignment).
    # Argument 2 : ref of hash with 'sec_str_array' and 'residue_array'
    #              (first prediction).
    # Argument 3 : as argument 2 (second prediction); its
    #              'matched_prediction_count', 'mismatched_prediction_count'
    #              and 'mismatch_stats' entries are copied to package variables.
    # Returns    : ref of array [prolines, identical predictions,
    #              non identical predictions, identical and correct,
    #              identical and wrong]; each count is 0 when nothing was found.
    # Side effect: prints one line per identical-but-wrong proline and a
    #              summary line to STDOUT.
    #
    # Fix relative to the previous revision: the reference assignment was
    # interpolated into a regex, so '.' matched any prediction, '?' or '*'
    # killed the program, and an empty assignment re-used whatever pattern
    # had matched last. It is now compared as a literal, case-insensitive
    # substring, and an empty/undefined assignment counts as wrong.
    my (%SEC_STR_ASSIGN, %str_prediction1, %str_prediction2,
        @success_rates, @sec_structure, $num_of_residues  );
    my ($num_PROLINE, $num_identical, $num_non_identical,
        $num_correct_identical, $num_wrong_identical) = (0, 0, 0, 0, 0);

    %SEC_STR_ASSIGN =%{$_[0]};
    %str_prediction1=%{$_[1]};
    %str_prediction2=%{$_[2]};

    # NOTE(review): the variables assigned below without 'my' are package
    # variables. They are not read again in this sub; they are kept because
    # callers may rely on them -- TODO confirm and make them lexical.

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # $matched_prediction_count is matched pred. res. between forw and backward seq
    #________________________________________________________________________________
    $matched_prediction_count   =$str_prediction2{'matched_prediction_count'};
    $num_of_diff_pred_for_both_methods=$str_prediction2{'mismatched_prediction_count'};

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # %mismatch_stats has 'H_E_mismatch', 'H_C_mismatch' etc keys
    #______________________________________________________________
    %mismatch_stats=%{ $str_prediction2{'mismatch_stats'} || {} };

    $num_of_residues =@sec_structure= @{$SEC_STR_ASSIGN{'sec_str_array'}};
    $num_of_residues1=@prediction1=   @{$str_prediction1{'sec_str_array'}};
    $num_of_residues2=@prediction2=   @{$str_prediction2{'sec_str_array'}};
    @residues1 = @{$str_prediction1{'residue_array'}};
    @residues2 = @{$str_prediction2{'residue_array'}};

    for my $pos (0 .. $#residues1){
       next unless($residues1[$pos] =~/P/i and $residues2[$pos]=~/P/i);
       $num_PROLINE++;
       if($prediction1[$pos] ne $prediction2[$pos]){
          $num_non_identical++;
          next;
       }
       $num_identical++;

       # Reduce the reference assignment: I, G, B, T count as coil.
       $sec_structure[$pos]=~tr/IiGgBbTt/CcCcCcCc/;
       if($sec_structure[$pos]=~/c/){
          $coil_predicted_PRO++;   # package variable, lower case 'c' only (as before)
       }

       my $assigned=$sec_structure[$pos];
       if(defined($assigned) and length($assigned)
          and index(lc($prediction1[$pos]), lc($assigned)) >= 0){
          $num_correct_identical++;
       }else{
          print "$residues1[$pos] $prediction1[$pos] $prediction2[$pos] $sec_structure[$pos] ***\n";
          $num_wrong_identical++;
       }
    }
    print "\$num_PROLINE $num_PROLINE, \$num_identical $num_identical, \$num_non_identical $num_non_identical\n";
    return([$num_PROLINE, $num_identical, $num_non_identical,
            $num_correct_identical, $num_wrong_identical]);
}



#______________________________________________________________________________
# Title     : compare_MIPS_interaction_with_SISF
# Usage     : &compare_MIPS_interaction_with_SISF($ARGV[0], $ARGV[1]);
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Returns   :
# Version   : 1.0
#------------------------------------------------------------------------------
sub compare_MIPS_interaction_with_SISF{
    # Compares two lists of interacting pairs.
    #
    # Argument 1 : name of the MIPS pair file; the first two whitespace
    #              separated tokens of a line are a pair.
    # Argument 2 : name of the SISF pair file; either 'id1| ... | id2| ...'
    #              lines or plain 'id1 id2' lines.
    # A pair is stored with its two names sorted, so 'A B' equals 'B A'.
    #
    # Writes the pairs found in one list only to "MIPS_only.txt" and
    # "SISF_only.txt" in the current directory (each pair preceded by a
    # newline), prints the counts to STDOUT and returns a reference to the
    # array of pairs common to both lists.
    #
    # Fixes relative to the previous revision:
    #   * both file names are required; the old check let a single argument
    #     through and failed later with a bare 'Died'.
    #   * every open is checked and every handle is closed, so the output
    #     files are complete when the sub returns.
    #   * the homo/hetero partner counts are per call (and print as 0, not
    #     as an empty string); they used to accumulate in package variables.
    my(%MIPS, %SISF, @interact_pairs_MIPS, @interact_pairs_SISF,
       @MIPS_only, @SISF_only, @common);
    my($homopartner, $heteropartner)=(0, 0);
    if(@_ < 2){ die "\nUsage: $0 strintergene_pairs.txt Sinteraction_pair_seqID_0.4_0.001.txt \n"; }
    my($Strintergene, $Sinteraction_pair)=@_;

    open(my $mips_in, '<', $Strintergene)
        or die "compare_MIPS_interaction_with_SISF: can not open $Strintergene: $!\n";
    while(my $line=<$mips_in>){
      if($line=~/(\S+)\s+(\S+)/){
         my $sorted_pair=join(' ', sort($1, $2));
         $MIPS{$sorted_pair}=$sorted_pair;
      }
    }
    close($mips_in);

    open(my $sisf_in, '<', $Sinteraction_pair)
        or die "compare_MIPS_interaction_with_SISF: can not open $Sinteraction_pair: $!\n";
    while(my $line=<$sisf_in>){
      if($line=~/^(\S+)\|\s+\S[\S ]+\s+\|\s+(\S+)\|\s+\S+/
         or $line=~/^(\S+)\s+(\S+)/){
         my $sorted_pair=join(' ', sort($1, $2));
         $SISF{$sorted_pair}=$sorted_pair;
      }
    }
    close($sisf_in);

    my $all_mips_inter=@interact_pairs_MIPS=sort keys %MIPS;
    my $all_sisf_inter=@interact_pairs_SISF=sort keys %SISF;
    my $sisf_only=@SISF_only=@{&subtract_array(\@interact_pairs_SISF, \@interact_pairs_MIPS)};
    my $mips_only=@MIPS_only=@{&subtract_array(\@interact_pairs_MIPS, \@interact_pairs_SISF)};
    my $common   =@common   =@{&get_common_array_entry(\@interact_pairs_MIPS, \@interact_pairs_SISF)};

    open(my $mips_out, '>', 'MIPS_only.txt')
        or die "compare_MIPS_interaction_with_SISF: can not write MIPS_only.txt: $!\n";
    for my $pair (@MIPS_only){ print $mips_out "\n$pair"; }
    close($mips_out)
        or die "compare_MIPS_interaction_with_SISF: can not close MIPS_only.txt: $!\n";

    open(my $sisf_out, '>', 'SISF_only.txt')
        or die "compare_MIPS_interaction_with_SISF: can not write SISF_only.txt: $!\n";
    for my $pair (@SISF_only){ print $sisf_out "\n$pair"; }
    close($sisf_out)
        or die "compare_MIPS_interaction_with_SISF: can not close SISF_only.txt: $!\n";

    # A common pair whose two names are equal is a homo partner.
    for my $pair (@common){
      if($pair=~/(\S+)\s+(\S+)/){
         if($1 eq $2){    $homopartner++;
         }else{           $heteropartner++;
         }
      }
    }
    print "\n\nCommon: $common,  \$mips_only: $mips_only (Total: $all_mips_inter), \$sisf_only: $sisf_only (Total: $all_sisf_inter) \n";
    print "\n \"MIPS_only.txt\" and \"SISF_only.txt\" are made \n";
    print "\n \$homopartner $homopartner, \$heteropartner $heteropartner\n";
    return(\@common);
}





#______________________________________________________________________________
# Title     : compare_diff_version_of_PDBD_fasta_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org,
# Category  :
# Returns   :
# Version   : 1.1
#------------------------------------------------------------------------------
sub compare_diff_version_of_PDBD_fasta_files{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Reads the '>id classification' header lines of two PDBD
    #            fasta files into hashes (id => classification), compares
    #            the hashes with compare_PDBD_structural_domain_hashes and
    #            hands the result to write_SDDIF_file.
    # Argument : ($pdbd_file1, $pdbd_file2), each a file name or a ref. of
    #            a scalar holding the file name
    # Returns  : whatever write_SDDIF_file returns
    #______________________________________________________________________
    my($pdbd1, $pdbd2, %PDBD1_hash, %PDBD2_hash, %DIFF_classification_domains);

    # Only dereference real references. A plain file name must never be
    # used as a (symbolic) reference.
    $pdbd1=ref($_[0]) ? ${$_[0]} : $_[0];
    $pdbd2=ref($_[1]) ? ${$_[1]} : $_[1];

    # Both files are opened before any reading, so a missing file is
    # reported straight away.
    open(my $pdbd1_fh, '<', $pdbd1) || die "can not open $pdbd1: $! \n";
    open(my $pdbd2_fh, '<', $pdbd2) || die "can not open $pdbd2: $! \n";

    for my $input ([$pdbd1_fh, \%PDBD1_hash], [$pdbd2_fh, \%PDBD2_hash]){
      my($fh, $class_of)=@{$input};
      while(<$fh>){
        # header line:  >domain_id  classification  ...
        if(/\>(\S+)\s+(\S+)/){
           $class_of->{$1}=$2;
        }
      }
      close($fh);
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # %DIFF_classification_domains{'DIF_CLASSIFICATION'}{$pdbd1_domain}
    # $DIFF_classification_domains{'MISSING_IN_PDBD1'}{$pdbd2_domain}
    # $DIFF_classification_domains{'MISSING_IN_PDBD2'}{$pdbd1_domain}
    #______________________________________________________________________
    %DIFF_classification_domains=%{&compare_PDBD_structural_domain_hashes(\%PDBD1_hash, \%PDBD2_hash)};
    return(&write_SDDIF_file(\%DIFF_classification_domains, \$pdbd1, \$pdbd2));
}




#________________________________________________________________________________
# Title     : compare_parf_files.pl
# Usage     :
# Function  :
# Example   :
#    PARF file looks like this>
#   d1nsca_   d3nn9__   Homolog -664.92 2.43.1.1.3  2.43.1.1.2
#   d1dppa_   d2olba_   Homolog -617.41 3.68.1.1.6  3.68.1.1.1
#   d2ach.1a1 d9api.1a1 Homolog -556.38 5.2.1.1.3   5.2.1.1.4
#
# Keywords  :
# Options   :
# Author    :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub compare_parf_files{
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Purpose : Collects the sequence pairs of the given PARF files and
     #           prints, for one pair of files, the sequence pairs which
     #           are found in one file but not in the other.
     # Argument: file names (sorted out by handle_arguments into @file),
     #           and optionally  l=<number>  to change the maximum number
     #           of Homolog/Nomolog lines read from each file.
     # Output  : printed to STDOUT. There is no explicit return statement.
     # Note    : %pair_hash, @parf_file_keys, @redundant_all, @all and the
     #           @first_.../@second_... arrays are not declared with 'my'
     #           in this sub.
     #_____________________________________________________________________
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
     \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
     \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my (@parf_file,  %all_parf_file_hash);
     # Reading of one PARF file stops as soon as one of these limits is hit
     my $number_of_lines_to_read   = 5000;
     my $number_of_Homologs_to_read= 700; # 699 is for 1% error in 935 PDB40D
     my $number_of_Nomologs_to_read= 9; # 9 is for 1% error in 935 PDB40D

     # 'l=' option replaces the default line limit
     if($vars{'l'}=~/\S+/){ $number_of_lines_to_read=$vars{'l'} }
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Step 1: choose the PARF files among the input files. A name which
     #  contains '.parf' is accepted directly, other files are accepted
     #  when a PARF looking line is found before the line counter passes 100
     #_____________________________________________________________________
     for($i=0; $i< @file; $i++){
         my ($counter);
         if($file[$i]=~/\.parf/){
             push(@parf_file, $file[$i]);
         }else{
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # Check if it is parf file inside the file
             #__________________________________________________________
             # The result of this open is not checked
             open(INPUT_FILE, "<$file[$i]");
             while(<INPUT_FILE>){
                  $counter++;
                  # name1  name2  Homolog|Nomolog  score  class1  class2
                  if(/^\s*\S+\s+\S+\s+[NH]omolog\s+\S+\s+\S+\s+\S+/){
                      push(@parf_file, $file[$i]);
                      last;
                  }else{
                      if($counter > 100){  ## giving up, it is not PARF file!
                          print "\n# $0 needs to have PARF files, others are ignored";
                          last;
                      }else{
                          next;
                      }
                  }
             }
             close(INPUT_FILE);
         }
     }
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Step 2: read each PARF file. The two sequence names of a line are
     #  sorted and joined to make one key ('nameA nameB'). The list of keys
     #  is stored for each file, and %pair_hash keeps a description of the
     #  pair (type, score, running Homolog and Nomolog counts).
     #_____________________________________________________________________
     for($i=0; $i< @parf_file; $i++){
         my($nomolog_counter, $homolog_counter, $sorted_pair, @sorted_seq_name_pairs);
         # The result of this open is not checked
         open(PARF_FILE, "<$parf_file[$i]");
         while(<PARF_FILE>){
              if(/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/){
                   $sorted_pair=join(' ', sort($1, $2));
                   $homology_info=$3;
                   $score=$4;
                   push(@sorted_seq_name_pairs, $sorted_pair);
                   # $3 is still the third column here, no other match was done
                   if($3 =~/Nomolog/i){
                       $nomolog_counter++;
                   }elsif($3 =~/Homolog/i){
                       $homolog_counter++;
                   }
                   # A pair seen again (in this or another file) replaces the earlier text
                   $pair_hash{$sorted_pair}="$homology_info   $score   Homol rank: $homolog_counter    Nomol rank: $nomolog_counter";
              }
              # Only Homolog and Nomolog lines are counted as 'lines read'
              $number_of_lines_read=$nomolog_counter+$homolog_counter;
              if($number_of_lines_read == $number_of_lines_to_read){
                   last;
              }
              if($nomolog_counter == $number_of_Nomologs_to_read){
                   last;
              }
              if($homolog_counter == $number_of_Homologs_to_read){
                   last;
              }
         }
         close(PARF_FILE);
         $all_parf_file_hash{$parf_file[$i]}=[@sorted_seq_name_pairs];
     }
     @parf_file_keys = keys %all_parf_file_hash;

     # Pool the pairs of all the files. @redundant_all is not emptied here,
     # so pairs pushed by an earlier call stay in it.
     for($i=0; $i< @parf_file_keys; $i++){
         @seq_pair_array=@{ $all_parf_file_hash{$parf_file_keys[$i]} };
         push(@redundant_all, @seq_pair_array);
     }

     # @all and $num_of_all_seq_pairs are not used again in this sub
     @all=@{&remove_dup_in_array(\@redundant_all)};
     $num_of_all_seq_pairs=@all;

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Step 3: compare every ordered pair of files. The result arrays are
     #  overwritten in each round, so only the result of the LAST ($i, $j)
     #  combination is left for the printing below.
     #  $first_minus_second (scalar) and @first_and_second are not used
     #  again in this sub.
     #_____________________________________________________________________
     for($i=0; $i< @parf_file_keys; $i++){
         $first_set_name=$parf_file_keys[$i];
         @first_seq_pair_array=@{ $all_parf_file_hash{$first_set_name} };
         for($j=0; $j< @parf_file_keys; $j++){
              if($i == $j){ next }
              $second_set_name=$parf_file_keys[$j];
              @second_seq_pair_array=@{ $all_parf_file_hash{$second_set_name} };
              $first_minus_second="$first_set_name\_$second_set_name";
              @first_minus_second=@{&subtract_array(\@first_seq_pair_array, \@second_seq_pair_array)};
              @second_minus_first=@{&subtract_array(\@second_seq_pair_array, \@first_seq_pair_array)};
              @first_and_second=@{remove_dup_in_array([@first_seq_pair_array, @second_seq_pair_array])};
         }
     }
     # Pairs only in the 'first' file, a separator line, then pairs only in
     # the 'second' file, each followed by its %pair_hash description
     for(@first_minus_second){
         print $_, " $pair_hash{$_} \n";
     }
     print "-----------------------------------------------------------\n";
     for(@second_minus_first){
         print $_, " $pair_hash{$_} \n";
     }
}





#______________________________________________________________________________
# Title     : compare_MSP_hash_data
# Usage     :
# Function  : Compares the matched sequences of 2 HASH MSP files input
#             There can be 2 different types of HASH format.
#
# Example   : The input format is like (1) <-- From open_mspa_files sub
#                'file_name_of_msp' => 'A long line of MSP with \n for each match'
#
#             The input format is like (2) <-- From convert_bla_to_msp sub
#                'Seq_name_match_name' => 'ONE line of MSP with for each match'
#                'Seq_name_match_name' => 'ONE line of MSP with for each match'
#                'Seq_name_match_name' => 'ONE line of MSP with for each match'
#                'Seq_name_match_name' => 'ONE line of MSP with for each match'
#
# Keywords  :
# Options   :
#     $Percent_similarity= by Percent_similarity=
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub compare_MSP_hash_data{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Takes the matched sequence names (9th whitespace separated
    #            column of each MSP line) from two MSP hashes and returns
    #            the elements of the larger list which are regarded as
    #            different by subtract_similar_seq_elements.
    # Argument : (\%msp_hash1, \%msp_hash2 [, "Percent_similarity=NN"])
    #            A hash with one key is taken as format (1), that is one
    #            long value with "\n" between the MSP lines. A hash with
    #            more keys is format (2), one MSP line for each value.
    # Returns  : ref. of an array of the differing elements
    #______________________________________________________________________
    my(@hash1, @hash2, @each_lines1, @each_lines2, @matches1, @matches2,
       @subtracted_diff_elements, $Percent_similarity, $similarity_option);
    $Percent_similarity=80;
    @hash1=%{$_[0]};   # flattened to (key, value, key, value, ...)
    @hash2=%{$_[1]};
    if(defined($_[2]) and $_[2]=~/Percent_similarity=(\S+)/){  $Percent_similarity=$1;   }
    # The threshold asked by the caller is passed on (default 80)
    $similarity_option="Percent_similarity=$Percent_similarity";

    if(@hash1 > 2){                         # IF the input is MSP hash format (2)
           @each_lines1=values %{$_[0]};
    }elsif(defined($hash1[1])){             # format (1), or a single one line value
           @each_lines1=split(/\n+/, $hash1[1]);
    }

    if(@hash2 > 2){                         # IF the input is MSP hash format (2)
           @each_lines2=values %{$_[1]};
    }elsif(defined($hash2[1])){             # format (1), or a single one line value
           @each_lines2=split(/\n+/, $hash2[1]);
    }

    @matches1= map {(/(\S+)/g)[8] } @each_lines1;
    @matches2= map {(/(\S+)/g)[8] } @each_lines2;

    # The larger list always goes in as the first argument
    if(@matches2 > @matches1){
        @subtracted_diff_elements=@{&subtract_similar_seq_elements(\@matches2, \@matches1,
                                      $similarity_option)};
    }else{
        @subtracted_diff_elements=@{&subtract_similar_seq_elements(\@matches1, \@matches2,
                                      $similarity_option)};
    }
    return(\@subtracted_diff_elements);
}

#________________________________________________________________________
# Title     : subtract_similar_seq_elements
# Usage     : @subs = @{&subtract_similar_seq_elements(\@match_seqs1, \@match_seqs2, "Percent_similarity=80")};
# Function  : returns the elements of the first input array which are not
#             found in (nor similar to an element of) the second input array.
# Example   :
#
#     @match_seqs1=('xxxx_1-30', 'YYYYY_30-44', 'ZZZZ_1-4000');
#     @match_seqs2=('xxxx_4-32', 'YYYYY_25-41', 'ZZZZ_2000-3000');
#
# Keywords  : remove_similar_seq_elements, subtract_similar_sequence_elements
#              remove_similar_sequence_elements
# Options   :
# Returns   :
# Argument  :
#     $Percent_similarity= by Percent_similarity=
#
# Authors   : jong@biosophy.org
# Version   : 1.1
#--------------------------------------------------------------------
sub subtract_similar_seq_elements{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Takes the elements of the first array which are not found
    #            literally in the second array and decides which of them
    #            are reported. Elements look like 'seqname_start-end'.
    # Argument : (\@seq_match_array1, \@seq_match_array2
    #             [, "Percent_similarity=NN"])   default NN is 80
    # Returns  : ref. of an array (no special order) of the reported
    #            elements of the first array. Elements which do not look
    #            like 'seqname_start-end' are never reported.
    # Note     : all work variables are lexical, so seq. names seen in an
    #            earlier call can not influence a later call.
    #______________________________________________________________________
    my(@seq_match_array1, @seq_match_array2, @diff_elements,
       %counter, %counter_seq_name_only, %truly_diff_elements);
    my $Percent_similarity=80; # 80% similarity is accepted
    my $leng_thresh=10;        # shorter regions are not compared

    @seq_match_array1=@{$_[0]}; ## this is the larger Array (supposedly)
    @seq_match_array2=@{$_[1]};
    if(defined($_[2]) and $_[2]=~/Percent_similarity=(\S+)/){
        $Percent_similarity=$1;
    }

    # Count the whole elements and the bare sequence names of the 2nd array
    for my $element (@seq_match_array2){
        $counter{$element}++;
        if($element=~/(\S+)_\d+\-\d+/){ $counter_seq_name_only{$1}++; }
    }
    @diff_elements= grep(!$counter{$_}, @seq_match_array1);

    for my $diff_element (@diff_elements){  ## @diff_elements is from  @seq_match_array1
        next unless $diff_element=~/^(\S+)_(\d+)\-(\d+)/;
        my($seq1, $start1, $end1)=($1, $2, $3);

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # this is a case of totally NEW sequence in the First Larger Array
        # given. It is checked before the inner loop, so it also works
        # when the second array is empty.
        #__________________________________________________________________
        unless($counter_seq_name_only{$seq1}){
            $truly_diff_elements{$diff_element}=$diff_element;
            next;
        }

        for my $other_element (@seq_match_array2){
            next unless $other_element=~/^(\S+)_(\d+)\-(\d+)/;
            my($seq2, $start2, $end2)=($1, $2, $3);
            # Only regions of the very same sequence are compared
            next unless $seq1 eq $seq2;

            my $leng1=$end1-$start1;
            my $leng2=$end2-$start2;
            my($smaller_leng, $larger_leng)= ($leng1 >= $leng2) ? ($leng2, $leng1)
                                                                : ($leng1, $leng2);
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Checking the minimal seq region leng here
            #______________________________________________________
            if($smaller_leng < $leng_thresh){ next }

            my $average_leng_of_2=($smaller_leng+$larger_leng)/2;
            my $overlapping_seq_match_size=${&get_overlapping_seq_match_size($start1, $end1, $start2, $end2)};
            my $finally_adjusted_seq_leng = $average_leng_of_2*($Percent_similarity/100);

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
            # Now let's check if we regard them homologous or not
            # NOTE(review): the element is reported when the overlap
            #  REACHES the threshold. The sub name suggests the opposite,
            #  the test is left as it was - please confirm.
            #_______________________________________________________
            if( $overlapping_seq_match_size >=  $finally_adjusted_seq_leng){
                $truly_diff_elements{$diff_element}=$diff_element;
            }
        }
    } # END of for my $diff_element (@diff_elements){
    return([keys %truly_diff_elements]);
}




#___________________________________________________________________
# Title     : get_peptide_occurance
# Usage     :
# Function  : gets the number of occurances of peptide(with given size) for
#             any number of sequences given.
# Example   : %stat=%{&get_peptide_occurance(\%pro_sequence, $size)};
#              while %pro_sequence has one or more sequences like
#              seq1 AAAAAAAAAAAA, seq2 BBBBBBBBBBBBBB, ...
#              $size is number. For dipeptide=2, tripeptide=3, tetrapep=4...
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : eg=> (\%ref_hash, 4)
# Category  :
# Version   : 1.2
#---------------------------------------------------------------
sub get_peptide_occurance{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Counts every overlapping peptide of the given size in all
    #            the input sequences.
    # Argument : (\%sequences, $size)   %sequences is  name => sequence
    # Returns  : ref. of a hash,  peptide => number of occurrences.
    #            An empty hash when $size is smaller than 1.
    # Note     : a sequence with any of B, Z or X (any case) is skipped
    #            as a whole.
    #______________________________________________________________________
    my(%stat, $pep_entry_num);
    my %all=%{$_[0]};
    my $size=int($_[1] || 0);

    # A peptide size under 1 can not give any peptide
    return(\%stat) if $size < 1;

    for my $name (keys %all){
        my $sequence=$all{$name};
        next if $sequence=~/[BZX]/i;

        # NOTE(review): $seq_leng is a package variable which sums the
        #  lengths over ALL the calls. It is left as it was because code
        #  outside of this sub may read it - please confirm.
        $seq_leng += length($sequence);

        my $all_occur_pep;
        my $last_start=length($sequence)-$size;
        for(my $i=0; $i <= $last_start; $i++){
            my $peptide=substr($sequence, $i, $size);
            $stat{$peptide}++;
            $all_occur_pep ++;
            print "\n$peptide  $stat{$peptide}" if $debug==1;
            $pep_entry_num=keys %stat;
            if( ($debug==1)&&($pep_entry_num%100 == 0 ) ){
                print "\n Present peptide entries are:  $pep_entry_num  out of $all_occur_pep residues \n";
            }
        }
    }
    return(\%stat);
}


#___________________________________________________________________
# Title     : open_lottery_file
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#---------------------------------------------------------------
sub open_lottery_file{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Reads lines holding seven 2 digit numbers (six numbers
    #            and a bonus number, each with leading blanks) and collects
    #            them column by column.
    # Argument : ($file_name [, '-a'])
    # Returns  : without '-a': seven array refs,
    #              (\@bin1, \@bin2, \@bin3, \@bin4, \@bin5, \@bin6, \@bonus)
    #            with '-a'   : one flat list, (@bin1, @bin2, ..., @bonus)
    # Note     : the '-a' switch is valid for the present call only.
    #            When the file can not be opened a warning is given and
    #            empty results are returned.
    #______________________________________________________________________
    my (@lines, @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus, @allbins);
    my $single_array_return= (defined($_[1]) and $_[1]=~/\-a/i) ? 1 : 0;

    if(open(my $lottery_fh, '<', $_[0])){
        @lines=<$lottery_fh>;
        close($lottery_fh);
    }else{
        warn "\n# (W) open_lottery_file: can not open $_[0]: $!\n";
    }

    for my $line (@lines){
        if($line=~/ {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,2}(\d\d)/){
            push(@bin1, $1);
            push(@bin2, $2);
            push(@bin3, $3);
            push(@bin4, $4);
            push(@bin5, $5);
            push(@bin6, $6);
            push(@bonus, $7);
        }
    }
    if($single_array_return == 1){
        @allbins=( @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus);
        return(@allbins);
    }
    return(\@bin1, \@bin2, \@bin3, \@bin4, \@bin5, \@bin6, \@bonus);
}


#_________________________________________________________________
# Title     : get_probable_half
# Usage     :
# Function  : This produces a hash ref. which is supposed to be most probable
#             according to the given array. It divides array into halves
#             gets the more probable half until it gets one single number.
# Example   :
# Warning   :
# Keywords  : get_frequent_halves,
# Options   :
# Returns   :
# Argument  : \@array
# Category  :
# Version   : 1.0
#-----------------------------------------------------------------
sub get_probable_half{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Sorts the keys numerically, cuts them into a lower and an
    #            upper half, keeps the half whose values sum up higher
    #            (the upper half on a tie) and repeats until one key is left.
    # Argument : (\%hash)   number => weight
    # Returns  : ref. of a hash with the single key which is left.
    #            Nothing when the input hash is empty.
    # Side eff.: prints the hash and the two halves of every round.
    # Note     : with an odd number of keys the last key is taken twice to
    #            make the halves equal in size, so its value is added
    #            twice to the sum of the upper half.
    #______________________________________________________________________
    my (%hash, %hash1, %hash2, @keys, @first_half, @second_half, $half_size, $i);
    my ($sum1, $sum2)=(0, 0);
    %hash=%{$_[0]};
    @keys= sort {$a <=> $b} keys %hash;   # numerical order
    print "\n Hash is ",%hash,"\n";

    return unless @keys;               # nothing to choose from
    return(\%hash) if @keys == 1;      # final answer

    if((@keys % 2) != 0){ # make the number even
        push(@keys, $keys[$#keys]);
    }
    $half_size=@keys/2;
    @first_half =@keys[0..($half_size-1)];
    @second_half=@keys[$half_size..$#keys];
    print "@first_half", "\n";
    print "@second_half", "\n";

    for($i=0; $i < $half_size; $i++){
        $sum1 +=$hash{$first_half[$i]};
        $sum2 +=$hash{$second_half[$i]};
        $hash1{$first_half[$i]} =$hash{$first_half[$i]};
        $hash2{$second_half[$i]}=$hash{$second_half[$i]};
    }
    return(&get_probable_half( ($sum1 > $sum2) ? \%hash1 : \%hash2 ));
}

#_______________________________________________________________
# Title     : divide_array
# Usage     : &show_array(&divide_array(\@input, 6));
# Function  : divides any array to the denominator given.
#             If you give array of 100 elem, with 5, you will
#             get 5 arrays with 20 elem each.
# Example   :
# Warning   :
# Keywords  : split_array_into_pieces, split_array, chop_array,
#             fragment_array,
# Options   : s=  for dividing the array with sub array size
#                 eg) to get 20 elem length sub arrays from
#                     a big array
#                     @ar_ref=@{&divide_array(\@array, 's=20')};
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#-----------------------------------------------------------
sub divide_array{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Cuts an array into sub arrays.
    # Argument : (\@array, $how)  $how can also be a ref. of a scalar
    #            $how = number  : number of sub arrays to make. The first
    #                             sub arrays get one elem. more when the
    #                             division leaves a remainder.
    #            $how = 's=NN'  : each sub array gets NN elem., the last one
    #                             takes what is left.
    # Returns  : ref. of an array of array refs
    # Note     : a missing, unreadable or 0 denominator is set to 1, and
    #            a sub array size of 0 is set to 1, both with a message.
    #______________________________________________________________________
    my ($size, $remaining, $size_div, $denominator, $how, $frag_ar_size, $i);
    my @array = @{$_[0]};
    my @final_array_ref=();

    $how= ref($_[1]) ? ${$_[1]} : $_[1];
    if(defined($how)){
        if($how=~/^\d+$/){
            $denominator = $how;
        }elsif($how=~/s=(\d+)$/i){
            $size=$1;
            $size_div=1;
        }
    }

    if($size_div){
        # splice with size 0 would never empty the array
        if($size < 1){
            print "\n Sub array size is 0, error, setting to 1\n";
            $size = 1;
        }
        while(@array){
            push(@final_array_ref, [splice(@array, 0, $size)]);
        }
        return(\@final_array_ref);
    }

    unless($denominator){
        print "\n Denominator is 0, error, setting to 1\n";
        $denominator = 1;
    }

    $frag_ar_size = int(@array/$denominator);
    if($debug eq 1){ print "\n Frag arr size is :  $frag_ar_size \n" }
    $remaining = @array % $denominator;
    if($debug eq 1){ print "\n Remnant elem size is : $remaining \n" }
    for($i=0; $i < $denominator; $i++){
        if($remaining > 0){
            # the remainder is given out one by one to the first sub arrays
            push(@final_array_ref, [splice(@array, 0, $frag_ar_size+1)] );
            $remaining --;
        }elsif(@array > 0){
            push(@final_array_ref, [splice(@array, 0, $frag_ar_size)] );
        }
    }
    return(\@final_array_ref);
}


#__________________________________________________________________________
# Title     : split_fasta_files
# Usage     : @names_of_single_files=@{&split_fasta_files(\@files)};
# Function  :
# Example   :
# Keywords  : divide_fasta_files, split_fasta_db_files, divide_fasta_db_files
#             make_single_fasta_files, write_single_fasta, write_single_fasta_files
# Options   :
#   $return_seq_file_name=f by f  # to return file names than seq names array
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub split_fasta_files{
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Purpose  : Writes each sequence entry of the input fasta files to
		#            its own file, named  <seq_name>.spfa
		# Argument : fasta file names, and optionally 'f' (char option)
		# Returns  : ref. of an array of the seq. names, or of the .spfa
		#            file names when the 'f' option is given
		# Note     : a .spfa file which exists already with some content is
		#            not written again (a message is printed), but its name
		#            is still put in the returned list.
		#____________________________________________________________________
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my (@each_single_seq_names, $seq_name, $return_seq_file_name,
				$out_single_fasta_file_name, $out_fh);
		if($char_opt=~/f/){ $return_seq_file_name='f'; }

		for($i=0; $i< @file; $i++){
				my $fasta_fh;
				unless(open($fasta_fh, '<', $file[$i])){
						warn "\n# (W) split_fasta_files: can not open $file[$i]: $!\n";
						next;
				}
				while(<$fasta_fh>){
					 if(/\>\s*(\S+)/){
							 $seq_name=$1;
							 $out_single_fasta_file_name="$seq_name\.spfa";

							 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
							 # The former entry is closed first. $out_fh stays undefined
							 #  when the present entry is not to be written, so the seq.
							 #  lines below never depend on the size of a file which is
							 #  still being written.
							 #____________________________________________________________
							 if($out_fh){ close($out_fh); undef($out_fh); }
							 if(-s $out_single_fasta_file_name){
									print "\n# (i) split_fasta_files: $out_single_fasta_file_name exists, skipping";
							 }elsif(open($out_fh, '>', $out_single_fasta_file_name)){
									print $out_fh $_;
							 }else{
									warn "\n# (W) split_fasta_files: can not write $out_single_fasta_file_name: $!\n";
									undef($out_fh);
							 }
							 if($return_seq_file_name){
									 push(@each_single_seq_names, $out_single_fasta_file_name);
							 }else{
									 push(@each_single_seq_names, $seq_name);
							 }
					 }elsif(/\w+/){
							 # sequence line, it belongs to the entry opened above
							 if($out_fh){ print $out_fh $_; }
					 }
				}
				close($fasta_fh);
				if($out_fh){ close($out_fh); undef($out_fh); }
		}
		return(\@each_single_seq_names);
}




#______________________________________________________________________________
# Title     : split_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
#  $division_factor= by d=
# Author    : jong@biosophy.org,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub split_files{
		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # Purpose  : Cuts each input file line-wise into $division_factor
		 #            fragment files named  <base>_s<N>.<extension>
		 #            (N starts from 0).
		 # Argument : file names, and optionally  d=<number>  for the number
		 #            of fragments (default 2, a value under 1 is set to 1)
		 # Returns  : ref. of an array of the fragment file names made by
		 #            THIS call
		 # Note     : every fragment gets int(lines/factor) lines and the
		 #            last fragment also takes the remaining lines. No line
		 #            is written twice.
		 #____________________________________________________________________
		 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		 if($debug==1){print "\n\t\@hash=\"@hash\"
		 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

		 my($division_factor, $base_name, $extension, $frag_file,
				$splited_frag_size, @lines, @splited_files);
		 $division_factor=2;
		 if($vars{'d'}=~/(\d+)/){ $division_factor=$1 };
		 if($division_factor < 1){ $division_factor=1 }  # at least one fragment

		 for($i=0; $i< @file; $i++){
				 my $in_fh;
				 unless(open($in_fh, '<', $file[$i])){
						 warn "\n# (W) split_files: can not open $file[$i]: $!\n";
						 next;
				 }
				 @lines=<$in_fh>;
				 close($in_fh);

				 $base_name=${&get_base_names($file[$i])};
				 $extension=${&get_extension_names($file[$i])};
				 $splited_frag_size=int(@lines/$division_factor);
				 for($j=0; $j< $division_factor; $j++){
						 #$frag_file="$base_name\_s${j}\.split${j}";
						 $frag_file="$base_name\_s${j}\.$extension";
						 push(@splited_files, $frag_file);
						 open(my $frag_fh, '>', $frag_file)
								 or die "\n# (E) split_files: can not write $frag_file: $!\n";
						 if($j == $division_factor-1){
								 print $frag_fh splice(@lines); # the very last fragment takes all the remaining lines
						 }else{
								 print $frag_fh splice(@lines, 0, $splited_frag_size);
						 }
						 close($frag_fh)
								 or die "\n# (E) split_files: can not close $frag_file: $!\n";
				 }
		 }
		 return(\@splited_files);
}


#_______________________________________________________________
# Title     : split_sequence
# Usage     : %out=%{&split_sequence(\%input, 2 )};
# Function  : divides any string to the denominator given.
# Example   : %out=%{&split_sequence(\%input, 3)};
#              while  %input is ('seq', '12345789ABCDEFHIJKLMN')
#              The output will be 'seq_1', '1234578'
#                                 'seq_2', '9ABCDEF'
#                                 'seq_3', 'HIJKLMN'
#              ('_rv' is added to the name of a reversed fragment)
# Warning   :
# Keywords  : divide_string, split_string, chop_string, divide_sequence
#             split_sequence(look at separate split_sequence sub),
#              break_sequence, break_string
# Options   :
#  $reverse_second_half=S by S -S
#  $reverse_first_half =F by F -F
#  $reverse_rest   =R by R -R  ## reversing all except the first
#  $reverse_all  =A by A -A # reverse all the fragments
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#-----------------------------------------------------------
sub split_sequence{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Cuts every sequence of the input hash(es) into $denominator
    #            fragments. The first fragments get one residue more when
    #            the division leaves a remainder. The fragments are named
    #            <seq_name>_<fragment number>, with '_rv' added when the
    #            fragment was reversed.
    # Argument : one or more hash refs (name => sequence), a number (or a
    #            ref. of a number) for the denominator and the options
    #            F (reverse 1st fragment), S (reverse 2nd), R (reverse the
    #            2nd and the rest), A (reverse all).
    # Returns  : list context  : ref. of an array of hash refs, one hash
    #                            for each input hash
    #            scalar context: the hash ref. made for the first input hash
    # Side eff.: prints the denominator to STDOUT, dies on an argument it
    #            can not understand.
    # Note     : all work variables are lexical, a global $k or $seq_name
    #            of the caller is not touched.
    #______________________________________________________________________
    my(@hash, @final_hash_ref, $denominator, $reverse_second_half,
       $reverse_first_half, $reverse_rest, $reverse_all, $i);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Checking arguments
    #________________________________________________
    for($i=0; $i< @_; $i++){
        if(ref($_[$i]) eq 'HASH'){
            push(@hash, $_[$i]);
        }elsif(ref($_[$i]) eq 'SCALAR'){
            if(${$_[$i]}=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
                $denominator = int($1);     # like  9.5
            }
        }elsif($_[$i]=~/^(\d+\.?\d*)$/){    # this can handle fraction number!!
            $denominator = int($1);         # like  9.5
        }elsif($_[$i]=~/F/){
            $reverse_first_half='F';
        }elsif($_[$i]=~/S/){
            $reverse_second_half='S';
        }elsif($_[$i]=~/R/){
            $reverse_rest='R'; $reverse_second_half='s';
        }elsif($_[$i]=~/A/){
            $reverse_all='A'; $reverse_rest='r'; $reverse_first_half='f';
            $reverse_second_half='S';
        }elsif($_[$i]=~/\S/){
            print "\n# $0: split_sequence, You put some strange stuff to me!!\n\n";
            die;
        }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
    # When s option is given while denominator is over 2, set r option
    #____________________________________________________________________
    if($denominator > 2 and $reverse_second_half){
        $reverse_rest='r';
    }

    if($denominator == 0){
        print "\n Denominator is 0, error, setting to 1\n";
        $denominator = 1;
    }

    print "\n# split_sequence: \$denominator is $denominator, with \@hash\n";

    # Tells if the fragment with the given number (1, 2, 3 ...) is reversed
    my $is_reversed = sub {
        my($frag_number)=@_;
        return(1) if $frag_number == 1 and $reverse_first_half;
        return(1) if $frag_number == 2 and $reverse_second_half;
        return(1) if $frag_number >  2 and $reverse_rest;
        return(1) if $reverse_all;
        return(0);
    };

    for my $input_ref (@hash){
        my %input_seq=%{$input_ref};
        my %out_seq;

        for my $seq_name (keys %input_seq){
            my @string = split(//, $input_seq{$seq_name});
            my $frag_str_size = int(@string/$denominator);
            if($debug eq 1){ print "\n Frag str size is :  $frag_str_size \n" }
            my $remaining = @string % $denominator;

            for my $frag_number (1..$denominator){
                my $take;
                if($remaining > 0){     # remainder goes one by one to the first fragments
                    $take=$frag_str_size+1;
                    $remaining --;
                }elsif(@string > 0){
                    $take=$frag_str_size;
                }else{
                    next;               # nothing is left for this fragment
                }
                my $frag=join('', splice(@string, 0, $take));
                my $seq_name_split="$seq_name\_$frag_number";

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
                # Before final, set the seq types
                #_________________________________________________________________
                if($is_reversed->($frag_number)){
                    $frag=reverse($frag);
                    $seq_name_split="$seq_name_split\_rv";
                }
                $out_seq{$seq_name_split}=$frag;
            }
        }
        push(@final_hash_ref, \%out_seq);
    }
    return(wantarray ? \@final_hash_ref : $final_hash_ref[0]);
}




#_______________________________________________________________
# Title     : divide_string
# Usage     : &show_array(&divide_string(\$input, 6));
# Function  : divides any string to the denominator given.
# Example   : &show_array( &divide_string(\$input, 3) );
#              while  $input is '12345789ABCDEFHIJKLMN'
#              The output will be '1234578 9ABCDEF HIJKLMN'
# Warning   :
# Keywords  : divide_string, split_string, chop_string, divide_sequence
#             split_sequence(look at separate split_sequence sub),
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#-----------------------------------------------------------
sub divide_string{
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Purpose  : Cuts each input string into $denominator pieces. The first
    #            pieces get one char more when the division leaves a
    #            remainder.
    # Argument : strings, refs of strings and/or refs of arrays of strings,
    #            plus a number for the denominator. A plain argument made
    #            of digits only (a fraction like 9.5 is cut to 9) or a
    #            ref. of an integer is taken as the denominator.
    # Returns  : list context  : ref. of an array of array refs, one array
    #                            of pieces for each input string
    #            scalar context: the array ref. of the pieces of the first
    #                            input string of THIS call
    #______________________________________________________________________
    my (@array, @final_array_ref, $i, $j, $denominator, $frag_str_size,
       $remaining, $frag);

    for($i=0; $i< @_; $i++){
        if(ref($_[$i]) eq 'ARRAY'){
            push(@array, @{$_[$i]});
        }elsif(ref($_[$i]) eq 'SCALAR'){
            if(${$_[$i]} =~/^(\d+)$/){
                $denominator = $1;
            }else{
                push(@array, ${$_[$i]});
            }
        }elsif($_[$i]=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
            $denominator = int($1);      # like  9.5
        }else{
            push(@array, $_[$i]);
        }
    }
    if($denominator == 0){
        print "\n Denominator is 0, error, setting to 1\n";
        $denominator = 1;
    }
    for($i=0; $i< @array; $i++){
        my @pieces;
        my @string = split(//, $array[$i]);
        $frag_str_size = int(@string/$denominator);
        if($debug eq 1){ print "\n Frag str size is :  $frag_str_size \n" }
        $remaining = @string % $denominator;
        for($j=0; $j < $denominator; $j++){
            if($remaining > 0){
                $frag=join('', splice(@string, 0, $frag_str_size+1));
                push(@pieces, $frag);
                $remaining --;
            }elsif(@string > 0){
                $frag=join('', splice(@string, 0, $frag_str_size));
                push(@pieces, $frag);
            }
        }
        push(@final_array_ref, \@pieces);
    }
    return(wantarray ? \@final_array_ref : $final_array_ref[0]);
}

#____________________________________________________________
# Title     : write_html_headbox
# Usage     : &write_html_headbox($outfilename, \%entries);
# Function  : write html format headbox explanation with
#              given hashes of headbox content.
# Example   :
# Warning   : It takes off the last '/' when $URL has it
#
# Keywords  : write_headbox_html, write headbox in html,
#               write_headbox_files
# Options   : 'd' for date inclusion at the top of the page
#   $URL=  by f=  for default ftp dir name
#   $SUB_DB_DIR= by d=
#   $put_date_information=D by D
#
# Version   : 1.9
#-----------------------------------------------------------
sub write_html_headbox{
		# Purpose  : for every output file name in @file, write an HTML page
		#            with one section per head-box hash in @hash: a linked
		#            title, a download link and, inside <pre>, every entry
		#            other than the title whose key and value hold a word char.
		# Argument : whatever handle_arguments() sorts into @file (output file
		#            names), @hash (refs of head-box hashes), %vars and
		#            $char_opt.
		# Options  : f=URL  base URL for the download links (trailing '/' removed)
		#            d=DIR  replaces the default $SUB_DB_DIR
		#            d or D puts a date/creator banner at the top of the page
		# Returns  : nothing meaningful. Dies when an output file cannot be opened.
		# Fixed    : - f= is honoured with or without a trailing '/'. It used
		#              to be ignored unless it ended in '/'. The default URL
		#              no longer yields '//' in the links.
		#            - the output base name is no longer taken from a stale $1
		#              when the file name has no '.ext' part.
		#            - <FONT> is closed before </H2> in the banner.
		#            - the output handle is private and closed per file.
		# NOTE(review): entry values are written unescaped; values containing
		#            '<' or '&' end up as raw HTML. Confirm that is intended.
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($HOME_DIR, $name, $name1, $SUB_DB_DIR, $this_prog_name);
		my $out_sub_file_base='';
		$SUB_DB_DIR='/Proj/Comp/Perl/Sub_DB'; ## default
		$HOME_DIR=$ENV{'HOME'};
		my $URL='ftp://cyrah.med.harvard.edu/pub/Perl/';

		if(defined($vars{'f'}) and $vars{'f'}=~/(\S+)/){ $URL=$1; }  ## 'f' opt: ftp dir name for downloads
		$URL=~s/\/+$//;    ## the links below add their own '/'
		if(defined($vars{'d'}) and $vars{'d'}=~/(\S+)/){ $SUB_DB_DIR=$1 }
		if($0=~/([^\/]+)\.pl/){ $this_prog_name=$1 }

		for ($f=0; $f< @file; $f++){
			my $output_file=$file[$f];
			$out_sub_file_base = ($output_file=~/([^\/]+)\.\w/) ? $1 : '';
			open(my $out, '>', $output_file) || die "\n# (E) Can not open $output_file, check permission\n";
			print $out "\<html\>\n";
			if($char_opt =~/d/i){
				print $out "\<H2  ALIGN=CENTER\>";
				print $out "\<FONT COLOR=\"#DC143C\"\>$file[$f]\<\/FONT\>\<\/H2\>\n";
				print $out "\<br\>\n";
				print $out "\<H6 ALIGN=RIGHT\>", `date`, "Created by  \<A href=\"$SUB_DB_DIR\/$this_prog_name\.html\"\>
								 $SUB_DB_DIR\/$this_prog_name\.html\<\/A\>\<\/H6\> \<hr\>";
			}

			for($i=0; $i< @hash; $i++){
				my %in =  %{$hash[$i]};
				my @keys = sort keys %in;
				$name= $in{'Title'};
				if($name=~/(\S+)\.pl/){ ## to prevent making 'xxxx.pl.pl'
					$name1=$1;
				}elsif($name=~/\:\:(\w+)\s*$/){
					$name1=$1;  ## a Bio::Bioinf like title is referred to by its last part
				}else{
					$name1=$name;
				}
				print $out "\<H3\>\<A href=\"$SUB_DB_DIR\/$name1\.html\"\>$name1\<\/A\>\<\/H3\>";
				print $out "Download \<A href=\"${URL}\/$name1\.pl\"\>$name1\.pl\<\/A\>\n";
				print $out "\<pre\>\n";
				for($j = 0; $j < @keys; $j ++){
					next if $keys[$j]=~/(title)/i;   ## title is already the section heading
					if( ($keys[$j]=~/\w+/i)&&( $in{$keys[$j]}=~/\w+/) ){
						chomp( $in{$keys[$j]} );
						printf $out ("\<b\>%-10s\<\/b\> %s\n", $keys[$j], $in{$keys[$j]});
					}
				}
				print $out "\<\/pre\>";
				print $out "\<hr\>\n\n";
			}
			print $out "\<\/html\>\n";
			close($out) || print "\n# (W) write_html_headbox: problem closing $output_file: $!\n";
		}
		## the base name of the LAST output file decides about the Bioinf copy
		if(defined($HOME_DIR) and -d $HOME_DIR and $out_sub_file_base=~/Bioinf/){
			&cp("$HOME_DIR\/$SUB_DB_DIR\/Bioinf.html", "$HOME_DIR\/Bio.pl.html");
			print "$SUB_DB_DIR\/Bioinf.html has been copied to $HOME_DIR\/Bio.pl.html\n";
		}
}

#______________________________________________________________________________
# Title     : open_DALI_domain_table_file
# Usage     :
# Function  : reads DOMTABLE
# Example   : DOMTABLE1 in ~/Proj/Bio/3Dserver  is the INPUT file
#             $DC_number means Structural Classification number.
#         $DOM_table_hash{$DALI_str_ID}{$DALI_node_ID}=[$DC_number,
#                                                       $str_length,
#                                                       $SSC_sec_str_composition,
#                                                       $description ];
#   With 'L' option it returns the following hashes:
#      return(\%FOLDREP, \%DC, \%SSE, \%NRES, \%NFAMILY, \$largest_DC_number);
#
# Keywords  : open_DDTF_file, open_ddtf_file, open_DOMTABLE1_file
# Options   :
#    $make_Liisa_Holm_style_HASH_output=L by L    for Liisa Holm style output
# Author    : holm@ebi.ac.uk jong@biosophy.org,
# Category  :
# Returns   : ref. of hashes
# Version   : 1.5
#------------------------------------------------------------------------------
sub open_DALI_domain_table_file{
    # Purpose  : read a DALI domain table (DOMTABLE) file.
    # Argument : $_[0]  file name, or ref of a scalar holding it
    #            $_[1]  optional; anything containing 'L' or 'l' asks for
    #                   the Liisa Holm style output
    # Returns  : default : \%DOM_table_hash, where
    #              $DOM_table_hash{$DALI_str_ID}{$DALI_node_ID} =
    #                 ["D.C.<number>", $str_length, $sec_str_composition, $description]
    #            with L  : (\%FOLDREP, \%DC, \%SSE, \%NRES, \%NFAMILY,
    #                       \$largest_DC_number)
    #              %FOLDREP  DC number -> first domain id seen for it
    #              %DC       domain id -> "DCnumber.running_index"
    #              %SSE      domain id -> H/E string
    #              %NRES     domain id -> residue count
    #              %NFAMILY  DC number -> number of lines seen for it
    #              $largest_DC_number is the DC number of the last parsed
    #              line (the largest one only if the table is in ascending order).
    #            Dies when the file cannot be opened.
    # Side eff.: with L the package hash %REP ("DCnumber.index" -> domain id)
    #            is filled as well; it is kept global as before.
    # Fixed    : - with L, lines that do not look like a table row (blank
    #              lines, headers) are skipped. They used to create entries
    #              under an empty key and reset $largest_DC_number.
    #            - the file name is no longer looked up as a symbolic
    #              reference when it is passed as a plain string.
    #            - scratch variables are private to the sub.
    # NOTE(review): DC numbers are compared numerically ('!='), as before;
    #            confirm that they are plain numbers in the L style table.
    my($DOM_table_file, %DOM_table_hash, $make_Liisa_Holm_style_HASH_output,
       %NFAMILY, %FOLDREP, %DC, %SSE, %NRES, $largest_DC_number);
    $DOM_table_file = ref($_[0]) ? ${$_[0]} : $_[0];

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check for Liisa Holm style HASH output flag (L)
    #_____________________________________________________
    if(defined($_[1]) and $_[1]=~/L/i){ $make_Liisa_Holm_style_HASH_output='L' }

    open(my $table_fh, '<', $DOM_table_file) || die " open_DALI_domain_table_file : Can not open $DOM_table_file by open_DALI_domain_table_file\n\n";
    if($make_Liisa_Holm_style_HASH_output){
        my $olddc=0;
        my $ifam;
        while(my $line = <$table_fh>){
            next unless $line =~ /^\s*(\S+)\s+(\S+).*\s+(\d+)\s+([HE]*)\s*$/;
            my($dc,$domid,$nres,$he)=($1,$2,$3,$4);
            # a new DC number starts a new family count; its first domain
            # becomes the fold representative
            if($dc != $olddc) { $ifam=0; $olddc=$dc; $FOLDREP{$dc}=$domid; }
            $ifam++;
            $DC{$domid}="$dc\.$ifam"; $REP{"$dc\.$ifam"}=$domid;
            $SSE{$domid}=$he;
            $NRES{$domid}=$nres;
            $NFAMILY{$dc}++;
            $largest_DC_number=$dc; ## remember the DC number of the last row
        }
    }else{
        while(my $line = <$table_fh>){
            if($line =~ /^\s+([\d\.]+)\.\s+(\w+)\:(\d+)\s+(\S.{39,46})\s(\d+)\s+([HE]+)\s*$/){
                my($DC_number, $DALI_str_ID, $DALI_node_ID, $description,
                   $str_length, $SSC_sec_str_composition)=("D.C.$1", $2, $3, $4, $5, $6);
                $DOM_table_hash{$DALI_str_ID}{$DALI_node_ID}=[$DC_number,
                                                              $str_length,
                                                              $SSC_sec_str_composition,
                                                              $description ];
            }
        }
    }
    close($table_fh);
    if($make_Liisa_Holm_style_HASH_output){
        return(\%FOLDREP, \%DC, \%SSE, \%NRES, \%NFAMILY, \$largest_DC_number);
    }else{
        return(\%DOM_table_hash);
    }
}





#________________________________________________________________________
# Title     : open_sdb_files
# Usage     : %entries = %{&open_sdb_files(\$file_to_read )};
# Function  :
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : read_sdb_files,read_sdb,
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub open_sdb_files{
	# Purpose  : parse the documentation head box found in each file of @file
	#            into the hash %Final_out ( entry name => text, e.g. 'Title',
	#            'Usage', 'Warning', ... ).
	# Argument : whatever handle_arguments() sorts into @file; of its results
	#            only @file is used by the parsing code below ($char_opt is
	#            not consulted in this sub).
	# Returns  : \%Final_out when @ref_of_seq_entry holds at most one element,
	#            otherwise the list @ref_of_seq_entry.
	# NOTE(review): %Final_out is declared once, outside the per-file loop, so
	#            every reference pushed onto @ref_of_seq_entry points at the
	#            same hash. @ref_of_seq_entry and $tmp are not declared with
	#            'my' in this sub. Confirm whether this sharing is intended.
	# NOTE(review): the open() of each file is not checked; an unreadable
	#            file simply yields an empty @whole_file.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# ($end_found appears twice in the declaration list below.)
	my($title_found, %Final_out, $variable_string, $TITLE, $title, @keys,
	  $end_found, $line, $entry, $entry_match, $End_line_num, $remove_blank,
	  $title_entry_null, $end_found, $Enclosed_entry, $Enclosed_var,
	  $blank_counter, $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

	for($r=0; $r<@file; $r++){
		open(SEQ_IN, "$file[$r]");
		my @whole_file =<SEQ_IN>;

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		# Line by line scan. The branches below are tried in order; the first
		# one whose condition holds consumes the line.
		for($i=0; $i<@whole_file; $i++){
	# tr/// works on character lists, not patterns: TAB is mapped to the first
	# replacement character, i.e. to ONE blank (the '{7}' is not a repeat count).
	$whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	#"""""""""""""""""""""""""""""""""""""""""""
	##  The first and second line of box 1 ##
	#"""""""""""""""""""""""""""""""""""""""""""
	# A rule line followed by a 'Title/Name : value Copyright...' line.
	# The Copyright part goes to the 'Warning' entry.
	if( ($whole_file[$i]=~/^[_\*\-\/]{55,}$/)&&    ##  '______' is discarded
	   ($whole_file[$i+1]=~/^ {0,4}([TitlNam]+e) {0,8}:? {0,20}(\S[\-\w\.:]*) *(Copyright.*)/i) ){
	   $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
	   $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
	   if($TITLE =~ /^Title|Name$/i){   #
	       if( ($title=~/^\s+$/)||( $title eq "\n") ){
		  $title_entry_null =1;  $title = '';  }    }
	   $Final_out{$TITLE}=$title;
	   $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
	}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	## The first and second line of box 2, #__________ or #**************
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# A rule line followed by a 'key : value' line. Scanning stops once a
	# second box start has been counted ($title_found > 1).
	elsif( ($end_found != 1)&&($whole_file[$i]=~/^[_\*]{20,}$/)&&
	   ($whole_file[$i+1]=~/^\s*(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*)\s*/i) ){
	   $title_found ++ ;        $i++;
	   $entry_match=$1;       $entry_value=$2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
	   $Final_out{$entry_match}.= "$entry_value\n";
	   last if $title_found > 1;  next;   }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	##  With PROPER entry 2 : descriptions like. 'Usage : ssssssxxjkk  kj'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,9}(\S.*)\s*/i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= "$entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	#  With proper entry 3 : descriptions like. 'Usage :', But blank description ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= " $entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  all space line matching                 ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# At most two consecutive blank lines are appended to the current entry.
	# NOTE(review): the blank inside '{0, 110}' -- perl versions differ on
	# whether this is still read as a quantifier or as literal text; confirm
	# on the target perl.
	elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
	   ($title_found==1)&&($whole_file[$i]=~/^ {0, 110}$/) ){
	   $blank_counter++;
	   if($blank_counter > 2){ $blank_counter--; }
	   else{ $Final_out{$entry_match}.= " \n";  }     }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 3 space to 14 positions eg: '#           HHHHHHHHH'
	###  To match 'examples' etc. INC. ':'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Continuation text: appended to the entry named by the last $entry_match.
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^( {0,50})(\S.+)/) ){
	   $Final_out{$entry_match}.= "$2\n"; $blank_counter=0; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 1 space to 11 positions  ##
	###  To match 'examples' etc. EXC. ':'       ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^ {0,16}([^:.]+)/) ){
	   $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###-------End of the read_box reading--------##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# NOTE(review): a rule line starting in column 0 also satisfies the
	# '^( {0,50})(\S.+)' continuation branch above, which is tried first, so
	# this branch looks hard to reach while $title_found==1 -- confirm.
	elsif( ($title_found==1)&&
	   ($whole_file[$i]=~ /^[\*\-_]{15,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
	   $End_line_num = $i;       $end_found++;
	   last;      }
		} ## < End of for loop


		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### If title is not there at all     ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@keys=keys %Final_out;
		for(@keys){
	 if(/^Title$/i){    ## No Entry of Title at all??
	    $TITLE =$&;
	    $title_entry_exist = 1;
	    if($Final_out{$_}=~/^\s*$/){   ## if Title => Null or just space
	       $title_entry_null = 1;    }  }  }

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title entry is not there    ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		# Looks at the 19 lines after $End_line_num for a 'sub name{' line and
		# uses that name; otherwise the title becomes $0.
		if( $title_entry_exist != 1){
	  for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
		$Final_out{'Title'} = "$1\n";   last;       }
	     elsif( $whole_file[$s] =~/^________________________________+/){
		#######################################
		## Uses running file name as title   ##
		#######################################
		$Final_out{'Title'} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as title   ##
		#######################################
		$Final_out{'Title'} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title is blank              ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
	  ### $End_line_num is the last line read.
	  for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
		$Final_out{$TITLE} = "$1\n";    last;     }
	     elsif( $whole_file[$s] =~/^#________________________+/){
		#######################################
		## Uses running file name as title   ##
		#######################################
		$Final_out{$TITLE} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as title   ##
		#######################################
		$Final_out{$TITLE} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		## Error handling, if no head box is found   ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		# Despite the word 'Fatal' the sub only prints and carries on.
		if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
		push(@ref_of_seq_entry, \%Final_out);
	}
	# More than one collected entry: the list itself is the (implicit) return value.
	if(@ref_of_seq_entry > 1){
		@ref_of_seq_entry;
	}else{ return (\%Final_out) }
}


#_______________________________________________________________
# Title     : open_stride_files
# Usage     :
# Function  :
# Example   :
#           H        Alpha helix
#           G        3-10 helix
#           I        PI-helix
#           E        Extended conformation
#           B or b   Isolated bridge
#           T        Turn
#           C        Coil (none of the above)
#           S        A bend without a hydrogen bond
# Warning   :
# Class     : Utility
# Keywords  :
# Options   :
#   $chain_number= by C=
#   $simplify=s by s     # reduces to H E C
# Package   :
# Reference : http://sonja.acad.cai.cam.ac.uk/bioperl.html
# Returns   :
# Tips      :
# Argument  :
# Todo      :
# Author    : A Biomatic
# Version   : 2.0
#-----------------------------------------------------------
sub open_stride_files{
    # Purpose  : read the ASG lines of one STRIDE output file and return the
    #            sequence and secondary structure of one chain.
    # Argument : whatever handle_arguments() sorts into @file, %vars and
    #            $char_opt. Only the first file ($file[0]) is read.
    # Options  : C=<chain>  chain to read. When not given, it is taken from
    #                       a file name like 8rnxA.stride; a name like
    #                       1abc.xxx (no directory part) reads every chain.
    #            s          reduce the secondary structure codes: I G B T
    #                       (either case) become C
    # Returns  : ref. of a hash with (among others) the keys
    #              'name'/'ID'/'seq_name'  lower-cased last field of the ASG lines
    #              'chain_number'          chain of the last accepted residue
    #              'residue_array'         hash ref: position -> 1 letter residue
    #              'sequence'              residues as one string
    #              'sec_str_array'         array ref, one code per residue
    #              'sec_str'               codes as one string
    #            Dies when no file is given or the file cannot be opened.
    # Fixed    : - 'residue_array' now holds every residue read. A new one
    #              element hash was assigned per residue, so only the last
    #              residue survived.
    #            - the reported chain is taken from accepted lines only, not
    #              from lines that were skipped as belonging to another chain.
    #            - the caller's $_ is left alone and scratch variables are
    #              private to the sub.
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my (%sec_str, $chain, $number_of_residues, $simplify, $read_any_chain,
      $sec_str, $secondary_structure, $chain_number_wanted, $sequence,
      $residues);
    my %AA=%{&convert_3_to_1_letter};
    my $name='name';
    my $prediction_reliability=1;
    if(@file<1){
      print "\n \@file has less than 1 elem. There is no fileinput for open_stride_files\n";
      die
    }

    if($vars{'C'}=~/(\w+)/){ $chain_number_wanted=$1; print "\n# (i) open_stride_files: Chain wanted= $1\n"; }
    if($char_opt=~/s/){  $simplify='s'; }

    my $stride_file=$file[0]; ## for the time being
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The following is to handle name input like 8rnxA.stride
    #_______________________________________________________________________________
    if(! $chain_number_wanted and $stride_file=~/\d\w\w\w(\w)\./){
       $chain_number_wanted=$1;
    }elsif(!$chain_number_wanted and $stride_file=~/^\d\w\w\w\./){
       $read_any_chain=1;
       print "\nWARN: The STRIDE file does not have pdb chain information!! \n";
    }
    open(my $stride_fh, '<', $stride_file) || die "\n# (E) Cant open $stride_file\n";
    while(my $line = <$stride_fh>){
       #>>->  ASG  LYS -  305  287    H    AlphaHelix    -69.14    -26.24     171.0      1FND
       next unless $line =~ /ASG\s+(\w+)\s+(\S)\s+(\d+)\s+(\d+)\s+(\w) .+?([\~\w]+)\s*$/;
       my($residue_3, $line_chain, $position, $sec_code, $id)=($1, $2, $4, $5, $6);
       if($read_any_chain){
       }elsif($line_chain eq '-'){ $line_chain = ''; ## NO chain case
       }elsif($line_chain ne $chain_number_wanted){ next } ## skipping NOT matching chain number
       $chain = $line_chain;  # _ , A or B like things
       $name="\L$id";
       my $residue=$AA{$residue_3};
       $residues->{$position} = $residue;  # keyed by the second number column of the ASG line
       $sequence .=$residue;
       $sec_str .=$sec_code;
    }
    close($stride_fh);
    if($simplify){ $sec_str=~tr/IiGgBbTt/CcCcCcCc/; }   # secture conversion rule
    $secondary_structure=[split(//, $sec_str)];
    $number_of_residues=length($sec_str);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # Definition of Seq information detail, Ver 1.1
    #______________________________________________________________________________
    %sec_str=('name'                  =>$name,
              'protein_name'          =>$name,
              'ID'                    =>$name,
              'PDB_chain_number'      =>$chain,
              'chain_number'          =>$chain,
              'number_of_residues'    =>$number_of_residues,
              'seq_name'              =>$name,
              'residue_array'         =>$residues,
              'residue_string'        =>$sequence,
              'sec_str_array'         =>$secondary_structure,
              'sec_str_string'        =>$sec_str,
              'sec_str'               =>$sec_str,
              'mol_type'              =>'protein',
              'type'                  =>'protein',
              'prediction_reliability'=>$prediction_reliability,
              'prediction'            =>'prediction',
              'sequence'              =>"$sequence",
              'seq_string'            =>$sequence,
              'structure_assignment'  =>0); # this is not a struc. assignment as in DSSP
    #__________________________________________________________________________________
    return(\%sec_str);
}



#______________________________________________________________________________
# Title     : open_stride_dat_files
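# Usage     : $out=&open_stride_dat_files(@ARGV);
#             ($out is a ref. of an ARRAY of hash refs when more than one SS
#              record is read, and a single hash ref when exactly one is read)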
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@biosophy.org
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub open_stride_dat_files{
		 # Purpose  : read STRIDE 'dat' style files made of NM (name), SQ
		 #            (sequence) and SS (secondary structure) lines. Every
		 #            SS line closes one record built from the most recent
		 #            NM and SQ lines.
		 # Argument : whatever handle_arguments() sorts into @file.
		 # Returns  : each record is a hash ref
		 #              { residue index (from 0) =>
		 #                  [residue, sec. str. code, '1.0', sequence name] }
		 #            more than one record -> ref. of an array of such hash refs
		 #            exactly one record   -> that hash ref
		 #            no record            -> a false value
		 # Fixed    : the single record case returned a ref. of an empty hash
		 #            (the filled hash was a different, inner 'my' variable).
		 #            A file that cannot be opened is now reported and skipped.
		 #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
		 my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
		 my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
		 my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
		 my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
		 my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
		 if($debug==1){print "\n\t\@hash=\"@hash\"
		 \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		 \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 # $seq_name and $seq deliberately live outside the loops: an SS line
		 # uses whatever NM / SQ line was read last, as before.
		 my($seq_name, $seq, @stride_sec_str_hash_final);

		 for($i=0; $i< @file; $i++){
				 my $stride_dat_file=$file[$i];
				 print "\n# (INFO) \$stride_dat_file $stride_dat_file is being opened\n";
				 my $dat_fh;
				 if( !open($dat_fh, '<', $stride_dat_file) ){
						 print "\n# (W) open_stride_dat_files: Can not open $stride_dat_file: $!\n";
						 next;
				 }
				 while(my $line = <$dat_fh>){
						if($line =~ /^NM\s+(\S+)\.brk/){
								$seq_name=$1;
						}elsif($line =~ /^NM\s+(\S+)/){
								$seq_name=$1;
						}elsif($line =~ /^\s*SQ\s+(\S+)/){
								$seq=$1;
						}elsif($line =~ /^\s*SS\s+(\S+)/){
								my @residues=split(//, $seq);
								my @sec_str=split(//, $1);
								my %stride_sec_str_hash;
								for($j=0; $j < @residues; $j++){
										$stride_sec_str_hash{$j}=[$residues[$j], $sec_str[$j], '1.0',  "$seq_name"];
								}
								push(@stride_sec_str_hash_final, \%stride_sec_str_hash);
						}
				 }
				 close($dat_fh);
		 }
		 if(@stride_sec_str_hash_final > 1){
				 return(\@stride_sec_str_hash_final);
		 }elsif(@stride_sec_str_hash_final==1){
				 return($stride_sec_str_hash_final[0]);
		 }
		 return(!1);   # nothing parsed: the same false value the sub used to fall through with
}



#_______________________________________________________________
# Title     : get_pdb_file_start_number
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : start_number_of_pdb, startnumber, start number of PDB,
#             get_start_number_of_pdb_file,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub get_pdb_file_start_number{
	# Purpose  : find the residue number written on the 'ATOM      1' line
	#            (atom serial 1) of a PDB file.
	# Argument : PDB file name, or ref of a scalar holding it.
	# Returns  : ref. of a scalar holding that number; the scalar is undef
	#            when the file cannot be read or has no matching line.
	# Fixed    : the file handle is private and closed (the global PDB_FILE
	#            handle used to be left open), the caller's $_ is no longer
	#            overwritten, and the file name is opened for reading only.
	my($start_number, $pdb_file);
	$pdb_file = ref($_[0]) ? ${$_[0]} : $_[0];

	if( defined($pdb_file) and open(my $pdb_fh, '<', $pdb_file) ){
		while(my $line = <$pdb_fh>){
			if($line =~ /^ATOM      1\s+\w+\s+\w+\s+[\w]*\s+(\d+) \s+/){
				$start_number = $1; last;
			}
		}
		close($pdb_fh);
	}
	\$start_number;
}
#_______________________________________________________________
# Title     : write_modeller_top_file
# Usage     : &write_modeller_top_file(\%hash, [v]);
# Function  : Writes Modeller command file format.
# Example   :
#     $modelname = 'gfct';
#     $template = '1ovt';
#     %hash=($modelname, $template);
#     &write_modeller_top_file(\%hash);
# Warning   :
# Keywords  :
# Options   : v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.top form.
# Argument  : 1 hash ref which has model name and template name -> (\%hash)
#             while %hash is (modelname, templatename)
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_top_file{
	# Purpose  : write one Modeller command file '<model>.top' per input hash.
	# Argument : ref(s) of hashes of the form (modelname => templatename), as
	#            sorted into @hash by handle_arguments(). One pair per hash
	#            is used.
	# Options  : v  also print the command file text to STDOUT
	# Returns  : nothing meaningful; the result is the file(s) written.
	# Notes    : $model, $pdb_file and $ali_file stay package variables
	#            because the format below reads them.
	# Fixed    : - verbose output now uses MODELLER_TOP_FORMAT explicitly.
	#              A 'format STDOUT' is also defined by
	#              write_modeller_ali_file, and only one definition of a
	#              format name can be in effect, so this sub no longer
	#              defines its own.
	#            - the .top file is closed after writing, and an output
	#              file that cannot be opened is reported and skipped.
	#            - the hash iterator is reset before each(), so the same
	#              hash can be given again.
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_TOP_FORMAT =
INCLUDE
SET ALNFILE = @<<<<<<<<<<<<<<<<
$ali_file
SET KNOWNS = @<<<<<<<<<<<<<<<<
$pdb_file
SET SEQUENCE = @<<<<<<<<<<<<<<<<
$model
SET ATOM_FILES_DIRECTORY = './:../atom_files'
SET STARTING_MODEL = 1
SET ENDING_MODEL = 1
CALL ROUTINE = 'model'
.
	########## Program starts ####################
	for($i=0; $i<@hash;$i++){
		keys %{$hash[$i]};    # reset the iterator so each() starts at the first pair
		($model, $pdb_file) = each %{$hash[$i]};
		next unless defined($model);    # empty hash: nothing to write
		my $out_file = "$model.top";
		$ali_file = "$model.ali";
		if( !open(MODELLER_TOP_FORMAT, '>', $out_file) ){
			print "\n# (E) write_modeller_top_file: Can not open $out_file: $!\n";
			next;
		}
		my $pdb1 = "$ENV{'PDB'}\/$pdb_file.brk";
		my $pdb2 = "$ENV{'PDB'}\/$pdb_file.pdb";
		if( !(-e $pdb1 ) && !( -e $pdb2 ) ){
			print "\n Error the file $pdb1  or  $pdb2\n";
		}
		# Modeller wants the values quoted
		$model ="\'$model\'";
		$pdb_file ="\'$pdb_file\'";
		$ali_file ="\'$ali_file\'";
		write MODELLER_TOP_FORMAT;
		close(MODELLER_TOP_FORMAT);
		if( $char_opt=~/v/i){
			# same picture on STDOUT: pick the format by name for this one write
			my $previous_handle = select(STDOUT);
			my $previous_format = $~;
			$~ = 'MODELLER_TOP_FORMAT';
			write;
			$~ = $previous_format;
			select($previous_handle);
		}
	}
}
#_______________________________________________________________
# Title     : write_modeller_ali_file
# Usage     : &write_modeller_ali_file(\%model, \%template, [\$outfilename], [v]);
# Function  : Writes Modeller alignment format.
# Example   :
#             $out = 'test.ali';
#             %model =    qw(model AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAccccccccccc);
#             %template = qw(templ CCAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCC 3 42);
#             &write_modeller_ali_file(\%model, \%template, \$out);
# Warning   :
# Keywords  :
# Options   : You can put 2 numbers for the second set of key and element for
#             the second hash input as the starting and ending points of
#             template(i.e. pdb file seq). Unless I calculate the size of seq.
#             By default, it reads PDB file defined by ENV setting of 'PDB' and
#             gets the starting number of pdb. If starting number is defined
#             explicitly at input hash, the given starting number is used instead
#             of PDB's.
#             v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.ali form.
# Argument  : 2 ref. of hash for seq. and optional output.name and option(s).
#             If second input hash (for template) has 3rd and 4th element which are
#             numbers they are regarded as the starting and ending number of the
#             template(i.e. pdb file seq)
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_ali_file{
	# Purpose  : write sequences in Modeller alignment (.ali) format.
	# Argument : ref(s) of hashes as sorted into @hash by handle_arguments().
	#            Each hash holds one (name => sequence) pair and may hold a
	#            second, all-digit (start => end) pair giving the numbering
	#            of the template. An output file name ($file[0]) is
	#            optional; without it '<name of first hash>.ali' is written.
	# Options  : v  also print every entry to STDOUT
	# Returns  : nothing meaningful; the result is the file written.
	# Numbering: start = the given start, else the start number found in
	#            $ENV{PDB}/<name>.brk or .pdb, else 1. The number written in
	#            the end column is sequence length + start when a start was
	#            given or found, else the plain sequence length (unchanged).
	# Notes    : $name, $seq, $start_seq and $seq_leng stay package
	#            variables because the format below reads them.
	# Fixed    : - the format argument line is comma separated; comma-less
	#              lists are refused by perl 5.28 and later.
	#            - verbose output selects MODELLER_ALI_FORMAT by name
	#              instead of relying on a 'format STDOUT', a name that
	#              write_modeller_top_file defines as well.
	#            - the start number is worked out afresh for every entry; a
	#              value left over from an earlier entry or call was reused.
	#            - the PDB file that really exists (.brk or .pdb) is read.
	#            - name/sequence and start/end are told apart by content,
	#              not by the (arbitrary) order a hash is flattened in.
	#            - the length warning is printed only when the given end
	#              number differs from the calculated one ($end_seq was
	#              never set, so it was always printed).
	#            - the output file is closed, and a failing open is reported.
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.2 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	if($debug==1){ print "   \@hash has \"@hash\"\n   \@raw_string has \"@raw_string\"
	\@array has \"@array\"\n   \@char_opt has \"@char_opt\"\n   \@file has \"@file\"\n"; }
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_ALI_FORMAT =
>P1;@<<<<<<<<<<<<<<<
$name
structureX: @<<<<<<<<<<<<<<: @<<<: : @<<<<<: : @<<<<<<<<<<<<<: :
$name, $start_seq, $seq_leng, $name
@*
$seq
*
.
	########## Program starts ####################
	# Turns one input hash into (name, sequence [, start, end]).
	my $entry_fields = sub {
		my %pair = %{$_[0]};
		my @keys = sort keys %pair;
		return () unless @keys;
		my (@name_keys, @range_keys);
		for my $key (@keys){
			if( $key=~/^\d+$/ and defined($pair{$key}) and $pair{$key}=~/^\d+$/ ){
				push(@range_keys, $key);
			}else{
				push(@name_keys, $key);
			}
		}
		# only digit pairs: the first one has to be the name => sequence pair
		if(!@name_keys){ push(@name_keys, shift(@range_keys)); }
		my @fields = ($name_keys[0], $pair{$name_keys[0]});
		if(@range_keys){ push(@fields, $range_keys[0], $pair{$range_keys[0]}); }
		return(@fields);
	};
	my @entries = map { [ $entry_fields->($_) ] } @hash;

	my $out_file = $file[0];
	if(!$out_file){
		$name = @entries ? $entries[0][0] : undef;
		print "\n\$name is $name" if $debug ==1;
		$out_file = "$name.ali";
	}
	if( !open(MODELLER_ALI_FORMAT, '>', $out_file) ){
		print "\n# (E) write_modeller_ali_file: Can not open $out_file: $!\n";
		return;
	}
	for($i=0; $i<@entries;$i++){
		my @fields = @{$entries[$i]};
		$name = $fields[0]; print "\n\$name is $name" if $debug ==1;
		$seq  = $fields[1]; print "\n\$seq  is $seq" if $debug ==1;
		$seq_leng = length($seq);
		$start_seq = undef;          # never inherit the previous entry's start
		my $end_seq = $fields[3];
		## checking PDB entry of the template ##
		my($pdb_path) = grep { -e $_ } ("$ENV{'PDB'}\/$name.brk", "$ENV{'PDB'}\/$name.pdb");
		if( defined($pdb_path) ){
			$start_seq = ${&get_pdb_file_start_number( $pdb_path )};
		}
		## Handling the starting and ending seq points of template
		if( defined($fields[2])&&($fields[2]=~/^\d+/) ){
		  $start_seq=$fields[2]; $seq_leng+=$start_seq; }
		elsif( defined($start_seq) ){
		  $seq_leng+=$start_seq; }
		else{ $start_seq =1; }
		if( defined($fields[2]) && ( !defined($end_seq) || $end_seq != $seq_leng ) ){
		  print "\n Your template seq length does not match with actual seq size
					\n I will put the calculated value \"$seq_leng\" as the template length\n\n";
		}
		print "\n\$seq_leng is $seq_leng\n" if $debug ==1;
		write MODELLER_ALI_FORMAT;
		if( $char_opt=~/v/i){
			# same picture on STDOUT: pick the format by name for this one write
			my $previous_handle = select(STDOUT);
			my $previous_format = $~;
			$~ = 'MODELLER_ALI_FORMAT';
			write;
			$~ = $previous_format;
			select($previous_handle);
		}
	}
	close(MODELLER_ALI_FORMAT);
}

#_______________________________________________________________
# Title     : make_template_from_sec_str
# Usage     : %target   = %{&make_template_from_sec_str(\%seq)};
# Function  : makes template of sec. str. like: 'H5 E4 E2' out of '__HHHHH__EEEE__EE__'
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#-----------------------------------------------------------
sub make_template_from_sec_str{
	# Purpose  : turn secondary structure strings such as
	#            '__HHHHH__EEEE__EE__' into templates such as 'H5 E4 E2 '.
	# Argument : ref(s) of hashes (sequence name => sec. str. string), as
	#            sorted into @hash by handle_arguments().
	# Returns  : ref. of a hash whose keys are the sequence name with the
	#            string length appended (e.g. 'seq1' . 19 -> 'seq119') and
	#            whose values are the templates. Each element is followed
	#            by one blank, the last one included.
	#            Runs of '_' separate the elements; an element shorter than
	#            two characters is left out. A string without any element
	#            gets no entry at all.
	# Fixed    : the template is started afresh for every sequence. It used
	#            to keep growing, so each sequence also carried the elements
	#            of all sequences handled before it.
	my(%out);
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	for($j = 0; $j < @hash ; $j ++){
		my %in   = %{$hash[$j]};
		my @name = keys  %in;
		print "\@name is @name\n", if($debug eq 1);

		for($t=0; $t < @name; $t++){
			my $name = $name[$t];
			print "\$name is $name\n", if($debug eq 1);
			my $leng = length($in{$name});
			print "$leng\n", if($debug eq 1);
			my $name2 = "$name"."$leng"; # to attach sequence length
			my @fragments = split(/_+/, $in{$name});
			print "\@fragments is @fragments\n", if($debug eq 1);
			my $frag_seq;                # template of THIS sequence only
			for($i = 0; $i < @fragments; $i++){
				if($fragments[$i] =~/(\w)\w+/){
					my $fraglength = length($fragments[$i]);
					$frag_seq .= "$1"."$fraglength "; # space is delimiter  'H5 E3 E5 E4'
					print "\$frag_seq is $frag_seq\n", if($debug eq 1);
				}
				$out{$name2}=$frag_seq;
			}
		}
	}
	return(\%out);
}

#______________________________________________________________________________
# Title     : calculate_protein_symmetry_index
# Usage     : &calculate_protein_symmetry_index($symst_file, $skip_self_stragment);
#             Both arguments can be given as scalar references as well.
# Function  : Using symst files
#             Reads the file line by line. A line like
#               '>xxxx_38-123_SEQLET:SECTURE'      starts a new query and
#               '    2bpa1_80-86_QWIKFMK:HHHHHHH'  (indented) is a match for it.
#             The secture string of every match is reversed and compared
#             position by position with the secture string of the query.
#             Counts are kept separately for matches from the same PDB entry
#             (SELF) and from other entries (NON). A summary is printed when
#             the PDB id of the query changes and once more after the last line.
# Example   :  Conversion rule
#               $sec_str=~tr/IiGgBbTt/CcCcCcCc/;
#           H        Alpha helix
#           G        3-10 helix
#           I        PI-helix
#           E        Extended conformation
#           B or b   Isolated bridge
#           T        Turn
#           C        Coil (none of the above)
#           S        A bend without a hydrogen bond (a kind of coil)
# Warning   : Many counters used here are package globals which are NOT
#             localized, so they are shared with calculate_composition_prosix
#             and are not reset between calls of this sub.
#             The internal sub calculate_and_print_prosix is kept inside the
#             braces of this sub so that both can be copied as one unit.
# Keywords  : PROSIX (protein symmetry index)
# Options   : $skip_self_stragment (2nd arg): when true, a match with the same
#             PDB id and the same range as the query is ignored.
# Author    : jong@biosophy.org,
# Category  :
# Returns   : nothing useful. All results are printed to STDOUT.
# Version   : 2.4
#------------------------------------------------------------------------------
sub calculate_protein_symmetry_index{
    local($symst_file, $pdb_id_query, $range_query, $seqlet_query, $secturlet_query,
        $pdb_id_match, $range_match, $seqlet_match, $secturlet_match, $simplify,
        $NON_self_stragment_match, $SELF_self_stragment_match,
        $SELF_palindrome_count, $NON_palindrome_count, $entry_count,
        $Total_unique_pdb_match, $num_of_unique_pdb_match,
        $Total_SELF_monotonous_seqlet, $Total_NON_monotonous_seqlet,
        $Total_NON_palindrome_seqlet,
        $Total_palindrome, %NON_self_PDBD_entry, %SELF_secture_composition_hash,
        $composition_simil_counter, $sum_query_occur, $sum_match_occur,
        $running_av_compos_simil, $skip_self_stragment, %secture_compos_hash_query,
        %secture_compos_hash_match, %PDB_seq_containing_rev_stragment);
    my $lines_read=0; ## to know if the file had any line at all
    $simplify='s'; ## simplify secture types
    $SELF_stragment_match=0;
    $symst_file=${$_[0]} || $_[0];
    $skip_self_stragment=${$_[1]} || $_[1];
    open(SYMST, "<$symst_file") || die "\n Cannot open $symst_file file\n";

    while(<SYMST>){
       $lines_read++;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Matching the very first occurance of '>xxxx_38-123_KJKJKJKJ...'
       #_______________________________________________________________
       if(/^\>(\S+)\_(\d+\-\d+)_(\S+):(\S+)/){
           $pdb_id_query   =$1;       $range_query    =$2;
           $seqlet_query   =$3;       $secturlet_query=$4;
           $number_of_unique_frag_entry{$pdb_id_query}++;

           if($simplify){ $secturlet_query=~tr/SsIiGgBbTt/CcCcCcCcCc/; }
           @secture_residues_query=split(//, $secturlet_query);
           @seqlet_residues_query =split(//, $seqlet_query);

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Show interim summary for each PDB entry when new pdb_id query entries is met
           # ($seqlet_match is true only if a match line was seen since the last summary)
           #_________________________________________________________________________________
           if($seqlet_match and $pdb_id_query ne $pdb_id_query_prev){
                $entry_count++;
                print "      (2-1) calculate_and_print_prosix \n";
                &calculate_and_print_prosix;
                $seqlet_match='';
           }elsif($seqlet_match){
                &calculate_composition_prosix($running_av_compos_simil,
                                              $seqlet_match);
           }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Calculating secture composition(query)
           #______________________________________
           for my $residue (@secture_residues_query){
               $secture_compos_hash_query{$residue}++;
           }
           $secturlet_query_prev=$secturlet_query;
           $range_query_prev=$range_query;
           $seqlet_query_prev=$seqlet_query;
           $pdb_id_query_prev=$pdb_id_query;  %NON_self_PDBD_entry=();

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Matching the indented match line: '    2bpa1_80-86_QWIKFMK:HHHHHHH'
       #________________________________________________________________________
       }elsif(/^\s+[\>]?(\S+)\_(\d+\-\d+)_(\S+):(\S+)/){
           $pdb_id_match   =$1;      $range_match    =$2;
           $seqlet_match   =$3;      $secturlet_match=$4;

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Exact reverse of the same region is skipped
           #__________________________________________________________________________
           if($skip_self_stragment and
              $pdb_id_match eq $pdb_id_query and
              $range_query eq $range_match){ next }
           if($simplify){ $secturlet_match=~tr/SsIiGgBbTt/CcCcCcCcCc/; }

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # The critical step is here !! There has been no reversing by me with previous programs
           #___________________________________________________________________________________________
           $secturlet_match_rv=reverse($secturlet_match); ## this is theoretically correct
           #$secturlet_match_rv=$secturlet_match;         ## however, non-reverse has better secture matching as many of sectures are palindrome

           @secture_rv_residues_match=split(//, $secturlet_match_rv);
           @seqlet_residues_match=split(//, $seqlet_match);
           $num_of_unique_pdb_match{$pdb_id_match}++;

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # SELF-SELF stragment
           #___________________________________________________
           if($pdb_id_query_prev eq $pdb_id_match){
              my(%monotony_check);
              $PDB_seq_containing_rev_stragment{$pdb_id_match}++;
              $SELF_stragment_match++;
              $Total_SELF_stragment_match =$SELF_stragment_match;
              if($seqlet_query eq $seqlet_match){  $SELF_palindrome_count++;    }

              for my $i (0 .. $#secture_residues_query){
                 ## The secture type has to be really stored, a bare hash
                 ##  element in void context creates no key.
                 $monotony_check{$secture_residues_query[$i]}++;
                 if($secture_residues_query[$i] eq $secture_rv_residues_match[$i]){
                    $SELF_self_identical_secture++;
                    $SELF_same_secture_AA_residue{$seqlet_residues_query[$i]}++;
                    $SELF_same_secture_type{$secture_residues_query[$i]}++;
                 }else{
                    $SELF_self_non_identical_secture++;
                    $SELF_diff_secture_AA_residue{$seqlet_residues_query[$i]}++;
                    ## half a count for each of the two types which were compared
                    $SELF_diff_secture_type{$secture_residues_query[$i]}+=0.5;
                    $SELF_diff_secture_type{$secture_rv_residues_match[$i]}+=0.5;
                 }
              }
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Calculating secture composition(match)
              #________________________________________
              for my $residue (@secture_rv_residues_match){
                  $secture_compos_hash_match{$residue}++;
              }
              ## only one secture type in the whole query seqlet == monotonous
              if((keys %monotony_check)==1){  $SELF_monotonous_seqlet++;   }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # NELF-NELF stragment (Nelf == non-self)
           #___________________________________________________
           }elsif($pdb_id_query ne $pdb_id_match){
              $NON_self_PDBD_entry{$pdb_id_match}++;
              my(%monotony_check);
              $NON_self_stragment_match++;
              $Total_NON_stragment_match +=$NON_self_stragment_match;
              if($seqlet_query eq $seqlet_match){  $NON_palindrome_count++;       }

              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Comparing Secture to count the identical secture for the query and match seqlets
              # Note that I reverse the matched secture.
              #________________________________________________________________________________________
              for my $i (0 .. $#secture_residues_query){
                 $monotony_check{$secture_residues_query[$i]}++;
                 if($secture_residues_query[$i] eq $secture_rv_residues_match[$i]){
                    $NON_self_identical_secture++;
                    $NON_same_secture_AA_residue{$seqlet_residues_query[$i]}++;
                    $NON_same_secture_type{$secture_residues_query[$i]}++;
                 }else{
                    $NON_self_non_identical_secture++;
                    $NON_diff_secture_AA_residue{$seqlet_residues_query[$i]}++;
                    $NON_diff_secture_type{$secture_residues_query[$i]}+=0.5;
                    $NON_diff_secture_type{$secture_rv_residues_match[$i]}+=0.5;
                 }
              }
              if((keys %monotony_check)==1){   $NON_monotonous_seqlet++;    }
           }
       }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # When end of file is reached: summary for the last entry.
    # This is done after the loop, so that it also happens when the last line
    # of the file is a query or a match line. Nothing is shown for an empty file.
    #__________________________________________________________________________
    if($lines_read){
        $entry_count++;
        &calculate_and_print_prosix;
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Title   : calculate_and_print_prosix
    # Usage   : An internal subroutine
    #           It reads and writes the variables of the caller
    #           (they are package variables, some are local()ized above),
    #           prints the summary for the current entry and sets the
    #           per-entry match counters back to 0.
    #_____________________________________________________
    sub calculate_and_print_prosix{
        $num_of_unique_pdb_match       = keys %num_of_unique_pdb_match;
        $Total_unique_pdb_match       +=$num_of_unique_pdb_match;
        &calculate_composition_prosix($running_av_compos_simil,
                                      $seqlet_match);

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # $Total_identical_secture!! <-- most important
        #___________________________________________________________
        $Total_identical_secture        =$SELF_self_identical_secture+$NON_self_identical_secture;
        $Total_non_identical_secture    =$SELF_self_non_identical_secture+$NON_self_non_identical_secture;
        $Total_residue_number           =$Total_identical_secture+$Total_non_identical_secture;
        $Total_SELF_monotonous_seqlet  +=$SELF_monotonous_seqlet;
        $Total_NON_monotonous_seqlet   +=$NON_monotonous_seqlet;
        $Total_SELF_palindrome_seqlet   =$SELF_palindrome_count;
        $Total_NON_palindrome_seqlet    =$NON_palindrome_count;
        $Total_palindrome               =$SELF_palindrome_count + $NON_palindrome_count;
        @NON_self_PDBD_entry            =keys %NON_self_PDBD_entry;
        $number_of_unique_frag_entry    =keys %number_of_unique_frag_entry;
        $Total_SELF_self_secture_residue=$SELF_self_identical_secture+$SELF_self_non_identical_secture;
        $Total_NON_self_secture_residue =$NON_self_identical_secture +$NON_self_non_identical_secture;
        ## rates are only calculated when the denominator is not 0
        $rate_SELF_self_secture_residue =$SELF_self_identical_secture/$Total_SELF_self_secture_residue if $Total_SELF_self_secture_residue;
        $rate_NON_self_secture_residue  =$NON_self_identical_secture /$Total_NON_self_secture_residue  if $Total_NON_self_secture_residue;
        if($Total_residue_number < 1){ $Total_residue_number=1 }
        $rate_Total_iden_secture        =$Total_identical_secture/$Total_residue_number;
        print "#($entry_count, $pdb_id_query_prev) $pdb_id_match $seqlet_query_prev $secturlet_query_prev:$secturlet_match_rv <-(reversed)\n";
        print "\$Total_identical_secture           : $Total_identical_secture /$Total_residue_number ($rate_Total_iden_secture)\n";
        print "  \$SELF_stragment_match            :  $SELF_stragment_match     for $pdb_id_match\n";
        print "  \$Total_SELF_stragment_match      :  $Total_SELF_stragment_match     for $pdb_id_match\n";
        print "  \$NON_self_stragment_match        :  $NON_self_stragment_match (@NON_self_PDBD_entry)\n";
        print "  \$num_of_unique_pdb_match(+self)  :  $num_of_unique_pdb_match\n";
        print "\$SELF_self_identical_secture       :  $SELF_self_identical_secture / $Total_SELF_self_secture_residue ($rate_SELF_self_secture_residue)\n";
        print "  \$NON_self_identical_secture      :  $NON_self_identical_secture / $Total_NON_self_secture_residue($rate_NON_self_secture_residue)\n";
        print "  \$Total_non_identical_secture     :  $Total_non_identical_secture / $Total_residue_number\n";
        print "  \$SELF_monotonous_seqlet          :  $SELF_monotonous_seqlet / $Total_SELF_monotonous_seqlet\n";
        print "  \$SELF_palindrome_count           :  $SELF_palindrome_count / $Total_SELF_palindrome_seqlet\n";
        print "  \$NON_monotonous_seqlet           :  $NON_monotonous_seqlet / $Total_NON_monotonous_seqlet\n";
        print "  \$NON_palindrome_count            :  $NON_palindrome_count  / $Total_NON_palindrome_seqlet\n";
        print "  \$Total_palindrome                :  $Total_palindrome / $Total_residue_number\n";
        print "   Number of unique PDB entry      :  $number_of_unique_frag_entry for $pdb_id_query_prev\n";
        print "   SELF_running_av_compos_simil    : \"$running_av_compos_simil\" (only self-self stragment considered)\n";
        print "  \$NON_SELF_running_av_compos_simil: $NON_SELF_running_av_compos_simil \n";
        $SELF_stragment_match=$NON_self_stragment_match=$seqlet_match=0;
    }

    $num_PDB_seq_with_rev_stragment_inside=keys %PDB_seq_containing_rev_stragment;
    print "\n    There were $num_PDB_seq_with_rev_stragment_inside PDB seq with rev stragment inside\n\n";
    close(SYMST);
}




#_______________________________________________________________
# Title     : calculate_protein_volume
# Usage     : %volumes=%{&calculate_protein_volume(\%seq)}
# Function  : Adds up the residue volumes of each sequence in the input hash,
#             using the table of 20 amino acids below. Upper and lower case
#             residue letters have the same volume.
# Example   :
# Warning   : Only the first hash given is used.
#             Any character which is not in the table (gap, X, B, Z ...)
#             adds nothing to the volume.
#             Unit of the table values is not stated here (presumably cubic
#             Angstrom, pls confirm).
# Keywords  :
# Options   :
# Returns   : reference of a hash (seq name => summed volume)
#             An empty sequence gets no entry.
# Argument  : reference of a hash (seq name => sequence string)
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub calculate_protein_volume{
	my %final_volume;
	my %volume=("A","88.6" ,"C","108.5","D","111.1","E","138.4",
					"F","189.9","G","60.1" ,"H","153.2","I","166.7",
					"K","168.6","L","166.7","M","162.9","N","117.7",
					"P","122.7","Q","143.9","R","173.4","S","89.0",
					"T","116.1","V","140.0","W","227.8","Y","193.6");
	# lower case letters get the same volumes as the upper case ones
	for my $residue (keys %volume){
		$volume{lc $residue} = $volume{$residue};
	}
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my %seq_hash = @hash ? %{$hash[0]} : ();
	my @names = keys %seq_hash;
	# This is a debugging message, so it is shown only in debug mode
	#  like in the other subs of this lib.
	if($debug == 1){
	  print "\n ",__LINE__, " Seq names are @names\n";
	}

	for my $name (@names){
		for my $residue (split(//, $seq_hash{$name})){
			# unknown characters count as 0
			$final_volume{$name} += $volume{$residue} || 0;
		}
	}
	\%final_volume;
}




#_______________________________________________________________
# Title     : extract_words
# Usage     : @words = @{&extract_words(\$string)};
# Function  : Splits the given string(s) into words. Any run of non-word
#             characters, '-' or '_' is taken as one separator.
# Example   : 'foo_bar-baz qux'  -->  ('foo', 'bar', 'baz', 'qux')
# Warning   : When a string starts with a separator, the first word returned
#             for it is an empty string (this is how split works).
#             The word list is made new in every call.
# Keywords  :
# Options   :
# Returns   : reference of an array of words
# Argument  : string(s) or reference(s) of scalar string, taken from the
#             raw string list of handle_arguments
# Category  :
# Version   : 1.1
#-----------------------------------------------------------
sub extract_words{
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >"""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]}; my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]}; my(@file)   =@{$A[4]}; my(@dir   )  =@{$A[3]}; my(@array)=@{$A[1]};
	my(@string)=@{$A[2]}; my(@num_opt)=@{$A[5]}; my(@char_opt)=@{$A[6]}; my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# This has to be a 'my' variable. With a package variable the words of
	#  all the earlier calls stay in the list and are returned again.
	my @words;

	if($debug==1){
	  print __LINE__, " Args to extract_words are: \"@raw_string\"\n";
	}
	for my $item (@raw_string){
	  # both a plain string and a ref. of a scalar are accepted
	  my $line = (ref($item) eq 'SCALAR') ? ${$item} : $item;
	  push( @words, split(/[\W\-\_]+/, $line) );
	}
	if($debug==1){
	  my $num = @words;
	  print __LINE__, " Num of words are : \"$num\"\n";
	}
	\@words;
}

#________________________________________________________________________
# Title     : replace_subroutines
# Usage     : %new_content = %{&replace_subroutines(\%new_subs, \@files)};
#             (how the args are recognised is up to handle_arguments)
# Function  : replaces subroutines of given file(s) with supplied subs.
#             Doesn't care version
#             For every file, the headbox and the body of each sub named by a
#             key of the input hash(es) are taken out, then ALL the values of
#             the input hash(es) are attached to the end and the file is
#             written back.
# Example   :
# Warning   : The input files are OVERWRITTEN.
#             A headbox is a '#____' (or '#----', '#****') line followed by a
#             '# Title : <name>' line, it ends with the next such line.
#             A sub is taken from '^sub <name>{' to the first line which
#             starts with '}'.
#             A sub name which is found in one file is not looked for in the
#             files coming after it.
#             The names of the subs which are not found are printed.
# Keywords  :
# Options   : 'nv' in char option for No Version handling
# Returns   : reference of a hash (file name => the new content of the file)
# Argument  : reference(s) of hash (sub name, with or without version number
#             attached => new sub text) and file name(s)
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub replace_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 1.6 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my($i,$j,$c,$d,$e,$f,$g,$h,$k,
	$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%out_subs, %left_out, @lib, $ver, $sub_name);
	my %hash=%{&merge_hash(@hash)};
	@array= keys %hash;      ## names of the subs to take out
	my @values= values %hash; ## new sub texts to put in

	for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		close LIB_FILE;

		for($j=0; $j < @lib; $j++){
			  for($s=0; $s < @array; $s++){
				  #"""" <name>[<version>], anything else can not be searched for.
				  #"""" Going on would use the name left from the previous turn.
				  unless($array[$s] =~/^([_a-zA-Z\-]+)(\d*\.*\d*)$/){ next }
				  $sub_name=$1;

				  #"""" Taking the headbox """""""""""""
				  #  The name should not be followed by a word char, '-' or '.',
				  #  or 'foo' takes the headbox of 'foo_bar' (or of 'foo.pl') as well.
				  if( ($lib[$j]=~/^#[_\-\*]{10,130}\s*$/)
				      &&(defined $lib[$j+1])
				      &&($lib[$j+1]=~/^(#\s*title\s*:\s*\Q$sub_name\E)(?![\w\-\.])/i) ){
					  $out_subs{"$sub_name"}.="$lib[$j]$1\n";
					  $j+=2;
					  #  '$j >= @lib' stops an endless loop when the box is never closed
					  until( ($j >= @lib)||($lib[$j]=~/^sub\s*\w+\s*\{/)||($lib[$j]=~/^#[\-_\*]{10,130}\s*$/) ){
							 $lib[$j]=~s/(\s*)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^#\s*version\s*:\s*([\d+\.\d+]*)\s*/i) ){
								  my $ver_found = $1;
								  if( $ver_found=~/^[ ]*$/){ $ver = '1.0'; }             ##  make null to 1.0
								  elsif( $ver_found=~/^\d+$/){ $ver = "$ver_found.0"; }  ### make  2   to 2.0
								  else{ $ver = $ver_found; }                             ##  assign version
							 }
							 $out_subs{"$sub_name"}.="$lib[$j]";
							 $j++;
					  }
					  #  Only the closing #------------- line is removed here.
					  #  When the box is ended by a 'sub' line, that line is left
					  #  for the sub reading part below.
					  if( ($j < @lib)&&($lib[$j]=~/^#[\-_\*]{10,130}\s*$/) ){
						  $out_subs{"$sub_name"}.="$lib[$j]";
						  $j++;
					  }
				  }

				  #"""""""" Reading sub {  } """""""
				  if( ($j < @lib)&&($lib[$j]=~/^sub\s+\Q$sub_name\E\s*\{/) ){
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;
					  #  '$j >= @lib' stops an endless loop when '}' is missing
					  until( ($j >= @lib)||($lib[$j]=~/^\}/) ){
						  $out_subs{"$sub_name"}.="$lib[$j]";  $j++;
					  }
					  if($j < @lib){ $out_subs{"$sub_name"}.="$lib[$j]"; } ## to fetch '}'

					  $j++;

					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $out_subs{"$sub_name$ver"}=$out_subs{$sub_name};
						  delete $out_subs{$sub_name};
					  }
					  $ver = undef; ## the version belongs to this sub only
				  }
			  }
			  if($j < @lib){
				  $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
				                                  ## just in case you want the left out ones.
			  }
		}

		### appending the new subs.
		$left_out{$file[$i]} .= join('', map { defined $_ ? $_ : '' } @values);

		#  The file is opened for writing only after the new content is ready.
		open (LEFT_FILE, ">$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		print LEFT_FILE $left_out{$file[$i]};
		close LEFT_FILE;

	}#""""""""""""" end of for (@file)

	@no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}
	return( \%left_out ); # this has all the sub routines and other lines.
}


#________________________________________________________________________
# Title     : replace_text_recursively
# Usage     : &replace_text_recursively(<old_string>, <new_string>, <dir_name>);
# Function  : finds patterns of text and replaces them in multiple input files
# Example   :
# Warning   : This produces a temporary file and rename it...
# Class     :
# Keywords  :
# Options   :
# Package   :
# Reference :
# Returns   : nothing
# Tips      :
# Argument  : reference of one array of file names in pwd
# Todo      :
# Author    : jong
# Version   : 1.1
