#!/usr/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      @(#) RepeatMasker
##  Author:
##      Arian Smit <asmit@systemsbiology.org>
##      Robert Hubley <rhubley@systemsbiology.org>
##  Description:
##      Takes one or more DNA sequence files, in fasta format, and returns
##      masked sequence file(s) (repetitive DNA is masked) for database
##      searches and a file with detailed annotation of repeat locations. The
##      sequence data are screened against a library of repetitive sequences
##      using the program cross_match (Phil Green, unpublished) or
##      ABBlast ( Gish et al ), RMBlast ( NCBI, Hubley et al ), or
##      nhmmer ( Wheeler et al ).
##
## NOTE: See RepeatMaskerConfig.pm for necessary installation
##       customization.
##
#******************************************************************************
#* Copyright (C) University of Washington 1996-1999 Developed by Arian Smit,
#* Philip Green and Colin Wilson of the University of Washington Department of
#* Genomics.
#*
#* Copyright (C) Arian Smit 2000-2001
#*
#* Copyright (C) Institute for Systems Biology 2002-2021 Developed by
#* Arian Smit and Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
###############################################################################
#  ChangeLog:
#
#    $Log$
#
###############################################################################
#
# To Do:
#
#

=head1 NAME

RepeatMasker - Mask repetitive DNA

=head1 SYNOPSIS

  RepeatMasker [-options] <seqfiles(s) in fasta format>

=head1 DESCRIPTION

The options are:

=over 4

=item -h(elp)

Detailed help

=back

Default settings are for masking all types of repeats in a 
primate sequence.

=over 4

=item -e(ngine) [crossmatch|wublast|abblast|ncbi|rmblast|hmmer]

Use an alternate search engine to the default.  Note: 'ncbi' and 'rmblast' 
are both aliases for the rmblastn search engine.  The generic NCBI 
blastn program is not sensitive enough for use with RepeatMasker at this 
time.

=item -pa(rallel) [number]   

The number of sequence batch jobs [50kb minimum] to run in parallel.  
RepeatMasker will fork off this number of parallel jobs, each running 
the search engine specified. For each search engine invocation 
( where applicable ) a fixed number of cores/threads is used:

  RMBlast     4 cores
  ABBlast     4 cores
  nhmmer      2 cores
  crossmatch  1 core

To estimate the number of cores a RepeatMasker run will use simply 
multiply the -pa value by the number of cores the particular search
engine will use.

=item -s             

Slow search; 0-5% more sensitive, 2-3 times slower than default

=item -q             

Quick search; 5-10% less sensitive, 2-5 times faster than default

=item -qq            

Rush job; about 10% less sensitive, 4->10 times faster than default
(quick searches are fine under most circumstances)

=back

Repeat options

=over 4

=item -nolow   

Does not mask low_complexity DNA or simple repeats

NOTE: This is an important step in RepeatMasker, the identification
of low-divergence simple repeats early in RepeatMasker's search
phase lowers the overall false-positive rate for TE annotations 
considerably.  To simply remove simple repeats from the final output
of RepeatMasker use postprocessing tools such as:

 egrep -v "Simple|Satellite" my_data.out > filtered.out

To remove these annotations from the final output.  The -nolow option
should only be used when there is a specific reason to avoid pre/post
masking tandem/simple/low-complexity sequences.

=item -noint   

Only masks low complex/simple repeats (no interspersed repeats)

=item -norna         

Does not mask small RNA (pseudo) genes

=item -alu           

Only masks Alus (and 7SLRNA, SVA and LTR5)(only for primate DNA)

=item -div [number]  

Masks only those repeats with [number] percent diverged from consensus

=item -lib [filename] 

Allows use of a custom library (e.g. from another species)

=item -cutoff [number]

Sets cutoff score for masking repeats when using -lib (default 225)

=item -species  <query species>   

Specify the species or clade of the input sequence.  The species
name must be a valid NCBI Taxonomy Database species name and be
contained in the RepeatMasker repeat database.  Some examples are:

  -species human
  -species mouse
  -species rattus
  -species "ciona savignyi"
  -species arabidopsis

Other commonly used species:

mammal, carnivore, rodentia, rat, cow, pig, cat, dog, chicken, 
fugu, danio, "ciona intestinalis", drosophila, anopheles, worm, 
diatoaea, artiodactyl, arabidopsis, rice, wheat, and maize 

=item -uncurated  

Use uncurated and curated families from FamDB rather than curated
only (default).

The latest version of RepeatMasker is designed to work with the new
FamDB partitioned data format (v1.0).  This format supports storing
both curated and uncurated TE data in smaller (optionally present)
partitions. Rather than controlling the type of data used (curated/
uncurated) by downloading the corresponding database, both 
curated/uncurated data will now be in the same files and use of that
data can be controlled using this new flag.

=back

Contamination options

=over 4

=item -is_only       

Only clips E coli insertion elements out of fasta and .qual files

=item -is_clip       

Clips IS elements before analysis (default: IS only reported)

=item -no_is         

Skips bacterial insertion element check

=back

Running options

=over 4

=item -gc [number]   

Use matrices calculated for 'number' percentage background GC level 

=item -gccalc        

RepeatMasker calculates the GC content even for batch files/small seqs

=item -frag [number] 

Maximum sequence length masked without fragmenting (default 60000)

=item -nocut         

Skips the steps in which repeats are excised

=item -noisy         

Prints search engine progress report to screen (defaults to .stderr file)  

=item -nopost

Do not postprocess the results of the run ( i.e. call ProcessRepeats ).
NOTE: This option should only be used when ProcessRepeats will be run manually
on the results.

=back

output options

=over 4

=item -dir [directory name] 

Writes output to this directory (default is query file directory, 
"-dir ." will write to current directory).

=item -a(lignments) 

Writes alignments in .align output file

=item -inv     

Alignments are presented in the orientation of the repeat (with option -a)

=item -lcambig

Outputs ambiguous DNA transposon fragments using a lower case
name.  All other repeats are listed in upper case.  Ambiguous
fragments match multiple repeat elements and can only be
called based on flanking repeat information.

=item -small   

Returns complete .masked sequence in lower case

=item -xsmall  

Returns repetitive regions in lowercase (rest capitals) rather than masked

=item -x       

Returns repetitive regions masked with Xs rather than Ns

=item -poly    

Reports simple repeats that may be polymorphic (in file.poly)

=item -source

Includes for each annotation the HSP "evidence".  Currently this option
is only available with the "-html" output format listed below.

=item -html

Creates an additional output file in xhtml format.

=item -ace     

Creates an additional output file in ACeDB format

=item -gff     

Creates an additional Gene Feature Finding format output

=item -u       

Creates an additional annotation file not processed by ProcessRepeats

=item -xm      

Creates an additional output file in cross_match format (for parsing)

=item -no_id   

Leaves out final column with unique ID for each element (was default)

=item -ex(cln) 

Calculates repeat densities (in .tbl) excluding runs of >=20 N/Xs 
in the query

=back

=head1 CONFIGURATION OVERRIDES

=head1 SEE ALSO

=over 4

Crossmatch, ProcessRepeats

=back

=head1 COPYRIGHT

2002-2025 Copyright (C) Institute for Systems Biology Developed by
Arian Smit and Robert Hubley.

2000-2001 Copyright (C) Arian Smit.

1996-1999 Copyright (C) University of Washington, Developed by Arian Smit,
Philip Green and Colin Wilson of the University of Washington Department of
Genomics.

=head1 AUTHORS

Arian Smit <asmit@systemsbiology.org>

Robert Hubley <rhubley@systemsbiology.org>

=cut

#
# Module Dependence
#
use strict;
use FindBin;
use lib $FindBin::RealBin;
use Carp;
use Getopt::Long;
use POSIX qw(:sys_wait_h);
use Storable qw(nstore retrieve);
use Pod::Text;
use File::Copy;
use File::Spec;
use File::Path;
use Data::Dumper;
use Cwd;

# RepeatMasker Libraries
use EMBL;
use DFAM;
use RepeatMaskerConfig;
use SearchResult;
use SearchResultCollection;
use Taxonomy;
use CrossmatchSearchEngine;
use WUBlastSearchEngine;
use HMMERSearchEngine;
use NCBIBlastSearchEngine;
use SimpleBatcher;
use FastaDB;
use TRF;
use TRFSearchResult;
use LibraryUtils;
# For timing only
#use Time::HiRes qw(gettimeofday);

# A bug in 5.8 produces way too many warnings
# NOTE(review): "use" is processed at compile time and the warnings pragma
# is lexically scoped, so the statement below only enables warnings inside
# this ( otherwise empty ) block.  The runtime version test does not gate
# it, and the code outside the braces is compiled without "use warnings".
if ( $] && $] >= 5.008003 ) {
  use warnings;
}

#
# Version
#
# The version banner is printed on every invocation.  When the first
# argument is "-v" the banner is all that is wanted, so the program
# exits successfully right after printing it.
#
my $version = $RepeatMaskerConfig::VERSION;
print "RepeatMasker version $version\n";

if ( $ARGV[ 0 ] && $ARGV[ 0 ] eq '-v' ) {
  exit( 0 );
}

# Remember the invocation ( program name followed by the raw arguments )
# for use in diagnostic reports.  The program name is joined in together
# with the arguments so that it is separated from the first argument by a
# space; concatenating it in front of the joined arguments ran the two
# together ( e.g. "RepeatMasker-species" ).
my $cmdLine = join( ' ', $0, @ARGV );

#
# Option processing
#
# The entries below are Getopt::Long option specifications, e.g.
#   name        : a flag without a value
#   name=s      : an option taking a string value
#   name=i      : an option taking an integer value
#   name|alias  : an option with an alternate name
#
# The options contributed by the configuration module are appended to
# the built-in ones so they can be overridden on the command line too.
#
my @opts = (
  qw(
      a|alignments ace alu
      cutoff=s dir=s div=s
      excln frag=s gc=s gccalc gff
      help int noint inv
      is_clip is_only lib=s
      low nolow lcambig libdir=s
      no_id no_is nocut noisy norna
      parallel=i poly q qq s small
      species|sp=s uncurated u
      fptest rbrm_only
      x xm xsmall nopost
      engine|e=s source html debug=s
  ),
  RepeatMaskerConfig::getCommandLineOptions(),
);

#
# usage()
#
# Print the program documentation to STDOUT and exit with status 1.
#
# The POD embedded in this script is read line by line from $0.  When
# the "=head1 CONFIGURATION OVERRIDES" heading is reached, the heading
# is emitted followed by the POD returned by RepeatMaskerConfig::getPOD();
# if that call returns nothing the heading itself is left out.  The
# merged text is then rendered with Pod::Text.
#
# Takes no arguments and never returns.  Dies if the script file cannot
# be opened for reading.
#
sub usage {
  my $p = Pod::Text->new();
  $p->output_fh( *STDOUT );

  # Three-argument open with a lexical handle: the script path can never
  # be interpreted as an open mode and no global bareword handle ( or the
  # caller's $_ ) is disturbed while reading.
  open( my $selfFH, '<', $0 )
      or die "Could not open self ($0) for generating documentation! ($!)";

  my $pod_str = "";
  while ( my $line = <$selfFH> ) {
    if ( $line =~ /^=head1\s+CONFIGURATION OVERRIDES\s*$/ ) {

      # Insertion point for the configuration module's documentation
      my $c_pod = RepeatMaskerConfig::getPOD();
      $pod_str .= $line . $c_pod if ( $c_pod );
    }
    else {
      $pod_str .= $line;
    }
  }
  close( $selfFH );

  print "$0 - $version\n";
  $p->parse_string_document( $pod_str );
  exit( 1 );
}

#
# Parse the command line into %options.  A parsing failure, or a run
# that names no sequence file ( unless -help was requested ), ends in
# usage().
#
my %options = ();
usage() if ( !GetOptions( \%options, @opts ) );

if ( !@ARGV && !$options{'help'} ) {
  print "No query sequence file indicated\n\n";
  usage();
}

# -help: show the long help file that sits next to this script using
# the pager named in the environment ( "more" when none is set ).
my $REPEATMASKER_DIR = $FindBin::RealBin;
my $PAGER = defined $ENV{PAGER} ? $ENV{PAGER} : "more";
if ( $options{'help'} ) {
  system( "$PAGER $REPEATMASKER_DIR/repeatmasker.help\n" );
  exit( 0 );
}

#
# Resolve configuration settings using the following precedence:
# command line first, then environment, followed by config
# file.
#
RepeatMaskerConfig::resolveConfiguration( \%options );
my $config = $RepeatMaskerConfig::configuration;

# Full paths of the external programs, built from the configured
# installation directory of each tool.
my $NHMMSCAN_PRGM    = "$config->{'HMMER_DIR'}->{'value'}/nhmmscan";
my $HMMPRESS_PRGM    = "$config->{'HMMER_DIR'}->{'value'}/hmmpress";
my $HMMSTAT_PRGM     = "$config->{'HMMER_DIR'}->{'value'}/hmmstat";
my $RMBLASTN_PRGM    = "$config->{'RMBLAST_DIR'}->{'value'}/rmblastn";
my $NCBIBLASTDB_PRGM = "$config->{'RMBLAST_DIR'}->{'value'}/makeblastdb";
my $CROSSMATCH_PRGM  = "$config->{'CROSSMATCH_DIR'}->{'value'}/cross_match";
my $WUBLASTP_PRGM    = "$config->{'ABBLAST_DIR'}->{'value'}/blastp";
my $SETDB_PRGM       = "$config->{'ABBLAST_DIR'}->{'value'}/setdb";

# TRF is configured as a complete program path rather than a directory
my $TRF_PRGM = $config->{'TRF_PRGM'}->{'value'};

# The library directory is taken from the resolved configuration.  It
# must be set and must exist on disk before anything else can be done.
my $LIBDIR = $config->{'LIBDIR'}->{'value'};
die "The path to the RepeatMasker libraries is not set.  This\n",
    "is typically the case when the RepeatMasker has not been\n",
    "initially configured.  Please run the RepeatMasker/configure\n",
    "program before retrying RepeatMasker"
    if ( $LIBDIR eq "" );
die "The path to the Libraries directory is set incorrectly:\n",
    "    $LIBDIR\n",
    "does not exist.  Please re-run the configure program.\n"
    if ( !-d $LIBDIR );

# The matrices are expected in a subdirectory of the directory this
# script was started from; its absence means that directory was
# determined incorrectly.
my $MATDIR = "$REPEATMASKER_DIR/Matrices";
if ( !-d $MATDIR ) {
  die "The assumed RepeatMasker installation directory\n",
      "    $REPEATMASKER_DIR\n",
      "does not appear to be correct.  E.g it does not\n",
      "contain the 'Matrices' subdirectory.\n",
      "This can occur if hard links are used to invoke\n",
      "this script.\n";
}

#
# Timestamp of this run.  Blanks, commas, tabs, newlines and colons are
# stripped from it ( Windows does not support the use of ":" in a
# filename ).
#
my $date = localtime( time() );
$date =~ tr/ ,\t\n://d;

# Debugging is switched on either globally through the configuration
# module or by a true -debug value on the command line.
my $DEBUG =
    ( $RepeatMaskerConfig::DEBUGALL == 1 || $options{'debug'} ) ? 1 : 0;

#
# Setup the search engine
#
# Start from the configured default; a recognised -engine value on the
# command line ( matched without regard to case ) replaces it.
#
my $searchEngine;
my $engine = $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'};
if ( defined $options{'engine'} ) {
  if ( $options{'engine'} !~
       /^(ncbi|rmblast|wublast|abblast|crossmatch|hmmer|nhmmer)$/i )
  {
    die "I have never heard of the search engine $options{'engine'}.  Please\n"
        . "use rmblast/abblast/hmmer or crossmatch.\n";
  }
  $engine = lc( $options{'engine'} );
}

# Map alternate engine names onto a single canonical name each, so that
# later code only ever has to test for "wublast", "hmmer" or "ncbi".
{
  my %canonicalName = (
                        'abblast' => 'wublast',
                        'nhmmer'  => 'hmmer',
                        'rmblast' => 'ncbi'
  );
  $engine = $canonicalName{$engine} if ( exists $canonicalName{$engine} );
}

# The options hash carries the canonical name as well
$options{'engine'} = $engine;

# -int and -low are still accepted by the option parser only so that
# their use can be rejected with an explanation.
die "\nThe options -int and -low have been deprecated.  Please use either\n"
    . "-noint or -nolow instead.\n\n"
    if ( grep { defined $options{$_} } qw( int low ) );

# Caution the user about the consequences of -nolow
if ( defined $options{'nolow'} ) {
  print join( "",
    "\nWARNING: The nolow option should be used with caution.  This option\n",
    "         doesn't simply filter out simple repeats and low-complexity\n",
    "         annotations from the output, rather it doesn't run these\n",
    "         searches at all.  The simple repeats, and low-complexity\n",
    "         sequences may then be falsely annotated as fragments of\n",
    "         TE families that contain short stretches of them.\n\n" );
}

# The configured TRF program must exist before the run can proceed.  An
# unset or empty path is treated the same as a missing file, so the
# file test is never applied to an undefined value.
if ( !defined $TRF_PRGM || $TRF_PRGM eq "" || !-e $TRF_PRGM ) {
  die "TRF program not configured! This version of RepeatMasker requires a\n"
      . "local installation of TRF.  Please visit:\n"
      . "               http://tandem.bu.edu/trf/trf.html\n"
      . "to obtain the current version.  Once installed please re-run the \n"
      . "RepeatMasker configure script to setup RepeatMasker to use the new\n"
      . "installation.";
}

# The DEBUG setting handed to the search engine constructor is enabled
# by the global DEBUGALL switch or by the bit with value 2 in -debug.
my $engineDEBUG = 0;
$engineDEBUG = 1 if ( $RepeatMaskerConfig::DEBUGALL == 1 ||
                      ( exists $options{'debug'}
                        && $options{'debug'} & 2 ) );

# Construct the search engine object that matches the ( normalized )
# engine name.  In every branch the object is checked before any method
# is called on it, so a failed construction is reported with the
# "Cannot execute" message for that engine.
if ( $engine eq "wublast" ) {
  $searchEngine = WUBlastSearchEngine->new(
                             pathToEngine => $WUBLASTP_PRGM,
                             DEBUG        => $engineDEBUG );

  if ( not defined $searchEngine ) {
    die "Cannot execute $WUBLASTP_PRGM\n";
  }
  print "Search Engine: ABBlast/WUBlast [ "
      . $searchEngine->getVersion() . " ]\n";
}
elsif ( $engine eq "hmmer" ) {
  $searchEngine = HMMERSearchEngine->new(
                             pathToEngine => $NHMMSCAN_PRGM, 
                             DEBUG        => $engineDEBUG );

  # This check has to come before setCores(); otherwise a failed
  # construction would abort on the method call and the message below
  # could never be shown.
  if ( not defined $searchEngine ) {
    die "Cannot execute $NHMMSCAN_PRGM\n";
  }

  # Used to allow a single instance to use all the cores ( ie. default nhmmer
  # prior to 3.2 ).  Now ( like nhmmer 3.2 ) we require the user to be explicit
  # about how many parallel processes to use.
  # BEFORE:
  #if ( exists $options{'parallel'} && $options{'parallel'} > 1 ) {
  # Limit nhmmer cores to 2 per batch.
  #$searchEngine->setCores( 2 );
  #}
  # NOW:
  # For each parallel batch invoke nhmmer with 2 cores each.  I.e -pa 4
  # would run 4 nhmmer jobs each using 2 cores
  $searchEngine->setCores( 2 );

  print "Search Engine: HMMER [ " . $searchEngine->getVersion() . " ]\n";

  # TODO: We do not support IS searches with HMMER yet.
  $options{'no_is'} = 1;

}
elsif ( $engine eq "crossmatch" ) {
  $searchEngine = CrossmatchSearchEngine->new(
                           pathToEngine => $CROSSMATCH_PRGM,
                           DEBUG        => $engineDEBUG );
  if ( not defined $searchEngine ) {
    die "Cannot execute $CROSSMATCH_PRGM\n";
  }
  print "Search Engine: Crossmatch [ " . $searchEngine->getVersion() . " ]\n";
}
elsif ( $engine eq "ncbi" ) {
  $searchEngine = NCBIBlastSearchEngine->new(
                             pathToEngine => $RMBLASTN_PRGM,
                             DEBUG        => $engineDEBUG );
  if ( not defined $searchEngine ) {
    die "Cannot execute $RMBLASTN_PRGM";
  }
  print "Search Engine: NCBI/RMBLAST [ " . $searchEngine->getVersion() . " ]\n";
}
elsif ( $engine eq "" ) {

  # Neither the configuration nor the command line named an engine
  die "\nThe default search engine is not configured!  Please use the command line\n"
    . "parameter '-engine <rmblast|crossmatch|hmmer>' to specify which engine to\n"
    . "use or rerun the configure script to set the default engine.\n";
}
else {

  # Only reachable through an unrecognised configured default, since
  # command line values were validated earlier.
  die "Search engine ( $engine ) is unknown to RepeatMasker.  Please check "
      . "the RepeatMaskerConfig.pm or rerun the configure script!.\n";
}

#
# Library flavour: "HMM" for the hmmer engine, "CONSENSUS" for all
# the others.
#
my $libType = ( $engine eq "hmmer" ) ? "HMM" : "CONSENSUS";

#
# RepeatMasker Batching Parameters
#
# $fragmentSize is the default batch fragment length; -frag may
# replace it ( see below ).
#
my $fragmentSize = 60000;

#
# The batch overlap length has a large influence on the results.
# Overlap boundaries are where edge effects give rise to partial,
# overlapping annotations, and matrix differences between flanking
# batches can make one and the same repeat show different divergence,
# score and alignment length characteristics.
#
my $overlapLen = 2000;

#
# A user supplied fragment length ( -frag ) is honoured unless it is
# below twice the overlap length, in which case the default stays in
# force and a warning is issued.
#
if ( !defined $options{'frag'} ) {

  # Nothing requested; keep the default
}
elsif ( $options{'frag'} < ( 2 * $overlapLen ) ) {
  warn "RepeatMasker: You may not use a fragment size (-frag "
      . "$options{'frag'} ) which is less than 2 times the overlap "
      . "length (overlapLen = $overlapLen).  Defaulting to $fragmentSize\n";
}
else {
  $fragmentSize = $options{'frag'};
}

#
# Validate the query file names up front: names containing whitespace
# or one of the special characters listed in the pattern below are
# rejected before any work is done.
#
foreach my $queryName ( @ARGV ) {
  die "RepeatMasker can not handle filenames with spaces "
      . "like the file \"$queryName\"\n"
      if ( $queryName =~ /\s/ );

  if ( $queryName =~ /([\`\!\$\^\&\*\(\)\{\}\[\]\|\\\;\"\'\<\>\?])/ ) {
    die "RepeatMasker can not handle filenames with the special "
        . "character \"$1\" as in the file \"$queryName\"\n";
  }
}

#
# Temporary files go to a dedicated temporary directory: if the query
# file is across a system boundary, writing them next to the file
# takes a lot of time.
#
my ( $tempdir, $runnumber ) = &createTempDir( \%options, $date, $ARGV[ 0 ] );

#
# Species & Library Setup
#
my $tax = Taxonomy->new( famdb_dir => "$LIBDIR/famdb" );

#
# The search path for finding a place to store the cache: the library
# directory, a per-user cache directory and, last, the temporary
# directory.
#
my @LIBPATH = ( $LIBDIR, "$ENV{'HOME'}/.RepeatMaskerCache", $tempdir );

my (
     $resolvedSpecies, $generalLibDir, $speciesLibDir,
     $customLibDir,    $rmLibraryVersion
    )
    = &initLibrariesFromFamdb(
                               $options{'species'}, $options{'lib'},
                               $LIBDIR,             $tempdir,
                               \@LIBPATH,           $searchEngine
    );
my $dbversion = $rmLibraryVersion;
$options{'species'} = $resolvedSpecies;

# Call isA a few times to populate the isACache. This must be done before
# forking so that each fork sees the cached value instead of re-checking
foreach my $clade ( 'primates', 'rodentia' ) {
  $tax->isA( $options{'species'}, $clade );
}

# Read in a frozen hash of element id's which are refineable.  This
# hash is created by initLibraries the first time a library is initialized.
# It is not loaded when a custom library ( -lib ) is in use.
my $refineableHashRef =
    $options{"lib"}
    ? undef
    : retrieve( "$speciesLibDir/refineableHash.dat" );

# Functions saveOldFiles and cleanUp expect the -dir directory to
# already exist, so create it here when it is missing.
if ( $options{'dir'} ) {
  unless ( -d $options{'dir'} || mkdir( $options{'dir'}, 0777 ) ) {
    die "Output directory ( -dir "
        . $options{'dir'}
        . " ) doesn't exist and could not be created.\n";
  }
}

#
# Main loop
#
FILECYCLE:
foreach my $file ( @ARGV ) {

  unless ( -r $file ) {
    print "cannot read file $file in " . cwd() . "\n";
    next;
  }

  unless ( -s $file ) {
    print "File $file appears to be empty.\n";
    next;
  }

  my $compressed = "";
  if ( $file =~ /\.gz$/ ) {
    unless ( `gunzip $file 2>&1` ) {

      # Name $file only changes if gunzip did not complain
      # (file may end with .gz but not be zipped
      $file =~ s/\.gz$//;
      $compressed = "zipped";
    }
  }
  elsif ( $file =~ /\.Z$/ ) {
    unless ( `uncompress $file 2>&1` ) {
      $file =~ s/\.Z$//;
      $compressed = "Zed";
    }
  }

  # With one file $#ARGV == 0
  print "\nanalyzing file $file\n" if ( $#ARGV >= 0 );

  # Don't mess with original query file and remember original
  # location, e.g. for quality file.
  my $fileori = $file;
  my ( $originaldir, $fileend ) = ( File::Spec->splitpath( $file ) )[ 1, 2 ];
  $originaldir = "." if ( $originaldir eq "" );

  my $file = "$tempdir\/$fileend";

  ## Look for quality files and read in
  my $qualFile = "$fileori" . ".qual";

  # foo.seq files have foo.qual quality files;
  $qualFile =~ s/\.seq.qual$/\.qual/;

  # Check if files will be overwritten
  &saveOldFiles( $fileori, $fileend, $originaldir, $date, \%options );

  ## Create a batcher object.  Upon construction the
  ## object will survey the fasta file, check for syntax
  ## errors, and create a byte index for all parseable sequences
  ## in the file.  Copy the file so we don't mess with the
  ## original.
  system( " cp $fileori $file " );
  ## If the input file has read-only permissions simply copying
  ## the file through the OS will retain these permissions
  chmod 0700, $file;
  my $db = FastaDB->new(
                         fileName    => $file,
                         openMode    => SeqDBI::ReadWrite,
                         maxIDLength => 50
  );
  my $batcher = SimpleBatcher->new( $db, $fragmentSize, $overlapLen );

  ## How many sequences are in the fasta file?
  my $totseqcnt = $db->getSeqCount();

  ## Don't process this file unless we have some
  ## unambiguous (ACGT) sequence.  Sublength
  ## is the length of the sequence minus any
  ## ambiguous base codes ( ie. N, S, X, W ... )
  my $sublength = 0;
  unless ( $sublength = $db->getSubtLength() ) {
    &SkipFile( $file );
    &cleanUp( \%options, $runnumber, $tempdir, $fileori, $fileend, $file,
              $originaldir, $compressed )
        unless ( $DEBUG );

    next FILECYCLE;
  }

  # GC level is calculated over the length of the non-ambiguous sequence.
  my $totGClevel = 100 * $db->getGCLength() / $sublength;
  $totGClevel = sprintf "%4.2f", $totGClevel;

  my $maskfile        = "$file.masked";
  my $fullmaskfile    = "$file.masked.all";
  my $fullcatfile     = "$file.cat.all";
  my $fullRefinedFile = "$file.ref";
  my $fullcutfile     = "$file.cut.all";
  my $fullLogFile     = "$file.log";
  my $fullErrFile     = "$file.stderr";
  my $numX      = 1;                  # The number of X's to take the place of a
                                      # repeat base.
  my $totseqlen = $db->getSeqLength();
  my $compressCatFile = 1 if ( $totseqlen > 10000000 );

  my $batchCount = $batcher->getBatchCount();
  my $numberChildren = $options{'parallel'} ? $options{'parallel'} : 1;
  $numberChildren = $batchCount if ( $batchCount < $numberChildren );

  my $badForkCount = 0;     # A failsafe for in case the fork goes badly
  my $badForkMax   = 20;    # The number of bad forks ( in a row ) before exit
  my $retryLimit   = 2;     # Attempt to run a batch 2 times before failing it
  my %children     = ();    # A hash of children PIDs with their batch nums
  my $child_id     = 0;     # The PID of returned by fork
  my %batchStatus  = ();    # A hash which holds the retry count for
                            #  each batch number.
  my $nextBatchToConcatenate = 1;

  # Initialize the batchStatus hash
  for ( my $k = 1 ; $k <= $batchCount ; $k++ ) {
    $batchStatus{$k} = {
                         'retry'    => 0,
                         'childPID' => -1
    };
  }

  #
  # Job processing loop
  #   Process all batches stored in the batchStatus hash.  If
  #   for some reason a job fails the entry in $batchStatus is
  #   incremented.  We continue to re-run this batch until the
  #   $retryLimit is reached.
  #
JOBLOOP:
  while ( keys( %batchStatus ) ) {

    #
    # First check the status of currently running
    # proceses.
    #
    if ( keys( %children ) ) {

      # Wait for at least one to exit;
      print "Waiting for a child to finish or die\n" if ( $DEBUG );
      my $childPID = wait();
      my $retVal   = ( $? >> 8 );

      # Check if we returned with a valid PID
      if ( $childPID > 0 ) {

        ## Child process is gone
        # Find out what batch it was working on
        my $batchNum = $children{$childPID};

        # Delete it from the children list
        delete $children{$childPID};

        my $batchFile = $file . "_batch-" . $batchNum;

        # Check it's status
        if (    $retVal == 0
             && -e "$batchFile.cat"
             && -s "$batchFile.masked" )
        {

          print "Child completed: PID=$childPID, batch=$batchNum\n"
              . " RetVal=$retVal\n"
              if ( $DEBUG );
          ## Child completed ok.
          ## Append log, and stderrs
          system( "cat $batchFile.masked.log >> $fullLogFile" )
              if ( -e "$batchFile.masked.log" );
          system( "cat $batchFile.masked.stderr >> $fullErrFile" )
              if ( -e "$batchFile.masked.stderr" );
          system( "touch $batchFile.cat" ) if ( !-e "$batchFile.cat" );

          # Now remove them
          unlink( "$batchFile.masked", "$batchFile.masked.log",
                  "$batchFile.masked.stderr" )
              unless ( $DEBUG );
          print "Child for batch=$batchNum completed ok....\n" if ( $DEBUG );

          # Delete the batchStatus entry
          delete $batchStatus{$batchNum};
        }
        else {
          ## Process failed.

          print "Child die'd.  Organize the funeral for PID=$childPID, "
              . " batch=$batchNum, RetVal=$retVal\n"
              if ( $DEBUG );

          # Check how many times we have run it
          if ( $batchStatus{$batchNum}->{'retry'} < $retryLimit ) {
            ## Under the retry limit...rerun it!
            print "Child for batch=$batchNum failed ($retVal) " . "retry#"
                . $batchStatus{$batchNum}->{'retry'} . "\n"
                if ( $DEBUG );
            $batchStatus{$batchNum}->{'retry'}++;
            $batchStatus{$batchNum}->{'childPID'} = -1;
            print "WARNING: Retrying batch ( $batchNum ) [ $retVal,"
                . ( -e "$batchFile.cat" ) . ", "
                . ( -s "$batchFile.masked" )
                . "]...\n";
          }
          else {
            ## Too many retries.
            ## We are out of here!
            print "\n\nFATAL ERROR: RepeatMasker giving up. One or more\n"
                . "batches failed!  Unfortunately this type of error\n"
                . "cannot be recovered from. Please submit the following\n"
                . "details to the feedback page at the repeatmasker\n"
                . "website:\n\n"
                . "       http://www.repeatmasker.org\n\n"
                . "RepeatMasker Version: $version\n"
                . "Library Version: $rmLibraryVersion\n"
                . "Search Engine: $engine [ "
                . $searchEngine->getVersion() . " ]\n"
                . "Command Line: $cmdLine\n"
                . "Batch Number: $batchNum\n"
                . "Disk Space:\n"
                . `df $tempdir` . "\n";
            if ( -e "/proc/meminfo" ) {
              print "System Memory:\n";
              open IN, "</proc/meminfo";
              while ( <IN> ) {
                print if ( /Mem|Cache|Swap/ );
              }
              close IN;
            }
            print "Further details about this problem may be found in\n"
                . "the directory: $tempdir\n";

            opendir TDIR, "$tempdir"
                or die "RepeatMasker: Could not open $tempdir for reading!\n";
            my $mostCurrent = 0;
            my $prefix      = "";
            while ( my $tFile = readdir( TDIR ) ) {
              if ( $tFile =~ /^(.*)Results-(\d+)\.out/ ) {
                $prefix = $1;
                $mostCurrent = $2 if ( $2 > $mostCurrent );
              }
            }
            close TDIR;
            if ( $mostCurrent > 0 ) {
              print "The following file(s) in this directory may be useful\n"
                  . "for debugging this failure:\n"
                  . "      $tempdir/$prefix\Results-$mostCurrent.out\n";
              if ( -s "$tempdir/$prefix\Results-$mostCurrent\.err" ) {
                print "      $tempdir/$prefix\Results-$mostCurrent.err\n";
              }
            }
            print "\n\n";
            exit( -1 );
          }
        }
      }
      else {

        # Child is still running
      }
    }    # End if ( keys( %children ...

    # Append annotations in batch-order.  Using -pa # ( multithreading )
    # there is no guarantee they will be ready in batch order.
    while ( $nextBatchToConcatenate <= $batchCount ) {
      if ( !exists $batchStatus{$nextBatchToConcatenate}
           && -e "$file" . "_batch-" . "$nextBatchToConcatenate.cat" )
      {
        if ( $compressCatFile ) {
          system(   "gzip -c $file"
                  . "_batch-"
                  . "$nextBatchToConcatenate.cat "
                  . ">> $fullcatfile.gz" );
        }
        else {
          system(   "cat $file"
                  . "_batch-"
                  . "$nextBatchToConcatenate.cat "
                  . ">> $fullcatfile" );
        }
        unlink( "$file" . "_batch-" . "$nextBatchToConcatenate.cat" )
            unless ( $DEBUG );
        $nextBatchToConcatenate++;
      }
      else {
        last;
      }
    }

    # Gather a list of batches to work on
    my @batchNums = grep { ( $batchStatus{$_}->{'childPID'} < 0 ) }
        sort( { $a <=> $b } keys( %batchStatus ) );

    # Decide how many jobs to start
    my $numberToStart = 0;
    if ( @batchNums > ( $numberChildren - keys( %children ) ) ) {

      # Simply the number requested - the number running
      $numberToStart = ( $numberChildren - keys( %children ) );
    }
    else {

      # Simply the remaining batches
      $numberToStart = @batchNums;
    }

    #
    # Loop through and fork to our hearts
    # content.
    #
    for ( my $k = 0 ; $k < $numberToStart ; $k++ ) {

  FORK:
      if ( $child_id = fork ) {
        # Our children are our future
        print "Parent produced child $child_id\n" if ( $DEBUG );
        $children{$child_id} = $batchNums[ $k ];
        $batchStatus{ $batchNums[ $k ] }->{'childPID'} = $child_id;
      }
      elsif ( $child_id == 0 ) {
        my $batchSeqFile  = $file . "_batch-" . $batchNums[ $k ];
        my $batchCatFile  = $batchSeqFile . ".cat";
        my $batchMaskFile = $batchSeqFile . ".masked";

        ## Get batch parameters
        my $seq_cnt = $batcher->getBatchSeqCount( $batchNums[ $k ] );
        my $seqlen  = $batcher->getBatchSeqLength( $batchNums[ $k ] );
        my $frac_GC = $batcher->getBatchAverageGC( $batchNums[ $k ] );
        print "Creating Batch "
            . $batchNums[ $k ]
            . " seq count = $seq_cnt "
            . "len = $seqlen, average GC = $frac_GC\n"
            if ( $DEBUG );

        ## Create the batch file
        $batcher->writeBatchFile( $batchNums[ $k ], $batchMaskFile );

        my $GC_frac = 0;
        if ( $options{'gc'} ) {

          # user decides GC background
          $GC_frac = $options{'gc'};
        }
        elsif ( ( $seq_cnt > 1 || $seqlen <= 2000 )
                && !$options{'gccalc'} )
        {

          # More than one sequence *or* too short to get an
          # accurate measure of the background: take the average matrices.
          # NOTE: Option -gccalc overrules this behaviour
          $GC_frac = 43;
        }
        else {

          # program calculates GC background from sequence
          $GC_frac = $frac_GC;
        }

        # Check for E coli insertion elements
        unless (    $options{'no_is'}
                 || $options{'fptest'}
                 || !-s "$generalLibDir/is.lib" )
        {
          my $sequenceArrayRef;
          my $seqWithNameHashRef;
          &locateISElements(
                             \%options,
                             $batcher,
                             $batchNums[ $k ],
                             $batchMaskFile,
                             $qualFile,
                             $REPEATMASKER_DIR,
                             $searchEngine,
                             $file,
                             $generalLibDir
          );
        }

        unless ( $options{'is_only'} ) {
          my $batchSeqDB = FastaDB->new(
                                         fileName    => "$batchSeqFile.masked",
                                         openMode    => SeqDBI::ReadWrite,
                                         maxIDLength => 50
          );

          if ( $options{'fptest'} ) {
            &runLowComplexTests(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
            );
          }
          else {
            if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
              &runHMMERSearchStages(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
              );
            }
            else {
              &runSearchStages(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
              );
            }
          }

          ## The rule of thumb is that if we didn't return
          ## results we at least have an empty *.cat file.
          if ( !-e "$batchSeqFile.cat" ) {
            system( "touch $batchSeqFile.cat" );
          }
          unlink( <$batchSeqFile.tmp.* $batchSeqFile.temp.*> );
        }
        else {

          # Create an empty cat file.
          system( "touch $batchSeqFile.cat" );
        }

        if ( $batcher->isBatchFragmented( $batchNums[ $k ] ) ) {

          # The batch file included fragments so
          # the annotation positions need updating
          # before we can integrate them into the
          # the final results.
          &adjustFragmentPositions( $batcher, $batchCatFile );
        }

        # Exit child process with clean status
        exit( 0 );

      }    # elsif ( $child_id == 0 )...
      elsif ( $! =~ /No more process/ ) {

        # Supposedly recoverable fork error
        $badForkCount++;
        sleep 5;
        redo FORK;
      }
      else {    # Weird fork error
        print "\nWARNING: Cannot fork...for unknown reasons!\n";
        $badForkCount++;
        last;
      }
    }    # End for ( my $k = 0 ; $k < $numberToStart...

    # Give up if it looks bad for us
    ## TODO: Consider die'ng here...since there
    ##       is nothing more we can do to recover.
    last if ( $badForkCount == $badForkMax );

    #
    # Just so we don't loop endlessly.  Lets
    # hang around here until at least one
    # process quits. NOTE: This may be dangerous
    # consider this more....
    #
    print "Parent waiting for child to finish...\n" if ( $DEBUG );

  }    # End of JOBLOOP

  ## TODO: This is where we used to make the cut file.  This
  ##       option should be rolled into a new utility which
  ##       annotates/cuts based on the annotation database.
  
  #
  # Setup options for processRepeats
  #
  my $speed = "undef";
  if ( $options{'q'} ) {
    $speed = "quick";
  }
  elsif ( $options{'qq'} ) {
    $speed = "rushjob";
  }
  elsif ( $options{'s'} ) {
    $speed = "sensitive";
  }
  else {
    $speed = "default";
  }

  ## Build cat file header
  my $engine     = $searchEngine->getPathToEngine();
  my @path       = split( /[\\\/]/, $engine );
  my $engineInfo =
      "run with " . pop( @path ) . " version " . $searchEngine->getVersion();

  if ( $compressCatFile ) {
    open( CATOUT, "| gzip -c >$file.cat.gz" );
  }
  else {
    open( CATOUT, ">$file.cat" );
  }
  print CATOUT "## RepeatMasker version $version , $speed mode\n";
  print CATOUT "## $engineInfo\n";
  print CATOUT "## RM Library: $dbversion\n";
  print CATOUT "## Total Sequences: " . $db->getSeqCount() . "\n";
  print CATOUT "## Total Length: " . $db->getSeqLength() . "\n";
  print CATOUT "## Total NonMask ( excluding >20bp runs of N/X bases ): "
      . $db->getXNLength() . "\n";
  print CATOUT "## Total NonSub ( excluding all non ACGT bases ):"
      . $db->getSubtLength() . "\n";
  undef $db;
  #
  # Report batch overlap boundaries for anyone who cares
  #
  my $overlapBoundariesHashRef = $batcher->getOverlapBoundaries();
  if ( keys( %{$overlapBoundariesHashRef} ) > 0 ) {
    print CATOUT "## Batch Overlap Boundaries\n";
    foreach my $fragSeqName ( keys( %{$overlapBoundariesHashRef} ) ) {
      my $overlapList =
          join( ", ", @{ $overlapBoundariesHashRef->{$fragSeqName} } );
      print CATOUT "##   $fragSeqName  $overlapList\n";
    }
  }
  print CATOUT "## RAW Annotations:\n";
  close CATOUT;

  # Rename final cat file
  if ( -s "$fullcatfile.gz" ) {
    system("cat $fullcatfile.gz >> $file.cat.gz");
    unlink "$fullcatfile.gz";
  }
  elsif ( -s "$fullcatfile" ) {
    system("cat $fullcatfile >> $file.cat");
    unlink "$fullcatfile";
  }
  rename $fullcutfile, "$file.cut" if -s $fullcutfile;

  # TODO: Check on .seqending stuff in SEQDBI

  if ( -s "$fileori.alert" && !$options{'is_clip'} ) {
    if ( $compressCatFile ) {
      &systemint( "gzip -c $fileori.alert >> $file.cat.gz" );
    }
    else {
      &systemint( "cat $fileori.alert >> $file.cat" );
    }
  }

  unless ( exists $options{'nopost'} || $options{'fptest'} ) {

    #
    # Alter source file for masking if we want to go with
    # the clipped IS sequence file.  All annotations will
    # be based on the $file.withoutIS rather than the original
    # file.
    #
    my $maskSourceFile = $file;
    $maskSourceFile = "$file.withoutIS"
        if ( $options{'is_clip'}
             && -s "$file.withoutIS" );

    my $prOptions = "";
    $prOptions .= "-ace " if ( $options{'ace'} );
    $prOptions .= "-a "   if ( $options{'a'} );
    $prOptions .= "-lib " . $options{'lib'} . " "
        if ( $options{'lib'} && !$options{'species'} );
    $prOptions .= "-gff "     if ( $options{'gff'} );
    $prOptions .= "-no_id "   if ( $options{'no_id'} );
    $prOptions .= "-poly "    if ( $options{'poly'} );
    $prOptions .= "-u "       if ( $options{'u'} );
    $prOptions .= "-lcambig " if ( $options{'lcambig'} );
    $prOptions .= "-source "  if ( $options{'source'} || $options{'a'} );
    $prOptions .= "-html "    if ( $options{'html'} );
    $prOptions .= "-xm "      if ( $options{'xm'} );
    $prOptions .= "-excln "   if ( $options{'excln'} );
    $prOptions .= "-noint "   if ( $options{'noint'} );
    $prOptions .= "-species \"" . $options{'species'} . "\" "
        if ( defined $options{'species'}
             && $options{'species'} ne "" );
    $prOptions .= "-orifile $fileori ";
    $prOptions .= "-maskSource $maskSourceFile ";
    $prOptions .= "-x " if ( $options{'x'} );
    $prOptions .= "-xsmall " if ( $options{'xsmall'} );

    if ( $compressCatFile ) {
      $prOptions .= "$file.cat.gz";
    }
    else {
      $prOptions .= "$file.cat";
    }

    #
    # Run processrepeats
    #
    # For timing only
    #my $t0 = gettimeofday( );
    print "\n\n";
    print "RepeatMasker: Running: $prOptions\n" if ( $DEBUG );
    &systemint(
        "$REPEATMASKER_DIR/ProcessRepeats " . "$prOptions" )
        unless $options{'is_only'};


    # For timing only
    #if ( $DEBUG ) {
    #  my $t1 = gettimeofday( );
    #  my $elapsed = $t1 - $t0;
    #  print "ProcessRepeats runtime: $elapsed secs\n";
    #}

    if ( $options{'is_clip'} && -s "$file.withoutIS.masked" ) {
      rename( "$maskSourceFile.masked", "$file.masked" );
    }

  }

  &cleanUp( \%options, $runnumber, $tempdir, $fileori, $fileend, $file,
            $originaldir, $compressed )
             unless ( $DEBUG );
}    # END FILECYCLE

&systemint( "rm -R $tempdir" ) unless ( $DEBUG );

# We are soooo done
exit( 0 );

######################## S U B R O U T I N E S ############################

##-------------------------------------------------------------------------##
## Use:  &adjustFragmentPositions( $batcher, $catfile );
##
##   Rewrites $catfile in place so that the annotations of a fragmented
##   batch are expressed relative to the original fasta sequences rather
##   than to the batch sequences.
##
##   For every hit that is not a refinement entry ( query name
##   "refinement", or an ID containing "[" ):
##
##      - query start/end are shifted by the offset reported by
##        $batcher->translateBatchSeqPositionToFastaSeq()
##      - query remaining is recomputed from the length of the original
##        sequence
##      - the query name is replaced with the original sequence ID
##      - the hit is removed when both its start and end are at or
##        before the start of the batch sequence's valid range, or
##        ( when the end of the valid range is > 0 ) both are beyond it
##
##   Refinement entries whose ID is "[<id of a removed hit>]" are
##   removed as well.
##
##  Returns
##
##      Whatever $searchResults->write() returns ( the caller visible in
##      this script ignores it ).
##
## Example cross_match hit line.  All positions are 1-based
##
## Forward Strand:
##
## Column  Description                                      Eg
## ==============================================================
##   0     Smith-Waterman Score                             2334
##   1     Percent Divergence                               8.44
##   2     Percent Deletions                                0.00
##   3     Percent Insertions                               3.25
##   4     Query Sequence                                   Human
##   5     Query Begin Pos                                  127
##   6     Query End Pos                                    737
##   7     Query Left - The bp left after query end         (8222)
##   8     Subject Sequence                                 AluSx#SINE/Alu
##   9     Subject Begin                                    1
##   10    Subject End                                      298
##   11    Subject Left                                     (14)
##   12    ID - A unique id for this run of crossmatch      2
##
## Reverse Strand:
##
## Column  Description                                      Eg
## ==============================================================
##   0     Smith-Waterman Score                             2334
##         ... same as above..
##   8     Strand Designator                                C
##   9     Subject Sequence                                 AluSx#SINE/Alu
##   10    Subject Left                                     (14)
##   11    Subject End                                      298
##   12    Subject Begin                                    1
##   13    ID                                               3
##
##    Globals Used: $DEBUG ( diagnostic output only )
##
##-------------------------------------------------------------------------##
sub adjustFragmentPositions {
  my ( $batcher, $catfile ) = @_;

  my $origSeqDB = $batcher->getSeqDBObj();
  my $hits      =
      CrossmatchSearchEngine::parseOutput( searchOutput => "$catfile" );

  # Pass 1: translate coordinates and drop hits outside the valid range.
  # Walk from the last index down so removals do not disturb the indices
  # still to be visited.
  my %droppedRefIDs = ();
  foreach my $idx ( reverse( 0 .. $hits->size() - 1 ) ) {
    my $hit        = $hits->get( $idx );
    my $batchSeqID = $hit->getQueryName();

    # Refinement entries are left untouched in this pass.
    next if ( $batchSeqID eq "refinement" );
    next if ( $hit->getId() =~ /\[/ );

    my $origSeqID  = $batcher->getSeqIDFromBatchSeqID( $batchSeqID );
    my $batchStart = $hit->getQueryStart();
    my $offset     =
        $batcher->translateBatchSeqPositionToFastaSeq( $batchSeqID,
                                                       $batchStart ) -
        $batchStart;

    $hit->setQueryStart( $batchStart + $offset );
    $hit->setQueryEnd( $hit->getQueryEnd() + $offset );
    $hit->setQueryRemaining(
                $origSeqDB->getSeqLength( $origSeqID ) - $hit->getQueryEnd() );
    $hit->setQueryName( $origSeqID );

    # Range of positions this batch sequence is responsible for.
    my ( $beginValidPos, $endValidPos ) =
        $batcher->getSeqIDValidRange( $batchSeqID );

    my $newStart = $hit->getQueryStart();
    my $newEnd   = $hit->getQueryEnd();

    # Decide whether the hit falls outside our boundary; the banner
    # doubles as the "drop it" flag and records which rule fired.
    my $dropBanner;
    if ( $newStart <= $beginValidPos && $newEnd <= $beginValidPos ) {
      $dropBanner = " DELETING ANNOT!\n";
    }
    elsif (    $endValidPos > 0
            && $newEnd > $endValidPos
            && $newStart > $endValidPos )
    {
      $dropBanner = " DELETING ANNOT2!\n";
    }
    next unless ( defined $dropBanner );

    if ( $DEBUG ) {
      print STDERR $dropBanner;
      print STDERR $hit->toStringFormatted( SearchResult::OutFileFormat );
      print STDERR "beginValidPos = $beginValidPos "
          . "endValidPos = $endValidPos\n";
    }

    # Remember the bracketed form of the ID so that the refinement
    # entries belonging to this hit can be found in pass 2.
    $droppedRefIDs{ "[" . $hit->getId() . "]" } = 1;
    $hits->remove( $idx );
  }

  # Pass 2: remove the refinement entries which are children of a
  # removed annotation.
  foreach my $idx ( reverse( 0 .. $hits->size() - 1 ) ) {
    my $refID = $hits->get( $idx )->getId();
    next unless ( $refID =~ /\[/ && exists $droppedRefIDs{$refID} );
    $hits->remove( $idx );
  }

  $hits->write( "$catfile", SearchResult::AlignWithQuerySeq );

}

##-------------------------------------------------------------------------##
## Use:  &locateISElements( \%options, $batcher,
##                          $batchNum, $file, $qualFile,
##                          $REPEATMASKER_DIR,
##                          $searchEngine,
##                          $outFilesPrefix, $generalLibDir );
##
##   Screens a batch sequence file for E. coli insertion ( IS )
##   elements, reports what was found and, on request, clips the
##   elements out of the sequence ( and quality ) data.
##
##   Only needs to support two atomic operations.  Report and
##   clip.
##
##  Input
##
##       \%options       : The RepeatMasker option hashtable ( copied ).
##                         Keys read here: 'is_only', 'is_clip'.
##       $batcher        : Batcher object; used to translate positions
##                         of fragmented batches.
##       $batchNum       : Number of the batch held in $file.
##       $file           : Batch sequence file; opened read/write and
##                         modified in place when clipping.
##       $qualFile       : Optional quality file matching the sequences.
##       $REPEATMASKER_DIR : Handed to &search as its $DIRECTORY.
##       $searchEngine   : Search engine object handed to &search.
##       $outFilesPrefix : Prefix for the .withoutIS, .qual.withoutIS
##                         and .alert output files.
##       $generalLibDir  : Directory containing is.lib.
##
##  Returns
##
##       Nothing.
##
##   Flow:
##      1. Searches $file for IS elements ( library
##         $generalLibDir/is.lib ) with the output name $file.iscat
##         (Note...removes .masked from name if it exists).
##
##      2. If a quality file is handed to us it reads the *entire*
##         quality file into memory ( qualData local datastructure ).
##
##      3. Parse search results and locate IS elements.
##
##      4. Clip IS elements from $file if is_clip or is_only options
##         used.
##
##      5. Clip qual values from qualData local datastructure if
##         is_clip or is_only options used.
##
##      6. Saves a copy of the sequence/qual at this stage
##         if is_clip or is_only options are used.  The sequence is
##         appended to $outFilesPrefix.withoutIS and the quality data
##         is written to $outFilesPrefix.qual.withoutIS ( a ".seq"
##         directly in front of ".qual.withoutIS" is dropped ).
##
##      7. Prints the report and appends it to $outFilesPrefix.alert.
##
##   NOTE: A basic understanding of Dutch and French is necessary
##         to decipher this subroutine ( naam = name, links = left,
##         rechts = right, nom = name ).
##
##   Globals Used: none
##-------------------------------------------------------------------------##
sub locateISElements {
  my %options        = %{ shift() };
  my $batcher        = shift;
  my $batchNum       = shift;
  my $file           = shift;
  my $qualFile       = shift;
  my $DIRECTORY      = shift;
  my $searchEngine   = shift;
  my $outFilesPrefix = shift;
  my $generalLibDir  = shift;

  print "\nChecking for E. coli insertion elements\n";

  my @ISlines     = ();    # indices of hits accepted as clippable IS elements
  my @ISFailed    = ();    # indices of hits which could not be clipped
  my $ISclipornot = "";    # paragraph explaining what was ( not ) clipped
  my $qualprob    = 0;     # set when no usable quality file is available

  my $seqDB = FastaDB->new(
                            fileName    => $file,
                            openMode    => SeqDBI::ReadWrite,
                            maxIDLength => 50
  );

  my $filePrefix = $file;
  $filePrefix =~ s/\.masked//;

  my $minscore = 17;
  my $minmatch = 15;
  my $lib      = "$generalLibDir/is.lib";
  my $matrix   = "identity.matrix";
  my ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) = ( "", "", "" );
  my $bandwidth = "";
  my $masklevel = "";
  my $maskfile  = $file;
  my $raw       = "";
  my $wordraw   = "";
  my $outfile   = "$filePrefix" . ".iscat";
  my $searchResults;

  ## Perform the search
  ( $minmatch, $bandwidth, $searchResults ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
  );

  #
  # Read in quality file data if available
  #
  #   This should probably be generalized and made
  #   into an object for dealing with quality files.
  #   For now we will leave well enough alone.
  #
  my %qualData  = ();    # sequence name => concatenated quality lines
  my %qualHdr   = ();    # sequence name => complete ">" header line
  my @qualNames = ();    # sequence names in file order
  my $qualname  = "";    # name of the record currently being read
  if (    -s $qualFile
       && -r $qualFile
       && open( my $qualIn, '<', $qualFile ) )
  {
    while ( my $line = <$qualIn> ) {
      chomp $line;
      if ( $line =~ /^>/ ) {
        $qualname = $line;
        $qualname =~ s/^>(\S*).*/$1/;
        push @qualNames, $qualname;
        $qualHdr{$qualname} = $line;
      }
      else {
        $qualData{$qualname} .= $line;
      }
    }
    close $qualIn;
  }
  else {

    # Missing, empty, unreadable or unopenable quality file
    $qualprob = 1;
  }

  # Name of the quality file written when clipping.  Computed up front
  # so that the report names the file that is really written.
  my $qualout = "$outFilesPrefix.qual.withoutIS";
  $qualout =~ s/\.seq\.qual\.withoutIS\z/.qual.withoutIS/;

  ## This table needs occasional updating (new
  ## elements, new information on flexibility in
  ## duplication lengths).  The lengths are tried in the
  ## order listed.
  my %dupLengthsFor = (
                        'IS1'    => [ 9, 8, 10 ],
                        'IS2'    => [ 5 ],
                        'IS3'    => [ 3 ],
                        'IS5'    => [ 4 ],
                        'IS10'   => [ 9 ],
                        'IS30'   => [ 2 ],
                        'IS150'  => [ 3, 4 ],
                        'IS186'  => [ 10, 11 ],
                        'Tn1000' => [ 5 ],
  );

  #
  # State carried from one search result to the next
  #
  my $lastname  = "";
  my $lastbegin = "";
  my $lastend   = "";
  my $lastscore = 0;
  my $lastis    = "";
  my $lastor    = "";
  my $deleted   = 0;

  #
  # Loop through search results
  #
  for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
    my $result    = $searchResults->get( $i );
    my $naam      = $result->getQueryName();
    my $begin     = $result->getQueryStart();
    my $end       = $result->getQueryEnd();
    my $remaining = $result->getQueryRemaining();
    my $or        = ( $result->getOrientation() ne "C" ) ? 'pos' : 'neg';
    my $is        = $result->getSubjName();
    my $leftis    = $result->getSubjRemaining();
    my $beginis   = $result->getSubjStart();

    ## $deleted keeps track of nr of bp in IS elements already
    ## deleted from the same query sequence;
    $deleted = 0 unless $lastname && $naam eq $lastname;
    $leftis =~ tr/()//d;
    ## remove classification
    $is =~ s/\#\w+//;

    ## $lastis is set if last IS was incomplete but did either
    ## start at pos 1 and had + orientation or end at last bp
    ## and had - orientation
    if ( $lastis ) {
      ## Following allows to clip an IS element with a gap
      ## in the middle (may be a sequencing gap)
      if (
              $is   eq $lastis
           && $naam eq $lastname
           && $or   eq $lastor
           && (    $or eq 'pos' && $leftis == 0
                || $or eq 'neg' && $beginis == 1 )
           && $begin - $lastend <= 2
          )
      {    # rather conservatively
        $begin   = $lastbegin;
        $beginis = 1 if $or eq 'pos' && $leftis == 0;
        $leftis  = 0 if $or eq 'neg' && $beginis == 1;
      }
      else {

        # The previous partial hit was not completed by this one
        push @ISFailed, ( $i - 1 ) if $lastscore > 25;
        $lastis = $lastbegin = $lastend = "";
        $lastor = $lastscore = "";
      }
    }

    if ( $beginis == 1 && $leftis == 0 ) {
      ## complete element
      my $length = $end + 1 - $begin;

      # Work on a copy; the candidate list is consumed below.
      my @dupLengths =
          exists $dupLengthsFor{$is} ? @{ $dupLengthsFor{$is} } : ();

      ## Get the left and right flanking site duplications.  The
      ## placeholders differ so that "nothing tried" never compares equal.
      my ( $links, $rechts ) = qw(links rechts);
      my $dupLength = 0;
      ## Try all known flanking site lengths
      while ( @dupLengths && $links ne $rechts ) {
        $dupLength = shift( @dupLengths );
        next if (    $dupLength > ( $begin - 1 )
                  || $dupLength > $remaining );
        $links =
            $seqDB->getSubstr( $naam, $begin - $dupLength - 1 - $deleted,
                               $dupLength );
        $rechts = $seqDB->getSubstr( $naam, $end - $deleted, $dupLength );
      }

      ## If flanking site duplications are identical (these
      ## are cloning artifacts; there was no time for
      ## substitutions to take place to make the sites different
      if ( $links ne "" && $links eq $rechts ) {
        ## Deletes a segment including the IS element and one
        ## flanking site, thereby reconstructing the original
        ## sequence
        if ( $options{'is_only'} || $options{'is_clip'} ) {
          $seqDB->setSubstr( $naam,
                             $begin - 1 - $deleted,
                             $length + $dupLength, "" );
          ## Similarly removes qual values in $qualData
          if ( $qualname && defined $qualData{$naam} ) {
            my @qual = split( / /, $qualData{$naam} );
            splice @qual, $begin - 1 - $deleted, $length + $dupLength;
            $qualData{$naam} = join " ", @qual;
          }
          $deleted += $length + $dupLength;
        }
        push @ISlines, $i;
      }
      else {
        push @ISFailed, $i;
      }
      $lastis = $lastbegin = $lastend = $lastscore = "";
    }
    elsif (    $beginis == 1 && $or eq 'pos'
            || $leftis == 0 && $or eq 'neg' )
    {
      ## save info to see if rest of the IS element follows
      $lastis    = $is;
      $lastbegin = $begin;
      $lastend   = $end;
      $lastor    = $or;

      # NOTE(review): getScore() is handed $i here and below exactly as
      # before; presumably the argument is ignored -- confirm.
      $lastscore = $result->getScore( $i );
    }
    else {
      $lastis = $lastbegin = $lastend = "";

      # only report if match scores > 25; otherwise may be
      # freak false positive
      push @ISFailed, ( $i ) if ( $result->getScore( $i ) > 25 );
    }
    $lastname = $naam;
  }

  if ( $lastis ) {    #often with IS_only
    push @ISFailed, ( $searchResults->size() - 1 ) if ( $lastscore > 25 );
  }

  ## adjust position for broken up very large sequences
  if ( @ISlines && $batcher->isBatchFragmented( $batchNum ) ) {
    my $seqDBObj = $batcher->getSeqDBObj();
    foreach my $resultIndex ( @ISlines ) {
      my $ISResult = $searchResults->get( $resultIndex );
      my $qryName  = $ISResult->getQueryName();
      my $qryBegin = $ISResult->getQueryStart();
      my $qryEnd   = $ISResult->getQueryEnd();

      my $adjustment =
          $batcher->translateBatchSeqPositionToFastaSeq( $qryName, $qryBegin ) -
          $qryBegin;

      $ISResult->setQueryStart( $qryBegin + $adjustment );
      $ISResult->setQueryEnd( $qryEnd + $adjustment );

      # The remaining length is measured from the *adjusted* end, the
      # same way adjustFragmentPositions() computes it.
      $ISResult->setQueryRemaining(
         $seqDBObj->getSeqLength( $batcher->getSeqIDFromBatchSeqID( $qryName ) )
             - ( $qryEnd + $adjustment ) );
    }
  }

  # Paragraph listing the hits which were seen but not clipped
  my $failedReport = "";
  if ( @ISFailed ) {
    $failedReport =
          "The following E coli IS elements could not be "
        . "confidently clipped out:\n"
        . &_formatISHitList( $searchResults, @ISFailed );
  }

  if ( @ISlines ) {
    my $qualthere = "";
    if ( $qualprob ) {
      $qualthere =
            "\nNo corresponding Phred file ($qualFile) "
          . "could be read in this directory,\n so no "
          . "modified quality file has been made\n";
    }
    else {
      $qualthere =
            "\nThe quality file $qualout "
          . "corresponds to this clipped fasta sequence file\n";
    }
    if ( $options{'is_only'} ) {
      $ISclipornot =
            "These elements and one flanking duplicated "
          . "site have been clipped out\nof the sequence "
          . "in the file $outFilesPrefix.withoutIS\n$qualthere\n";
    }
    elsif ( $options{'is_clip'} ) {
      $ISclipornot =
            "These elements and one flanking duplicated site "
          . "have been clipped out\nbefore the repeatmasker "
          . "run, so that coordinates do not correspond\n"
          . "everywhere with the original sequence. A clipped "
          . "version of the\nsequence(s) is in the file "
          . "$outFilesPrefix.withoutIS\n$qualthere\n";
    }
    else {
      $ISclipornot =
            "These elements can be clipped out with the "
          . "options is_clip or is_only.  The latter does "
          . "not run the 'normal' RepeatMasker routine and "
          . "positions in the current\n.out file will not "
          . "correspond with the -is_only reconstructed "
          . "sequence.\n";
    }

    my $ISreport = "One or more E. coli IS elements were found:\n";
    $ISreport .= &_formatISHitList( $searchResults, @ISlines );
    $ISreport .= "\n$ISclipornot\n";
    $ISreport .= $failedReport;

    print "$ISreport\n";
    if ( $options{'is_only'} || $options{'is_clip'} ) {

      # NOTE(review): both names are interpolated unquoted into a shell
      # command, so paths containing whitespace or shell metacharacters
      # will not work here.
      system( "cat $file >> $outFilesPrefix.withoutIS" );
      if ( $qualname ) {
        if ( open( my $qualOut, '>', $qualout ) ) {
          ## $nom French too!
          foreach my $nom ( @qualNames ) {

            # fixed with \n sept 03
            print $qualOut "$qualHdr{$nom}\n$qualData{$nom}\n";
          }
          close $qualOut;
        }
        else {
          warn "WARNING: Could not write quality file $qualout: $!\n";
        }
      }
    }
    &_appendISAlert( "$outFilesPrefix.alert", $ISreport );
  }
  elsif ( @ISFailed ) {
    &_appendISAlert( "$outFilesPrefix.alert", $failedReport );
    print "$failedReport";
  }

  # NOTE(review): the search output file ( $outfile ) is not removed here.
  return;
}

##-------------------------------------------------------------------------##
## Use:  my $text = &_formatISHitList( $searchResults, @resultIndices );
##
##   Private helper of locateISElements.  Returns one report line
##   ( "  <subject> in <query>: <start> - <end>\n" ) per listed result
##   index, concatenated in the order given.
##-------------------------------------------------------------------------##
sub _formatISHitList {
  my $searchResults = shift;
  my @resultIndices = @_;

  my $text = "";
  foreach my $resultIndex ( @resultIndices ) {
    my $ISResult = $searchResults->get( $resultIndex );
    $text .= "  "
        . $ISResult->getSubjName() . " in "
        . $ISResult->getQueryName() . ": "
        . $ISResult->getQueryStart() . " - "
        . $ISResult->getQueryEnd() . "\n";
  }
  return $text;
}

##-------------------------------------------------------------------------##
## Use:  &_appendISAlert( $alertFile, $text );
##
##   Private helper of locateISElements.  Appends $text to $alertFile;
##   warns ( rather than dies ) when the file cannot be opened.
##-------------------------------------------------------------------------##
sub _appendISAlert {
  my $alertFile = shift;
  my $text      = shift;

  if ( open( my $alertOut, '>>', $alertFile ) ) {
    print $alertOut $text;
    close $alertOut;
  }
  else {
    warn "WARNING: Could not append to $alertFile: $!\n";
  }
  return;
}

##-------------------------------------------------------------------------##
## Use:  my ( $minmatch, $bandwidth, $resultsCollection ) =
##                   &search( \%options, $DIRECTORY, $outfile, $maskfile,
##                                $lib, $minmatch, $bandwidth, $matrix,
##                                $gap_initValue, $ins_gap_extValue,
##                                $del_gap_extValue, $minscore, $masklevel,
##                                $searchEngine, $wordraw,
##                                $raw );
##
##    Runs the search engine using the settings specified in the
##    subroutine runSearchStages, traps error results and aborts
##    on search engine failure.
##
##  Input
##
##       \%options : The RepeatMasker option hashtable
##
##  Returns
##
##       $minmatch : The new minmatch ( as it may have changed )
##       $bandwidth : The new bandwidth ( ditto )
##       $resultsCollection :
##
##
##  Globals Used: None
##  Globals Modified: None
##
##-------------------------------------------------------------------------##
sub search {

  #
  # Configure $searchEngine from the supplied parameters and run it,
  # retrying with progressively cheaper settings when the engine
  # reports a failure.
  #
  #  Returns
  #    ( $minmatch, $bandwidth, $resultCollection )
  #    where $minmatch/$bandwidth are the values in effect for the
  #    search that finally succeeded (they may differ from the values
  #    passed in) and $resultCollection is whatever the engine's
  #    search() method returned as its second value.
  #
  #    If no retry is possible the routine prints a warning and
  #    terminates the program with exit( -1 ).
  #
  #    Globals Used: $DEBUG
  #
  my %options          = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY        = shift;
  my $outfile          = shift;
  my $maskfile         = shift;
  my $lib              = shift;
  my $minmatch         = shift;
  my $bandwidth        = shift;
  my $matrix           = shift;
  my $gap_initValue    = shift;
  my $ins_gap_extValue = shift;
  my $del_gap_extValue = shift;
  my $minscore         = shift;
  my $masklevel        = shift;
  my $searchEngine     = shift;
  my $wordraw          = shift;
  my $raw              = shift;

  print "RepeatMasker::search( \%options, $DIRECTORY, $outfile, $maskfile,\n"
      . "                      $lib, $minmatch, $bandwidth, $matrix\n"
      . "                      $gap_initValue, $ins_gap_extValue, $del_gap_extValue\n"
      . "                      $minscore, $masklevel, \$searchEngine, $wordraw\n"
      . "                      $raw );\n"
      if ( $DEBUG );

  # A low divergence cutoff ( -div ) tightens the word match length:
  # +1 at <= 20 and +2 at <= 10.
  if ( $options{'div'} && $options{'div'} <= 20 ) {
    ++$minmatch;
    ++$minmatch if $options{'div'} <= 10;
  }

  # Matrices live in an engine-specific sub directory.
  my $matrixPrefix = "crossmatch";
  if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
    $matrixPrefix = "wublast/aa";
  }
  elsif (    $searchEngine->isa( "NCBIBlastSearchEngine" )
          || $searchEngine->isa( "HMMERSearchEngine" ) )
  {
    $matrixPrefix = "ncbi/nt";
  }

  # WUBlast with the -s option: loosen the parameters for every library
  # except the simple / at.lib ones.
  if (    $searchEngine->isa( "WUBlastSearchEngine" )
       && $options{'s'}
       && $lib !~ /simple|at\.lib/ )
  {
    $minmatch -= 1;
    $bandwidth += 15;
  }

  ##
  ## Special case.  If we are refining a previous alignment we are
  ## starting with a fixed region from which we want to make a pseudo
  ## global alignment.  We expect the refined alignments to be close
  ## to the length of the full region.  If we pass -1
  ## to the bandwidth parameter of NCBIBlastSearchEngine it will
  ## respond by relaxing the -xdrop_gap_final parameter to something
  ## higher than the minscore threshold.
  ##
  if (    $searchEngine->isa( "NCBIBlastSearchEngine" )
       && $lib =~ /refine/ )
  {
    # RMH: Reverted 9/24
    #$bandwidth = '0';
    $bandwidth = '-1';
  }

  $searchEngine->setQuery( $maskfile );
  $searchEngine->setSubject( $lib );
  $searchEngine->setMatrix( "$DIRECTORY/Matrices/$matrixPrefix/$matrix" );
  $searchEngine->setWordRaw( $wordraw ? 1 : 0 );
  if ( $raw ) {
    $searchEngine->setScoreMode( SearchEngineI::basicScoreMode );
  }
  else {
    $searchEngine->setScoreMode( SearchEngineI::complexityAdjustedScoreMode );
  }

  ## RMH: 11/20/12
  ##   We now insist upon alignments for all search engines, even if the user
  ##   doesn't need them.  Now the -a flag simply tells the program whether or
  ##   not to generate a final *.align file.  The alignment data is really
  ##   critical to the program's annotation decisions.
  $searchEngine->setGenerateAlignments( 1 );
  $searchEngine->setGapInit( $gap_initValue );
  $searchEngine->setInsGapExt( $ins_gap_extValue );
  $searchEngine->setDelGapExt( $del_gap_extValue );

  # $masklevel may be a bare number or a "-masklevel N" command line
  # fragment; the engine only wants the number.
  my $maskLevelNum = $masklevel;
  if ( $masklevel =~ /-masklevel\s+(\d+)\s*/ ) {
    $maskLevelNum = $1;
  }
  $searchEngine->setMaskLevel( $maskLevelNum );
  $searchEngine->setMinScore( $minscore );

  # Shared "give up" path: report the failure and terminate the program.
  my $abortSearch = sub {
    my $failedStatus = shift;
    print "WARNING: The search engine returned an error ("
        . ( $? >> 8 )
        . ", status = $failedStatus )\n"
        . "Engine parameters: "
        . $searchEngine->getParameters() . "\n"
        . "A search phase could not complete on this batch.\n"
        . "The batch file will be re-run and if possible the\n"
        . "program will resume.\n";
    exit( -1 );
  };

  my $status;
  my $resultCollection;
  while ( 1 ) {

    # The engine also returns its output/error file names; they are
    # not needed here.
    ( $status, $resultCollection ) = $searchEngine->search(
                                                         minMatch  => $minmatch,
                                                         bandwidth => $bandwidth
    );

    # A false status means the search completed.
    last unless $status;

    if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {

      # HMMER Errors: there is nothing to relax, so fail immediately.
      $abortSearch->( $status );
    }
    elsif ( $bandwidth > 14 ) {
      $bandwidth = 14;
      warn "WARNING: Comparison failed. Retrying with smaller bandwidth\n";
    }
    elsif ( $bandwidth == 4 ) {    # second or third simple repeats check
          # Extreme measures for very long simple satellites
      $bandwidth = 1;
      warn "WARNING: Comparison failed. Retrying with smaller bandwidth\n";
    }
    elsif ( $minmatch < 10 ) {
      $minmatch++;
      warn "WARNING: Comparison failed. Retrying with larger minmatch "
          . "($minmatch)\n";
    }
    else {

      # Every fallback has been exhausted.
      $abortSearch->( $status );
    }
  }

  return ( $minmatch, $bandwidth, $resultCollection );
}    # sub search

##-------------------------------------------------------------------------##
## Use:  my $searchRecipeHashRef = &getSearchRecipes();
##
##  Returns
##    A reference to a newly built hash holding the parameters used
##    for all our searches, keyed by recipe name.  These are recipes
##    that can be used in multiple locations in this program.  Every
##    call builds a fresh structure, so callers may modify the result
##    without affecting later calls.
##
##    Each recipe is itself a hash with the keys:
##
##      description, minscore, minmatch, max_matrix_gc, matrix,
##      gap_initValue, ins_gap_extValue, del_gap_extValue, bandwidth,
##      filterContained, masklevel, raw, wordraw, chooseClass, excise
##
##    'minmatch' is always a four element array and 'bandwidth' is
##    either a plain number or a four element array; both are meant
##    to be resolved with selectParameter().  A 'matrix' name that
##    contains "##" is a template which the caller completes before
##    use.
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub getSearchRecipes {
  my $recipes = {
    perfect_simple_repeats => {
      description      => "",
      minscore         => 180,
      minmatch         => [ 8, 9, 10, 11 ],
      max_matrix_gc    => -1,
      matrix           => "simple1.matrix",
      gap_initValue    => -40,
      ins_gap_extValue => -15,
      del_gap_extValue => -15,
      bandwidth        => 1,
      filterContained  => 0,
      masklevel        => 1,
      raw              => 1,
      wordraw          => 1,
      chooseClass      => "simple",
      excise           => 1,
    },

    general_search_parameters => {
      description      => "",
      minscore         => 225,
      minmatch         => [ 8, 9, 11, 13 ],
      max_matrix_gc    => -1,
      matrix           => "20p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "masking",
      excise           => 0,
    },

    cut_young_sines_in_primates => {
      description      => "primates",
      minscore         => 1200,
      minmatch         => [ 7, 8, 10, 12 ],
      max_matrix_gc    => -1,
      matrix           => "14p##g.matrix",
      gap_initValue    => -35,
      ins_gap_extValue => -7,
      del_gap_extValue => -6,
      bandwidth        => 20,
      filterContained  => 0,
      masklevel        => 1,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "alu",
      excise           => 1,
    },

    mask_young_sines_in_primates => {
      description      => "primates - nocut option",
      minscore         => 1500,
      minmatch         => [ 7, 8, 10, 12 ],
      max_matrix_gc    => -1,
      matrix           => "14p##g.matrix",
      gap_initValue    => -35,
      ins_gap_extValue => -7,
      del_gap_extValue => -6,
      bandwidth        => 20,
      filterContained  => 0,
      masklevel        => 80,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "alumask",
      excise           => 0,
    },

    mask_sines_in_primates => {
      description      => "primates",
      minscore         => 225,
      minmatch         => [ 7, 8, 8, 9 ],
      max_matrix_gc    => -1,
      matrix           => "20p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 80,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "alumask",
      excise           => 0,
    },

    mask_sines_in_non_primate_mammals => {
      description      => "non-primate-mammals",
      minscore         => 225,
      minmatch         => [ 6, 7, 8, 10 ],
      max_matrix_gc    => -1,
      matrix           => "18p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 1,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "cut1",
      excise           => 1,
    },

    general_full_length_repeats => {
      description      => "larger bandwidth allows spanning of larger gaps",
      minscore         => 300,
      minmatch         => [ 9, 10, 11, 13 ],
      max_matrix_gc    => -1,
      matrix           => "18p##g.matrix",
      gap_initValue    => -33,
      ins_gap_extValue => -5,
      del_gap_extValue => -4,
      bandwidth        => 40,
      filterContained  => 0,
      masklevel        => 1,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "cut1",
      excise           => 1,
    },

    complete_3end_of_young_line1s => {
      description      => "",
      minscore         => 300,
      minmatch         => [ 9, 10, 11, 13 ],
      max_matrix_gc    => -1,
      matrix           => "18p##g.matrix",
      gap_initValue    => -33,
      ins_gap_extValue => -5,
      del_gap_extValue => -4,
      bandwidth        => 40,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "cut2",
      excise           => 1,
    },

    older_ALUs_in_primates => {
      description      => "",
      minscore         => 800,
      minmatch         => [ 7, 8, 10, 12 ],
      max_matrix_gc    => -1,
      matrix           => "14p##g.matrix",
      gap_initValue    => -35,
      ins_gap_extValue => -7,
      del_gap_extValue => -6,
      bandwidth        => 20,
      filterContained  => 1,
      masklevel        => 1,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "alumask",
      excise           => 0,
    },

    more_ALUs_in_primates => {
      description      => "",
      minscore         => 400,
      minmatch         => [ 7, 8, 9, 11 ],
      max_matrix_gc    => -1,
      matrix           => "18p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 10,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "alumask",
      excise           => 0,
    },

    short_repeats_and_satellites_rodents => {
      description      => "rodentia only",
      minscore         => 210,
      minmatch         => [ 7, 8, 9, 10 ],
      max_matrix_gc    => -1,
      matrix           => "25p##g.matrix",
      gap_initValue    => -27,
      ins_gap_extValue => -5,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "sines",
      excise           => 0,
    },

    short_repeats_and_satellites => {
      description      => "",
      minscore         => 225,
      minmatch         => [ 7, 8, 10, 12 ],
      max_matrix_gc    => -1,
      matrix           => "20p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "sines",
      excise           => 0,
    },

    long_interspersed_repeats => {
      description      => "",
      minscore         => 225,
      minmatch         => [ 7, 8, 10, 12 ],
      max_matrix_gc    => -1,
      matrix           => "20p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "longlib",
      excise           => 0,
    },

    ancient_repeats => {
      description      => "",
      minscore         => 180,
      minmatch         => [ 6, 6, 8, 9 ],
      max_matrix_gc    => -1,
      matrix           => "25p##g.matrix",
      gap_initValue    => -27,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => [ 25, 14, 14, 14 ],
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "mirs",
      excise           => 0,
    },

    tough_ancient_repeats => {
      description      => "",
      minscore         => 250,
      minmatch         => [ 6, 6, 8, 9 ],
      max_matrix_gc    => -1,
      matrix           => "25p##g.matrix",
      gap_initValue    => -27,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => [ 25, 14, 14, 14 ],
      filterContained  => 0,
      masklevel        => 90,
      raw              => 1,
      wordraw          => 0,
      chooseClass      => "masking",
      excise           => 0,
    },

    retroviruses => {
      description      => "",
      minscore         => 250,
      minmatch         => [ 9, 10, 11, 13 ],
      max_matrix_gc    => -1,
      matrix           => "20p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 0,
      wordraw          => 0,
      chooseClass      => "masking",
      excise           => 0,
    },

    tough_line1s_in_eutheria => {
      description      => "Eutheria",
      minscore         => 300,
      minmatch         => [ 7, 8, 9, 11 ],
      max_matrix_gc    => 49,
      matrix           => "25p##g.matrix",
      gap_initValue    => -27,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 14,
      filterContained  => 0,
      masklevel        => 90,
      raw              => 1,
      wordraw          => 0,
      chooseClass      => "l1",
      excise           => 0,
    },

    simple_repeats_again => {
      description      => "",
      minscore         => 180,
      minmatch         => [ 7, 8, 9, 10 ],
      max_matrix_gc    => -1,
      matrix           => "simple1.matrix",
      gap_initValue    => -40,
      ins_gap_extValue => -15,
      del_gap_extValue => -15,
      bandwidth        => 4,
      filterContained  => 0,
      masklevel        => 25,
      raw              => 1,
      wordraw          => 1,
      chooseClass      => "masking",
      excise           => 0,
    },

    simple_repeats_flanking => {
      description      => "",
      minscore         => 200,
      minmatch         => [ 6, 7, 8, 9 ],
      max_matrix_gc    => -1,
      matrix           => "simple.matrix",
      gap_initValue    => -35,
      ins_gap_extValue => -10,
      del_gap_extValue => -10,
      bandwidth        => 4,
      filterContained  => 0,
      masklevel        => 75,
      raw              => 1,
      wordraw          => 1,
      chooseClass      => "masking",
      excise           => 0,
    },

    low_complexity => {
      description      => "",
      minscore         => 21,
      minmatch         => [ 5, 5, 5, 5 ],
      max_matrix_gc    => -1,
      matrix           => "at.matrix",
      gap_initValue    => -10,
      ins_gap_extValue => -3,
      del_gap_extValue => -3,
      bandwidth        => 2,
      filterContained  => 0,
      masklevel        => 95,
      raw              => 1,
      wordraw          => 1,
      chooseClass      => "",
      excise           => 0,
    },

    refinement => {
      description      => "",
      minscore         => 180,
      minmatch         => [ 7, 8, 9, 11 ],
      max_matrix_gc    => -1,
      matrix           => "18p##g.matrix",
      gap_initValue    => -30,
      ins_gap_extValue => -6,
      del_gap_extValue => -5,
      bandwidth        => 40,
      filterContained  => 0,
      masklevel        => 101,
      raw              => 1,
      wordraw          => 1,
      chooseClass      => "",
      excise           => 0,
    },
  };

  return $recipes;
}

##-------------------------------------------------------------------------##
## Use:  my $param = &selectParameter( $options, $arrayRef );
##
##      Pick the value of a search parameter that applies to the
##      current run.
##
##        $options  : Reference to the option hash.  Only the keys
##                    'qq', 'q' and 's' are examined ( presumably the
##                    speed/sensitivity switches -- confirm against
##                    the option parser ).
##        $arrayRef : Either a reference to an array of alternative
##                    values or a single plain value.
##
##  Returns
##      If $arrayRef is an array reference, one of its elements:
##
##          'qq' set          : element 3
##          'q' set           : element 2
##          's' set           : element 0
##          none of the above : element 1
##
##      ( tested in that order, so 'qq' wins over 'q' which wins
##      over 's' ).  Anything else is returned unchanged.
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub selectParameter {
  my ( $options, $arrayRef ) = @_;

  # A plain value is the same whatever the options say.
  return $arrayRef unless ref( $arrayRef ) eq "ARRAY";

  my $slot =
        $options->{'qq'} ? 3
      : $options->{'q'}  ? 2
      : $options->{'s'}  ? 0
      :                    1;

  return $arrayRef->[ $slot ];
}

##-------------------------------------------------------------------------##
## Use:  my $numResults = &runTestStage(
##                            $options,           $stageText,
##                            $batchIdentifierText, $searchParams,
##                            $lib,               $outfile,
##                            $maskfile,          $searchEngine,
##                            $batchNum,          $batcher,
##                            $tax,               $excisions,
##                            $numX,              $refineableHashRef,
##                            $refinementHash,    $stage,
##                            $filterType,        $DIRECTORY,
##                            $seqDB );
##
##      In Development:  This routine is meant to test sub
##                       regions of repeat models/sequences that may
##                       give rise to false positives.
##
##      Runs a single search of $maskfile against $lib using one search
##      recipe ( $searchParams, see getSearchRecipes() ), labels every
##      result with $stage and writes the results to $outfile.
##
##      $tax, $excisions, $numX, $refineableHashRef, $refinementHash,
##      $filterType and $seqDB are accepted but not used by this
##      routine.
##
##  Returns
##      The number of results produced by the search.
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runTestStage {
  my $options             = shift;
  my $stageText           = shift;
  my $batchIdentifierText = shift;
  my $searchParams        = shift;
  my $lib                 = shift;
  my $outfile             = shift;
  my $maskfile            = shift;
  my $searchEngine        = shift;
  my $batchNum            = shift;
  my $batcher             = shift;
  my $tax                 = shift;
  my $excisions           = shift;
  my $numX                = shift;
  my $refineableHashRef   = shift;
  my $refinementHash      = shift;
  my $stage               = shift;
  my $filterType          = shift;
  my $DIRECTORY           = shift;
  my $seqDB               = shift;

  # Progress message: "<stage text> in <batch id>" or just "<stage text>".
  if ( $stageText ne "" ) {
    print $stageText;
    if ( $batchIdentifierText ne "" ) {
      print " in " . $batchIdentifierText . "\n";
    }
    else {
      print "\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  #
  # NOTE(review): $fragCnt is not used below; the call is kept in case
  # getBatchCount() has side effects -- confirm and remove.
  my $fragCnt = $batcher->getBatchCount();

  #
  # Resolve matrix parameter
  #
  #   A recipe matrix of the form "<div>p##g.matrix" is a template: the
  #   "##g" part is replaced by whatever chooseMatrices() returns for
  #   the GC level determined below.  Any other name is used as is.
  #
  my $matrix = $searchParams->{'matrix'};
  if ( $searchParams->{'matrix'} =~ /(\d+)p\#+g\.matrix/ ) {
    my $div = $1;
    my $GC_frac;
    if ( $options->{'gc'} ) {

      # user decides GC background
      $GC_frac = $options->{'gc'};
    }
    else {
      my $seq_cnt = $batcher->getBatchSeqCount( $batchNum );
      my $seqlen  = $batcher->getBatchSeqLength( $batchNum );

      # Only use the batch average when asked to ( -gccalc ) or when
      # the batch is one sequence longer than 2000; otherwise fall
      # back to a fixed value of 43.
      if ( $options->{'gccalc'} || ( $seq_cnt == 1 && $seqlen > 2000 ) ) {
        $GC_frac = $batcher->getBatchAverageGC( $batchNum );
      }
      else {
        $GC_frac = 43;
      }
    }

    # A recipe may cap the GC level ( a value <= 0 means "no cap" ).
    if (    defined $searchParams->{'max_matrix_gc'}
         && $searchParams->{'max_matrix_gc'} > 0
         && $GC_frac > $searchParams->{'max_matrix_gc'} )
    {
      $GC_frac = $searchParams->{'max_matrix_gc'};
    }
    my $GC = &chooseMatrices( $GC_frac );
    $matrix = $div . "p" . $GC . ".matrix";
  }

  # This stage searches with a minscore 7.5% ( rounded down ) below the
  # recipe's value and always uses a masklevel of 101.
  my $minscore =
      $searchParams->{'minscore'} - int( $searchParams->{'minscore'} * 0.075 );
  print "minscore is now $minscore\n";
  my $minmatch  = selectParameter( $options, $searchParams->{'minmatch'} );
  my $bandwidth = selectParameter( $options, $searchParams->{'bandwidth'} );
  my $gap_initValue    = $searchParams->{'gap_initValue'};
  my $ins_gap_extValue = $searchParams->{'ins_gap_extValue'};
  my $del_gap_extValue = $searchParams->{'del_gap_extValue'};
  my $masklevel        = 101;
  my $raw              = $searchParams->{'raw'};
  my $wordraw          = $searchParams->{'wordraw'};

  # search() hands back the minmatch/bandwidth it ended up using; store
  # them in the existing variables rather than redeclaring them.
  my $resultsCollection;
  ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           $options,          $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
  );

  # Tag every hit with the stage that produced it.
  for ( my $k = 0 ; $k < $resultsCollection->size() ; $k++ ) {
    my $result = $resultsCollection->get( $k );
    $result->setId( $stage );
  }

  $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );

  return ( $resultsCollection->size() );
}

##-------------------------------------------------------------------------##
## Use:  my $numResults = &runTRFStage(
##                          $options, $stageText, $batchIdentifierText,
##                          $searchParams, $lib, $outfile, $maskfile,
##                          $searchEngine, $batchNum, $batcher, $tax,
##                          $excisions, $numX, $refineableHashRef,
##                          $refinementHash, $stage, $filterType,
##                          $DIRECTORY, $seqDB );
##
##      TRF replacement for the Simple/Low_complexity repeat consensus
##      based search stages.  Runs TRF on $maskfile, rescores and
##      filters the hits, then hands the surviving set to
##      filterResults() and postProcessSearch().
##
##      $searchParams is compared as a plain string: "PERFECT" selects
##      the profile for young tandem repeats ( sets the excise flag
##      passed to postProcessSearch() and applies mask level
##      filtering ); any other value selects the profile for old
##      tandem repeats.
##
##      If TRF reports an error file a warning is printed and the
##      program exits with -1.
##
##  Returns
##      The number of results left in the filtered collection.
##
##    Globals Used: $DEBUG, $TRF_PRGM
##    Globals Modified: $DEBUG ( forced to 0 during the call and
##                      restored before returning )
##-------------------------------------------------------------------------##
sub runTRFStage {
  my (
       $options,        $stageText,    $batchIdentifierText,
       $searchParams,   $lib,          $outfile,
       $maskfile,       $searchEngine, $batchNum,
       $batcher,        $tax,          $excisions,
       $numX,           $refineableHashRef,
       $refinementHash, $stage,        $filterType,
       $DIRECTORY,      $seqDB
      )
      = @_;

  # Debug output is switched off for the whole stage; the previous
  # setting is put back just before returning.
  my $savedDebug = $DEBUG;
  $DEBUG = 0;

  # TRF works in the directory that holds the stage output file.
  my ( undef, $workingDir, undef ) = File::Spec->splitpath( $outfile );
  $workingDir = "." if ( $workingDir eq "" );

  # Stage banner: "<stageText>[ in <batchIdentifierText>]\n"
  if ( $stageText ne "" ) {
    print $stageText;
    print(   ( $batchIdentifierText ne "" )
           ? " in " . $batchIdentifierText . "\n"
           : "\n" );
  }

  my $trf = TRF->new( pathToEngine => $TRF_PRGM,
                      workDir      => $workingDir );

  #
  # The two parameter profiles: young ( "PERFECT" ) vs old tandem
  # repeats.  'mu' holds one value per repeat period ( 1..maxPeriod )
  # and is indexed by period - 1 when converting to a bit score.
  #
  my %trfProfile =
      ( $searchParams eq "PERFECT" )
      ? (
          matchWeight     => 2,
          mismatchPenalty => 7,
          delta           => 7,
          pm              => 80,
          pi              => 10,
          minScore        => 50,
          maxPeriod       => 10,
          excise          => 1,
          minCopyNumber   => 4,
          lambda          => 0.41,
          mu              => [
                  8.51, 1.04, 6.26,  8.65,  10.36,
                  7.19, 8.81, 10.84, 12.97, 15.07
          ]
      )
      : (
          matchWeight     => 2,
          mismatchPenalty => 3,
          delta           => 5,
          pm              => 75,
          pi              => 20,
          minScore        => 33,
          maxPeriod       => 7,
          excise          => 0,
          minCopyNumber   => 5,
          lambda          => 0.32,
          mu              => [ 0.79, 1.00, 4.78, 6.94, 8.68, 9.84, 11.94 ]
      );

  $trf->setMatchWeight( $trfProfile{'matchWeight'} );
  $trf->setMismatchPenalty( $trfProfile{'mismatchPenalty'} );
  $trf->setDelta( $trfProfile{'delta'} );
  $trf->setPm( $trfProfile{'pm'} );
  $trf->setPi( $trfProfile{'pi'} );
  $trf->setMinScore( $trfProfile{'minScore'} );
  $trf->setMaxPeriod( $trfProfile{'maxPeriod'} );

  my $excise        = $trfProfile{'excise'};
  my $minCopyNumber = $trfProfile{'minCopyNumber'};
  my $lambda        = $trfProfile{'lambda'};
  my @mu            = @{ $trfProfile{'mu'} };

  my ( $retCode, $trfResults, $trfOutFile, $trfErrFile ) =
      $trf->search( sequenceFile => $maskfile,
                    workDir      => $workingDir );

  # A non-empty error file name is treated as a failed TRF run.
  if ( defined $trfErrFile && $trfErrFile ne "" ) {
    my $prgcode   = $retCode >> 8;
    my $signal    = $retCode & 127;
    my $trfParams = $trf->getParameters();
    my $warning   =
          "WARNING: TRF returned an error ("
        . "Return code = $prgcode, signal = $signal)\n"
        . "TRF parameters: "
        . $trfParams . "\n"
        . "A search phase could not complete on this batch.\n"
        . "The batch file will be re-run and if possible the\n"
        . "program will resume.\n";
    print $warning;
    exit( -1 );
  }

  my $matrix =
      Matrix->new(
                  fileName => "$DIRECTORY/Matrices/crossmatch/simple1.matrix" );
  my $newResultCol = SearchResultCollection->new();
  if ( $DEBUG ) {
    print "  TRF Returned: " . $trfResults->size() . " results\n";
  }

  # Walk the TRF hits from last to first, keeping only those that pass
  # the copy number and rescored-alignment cutoffs.
  my $idx = $trfResults->size();
  while ( $idx-- > 0 ) {
    my $result = $trfResults->get( $idx );
    bless $result, "TRFSearchResult";

    next unless ( $result->getCopyNumber() > $minCopyNumber );

    my ( $newScore, $kimura, $CpGSites, $percIns, $percDel, $scoreArray,
         $goodRegions )
        = $result->rescoreAlignment(
                                     scoreMatrix    => $matrix,
                                     gapOpenPenalty => -30,
                                     gapExtPenalty  => -15,
                                     xDrop          => 500
        );

    # Cutoff for simple1.matrix score.  NOTE: This score is comparable
    # to other interspersed repeat scores from crossmatch/abblast/rmblast
    # but not from the bitscores of hmmer. Looking into converting TRF
    # raw scores into bitscores directly for hmmer runs.
    #
    # Now that we are using bitscores perhaps we should base this
    # cutoff on the bitscore?
    next if ( $newScore < 20 );

    # The stored score is the bit score derived from lambda and the
    # period-specific mu value, formatted as an integer.
    my $bitScore = sprintf( "%.0d",
                            $result->rawToBitScore(
                                        $lambda, $mu[ $result->getPeriod() - 1 ]
                            ) );
    $result->setScore( $bitScore );

    $result->setPctDiverge( sprintf( "%0.2f", $kimura ) );
    $result->setPctInsert( sprintf( "%0.2f", $percIns ) );
    $result->setPctDelete( sprintf( "%0.2f", $percDel ) );

    if ( $#{$goodRegions} > 1 ) {

      # Split the hit into fragments over the good regions.
      my $fragments =
          $result->fragmentSearchResult( regionList => $goodRegions );
      foreach my $fragment ( @{$fragments} ) {
        $newResultCol->add( $fragment );
      }
    }
    else {
      $newResultCol->add( $result );
    }
  }
  $trfResults = $newResultCol;
  if ( $DEBUG ) {
    print "  Filtered TRF Set: " . $trfResults->size() . " results\n";
  }

  #
  # Mask level filtering
  #   - Only important if we plan to excise the repeats.
  #     Then we can't afford overlap.
  #
  if ( $searchParams eq "PERFECT" ) {
    $trfResults->maskLevelFilter( value => 1 );
    if ( $DEBUG ) {
      print "  TRF Set After Masklevel: " . $trfResults->size() . " results\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  my $fragCnt = $batcher->getBatchCount();

  &filterResults( $options, $filterType, $fragCnt, $lib, $trfResults, $tax );

  &postProcessSearch(
                      $options,          $trfResults, $excisions,
                      $excise,           $numX,       $seqDB,
                      $options->{'inv'}, $outfile,    $refineableHashRef,
                      $refinementHash,   $batchNum,   $stage
  );

  $DEBUG = $savedDebug;

  return ( $trfResults->size() );
}

##-------------------------------------------------------------------------##
## Use:  my $numResults = &runStage(
##                          $options, $stageText, $batchIdentifierText,
##                          $searchParams, $lib, $outfile, $maskfile,
##                          $searchEngine, $batchNum, $batcher, $tax,
##                          $excisions, $numX, $refineableHashRef,
##                          $refinementHash, $stage, $filterType,
##                          $DIRECTORY, $seqDB );
##
##      Runs one library search stage: resolves the scoring matrix,
##      calls search(), applies the contained-hit / mask level
##      filters, then filterResults() and postProcessSearch().
##
##      $options      - hash ref of program options; 'gc', 'gccalc'
##                      and 'inv' are read here.
##      $stageText    - banner printed before the search ( skipped
##                      when empty ); $batchIdentifierText is appended
##                      after " in " when non-empty.
##      $searchParams - hash ref; keys read: matrix, max_matrix_gc,
##                      minscore, minmatch, gap_initValue,
##                      ins_gap_extValue, del_gap_extValue, bandwidth,
##                      filterContained, masklevel, raw, wordraw,
##                      excise.
##      $stage        - stage identifier; compared numerically against
##                      401, 501, 502 and 452 to decide whether
##                      preMaskLevelFilter() is applied.
##      The remaining arguments are passed through to the helper
##      routines unchanged.
##
##  Returns
##      The number of results left in the collection after filtering.
##
##    Globals Used: $DEBUG
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runStage {
  my $options             = shift;
  my $stageText           = shift;
  my $batchIdentifierText = shift;
  my $searchParams        = shift;
  my $lib                 = shift;
  my $outfile             = shift;
  my $maskfile            = shift;
  my $searchEngine        = shift;
  my $batchNum            = shift;
  my $batcher             = shift;
  my $tax                 = shift;
  my $excisions           = shift;
  my $numX                = shift;
  my $refineableHashRef   = shift;
  my $refinementHash      = shift;
  my $stage               = shift;
  my $filterType          = shift;
  my $DIRECTORY           = shift;
  my $seqDB               = shift;

  if ( $stageText ne "" ) {
    print $stageText;
    if ( $batchIdentifierText ne "" ) {
      print " in " . $batchIdentifierText . "\n";
    }
    else {
      print "\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  my $fragCnt = $batcher->getBatchCount();

  #
  # Resolve matrix parameter
  #   A template name of the form "<div>p#g.matrix" is turned into a
  #   concrete "<div>p<GC>.matrix" name using the GC background.
  #
  my $matrix = $searchParams->{'matrix'};
  if ( $searchParams->{'matrix'} =~ /(\d+)p\#+g.matrix/ ) {
    my $div = $1;
    my $GC_frac;
    if ( $options->{'gc'} ) {

      # user decides GC background
      $GC_frac = $options->{'gc'};
    }
    else {
      my $seq_cnt = $batcher->getBatchSeqCount( $batchNum );
      my $seqlen  = $batcher->getBatchSeqLength( $batchNum );
      if ( $options->{'gccalc'} || ( $seq_cnt == 1 && $seqlen > 2000 ) ) {

        # Measured GC is used on request, or for a single sequence
        # longer than 2000 bases.
        $GC_frac = $batcher->getBatchAverageGC( $batchNum );
      }
      else {

        # Fallback GC background.
        $GC_frac = 43;
      }
    }

    # Optionally cap the GC value used to pick the matrix.
    if (    defined $searchParams->{'max_matrix_gc'}
         && $searchParams->{'max_matrix_gc'} > 0
         && $GC_frac > $searchParams->{'max_matrix_gc'} )
    {
      $GC_frac = $searchParams->{'max_matrix_gc'};
    }
    my $GC = &chooseMatrices( $GC_frac );
    $matrix = $div . "p" . $GC . ".matrix";
  }

  my $minscore      = $searchParams->{'minscore'};
  my $minmatch      = selectParameter( $options, $searchParams->{'minmatch'} );
  my $gap_initValue = $searchParams->{'gap_initValue'};
  my $ins_gap_extValue = $searchParams->{'ins_gap_extValue'};
  my $del_gap_extValue = $searchParams->{'del_gap_extValue'};
  my $bandwidth = selectParameter( $options, $searchParams->{'bandwidth'} );
  my $filterContained = $searchParams->{'filterContained'};
  my $masklevel       = $searchParams->{'masklevel'};
  my $raw             = $searchParams->{'raw'};
  my $wordraw         = $searchParams->{'wordraw'};
  my $excise          = $searchParams->{'excise'};

  #
  #  Turn off masklevel filtering at the SearchEngine level.  This
  #  is a hack to get around a bug in the way cross_match is handling
  #  the filtering of overlapping hits.  We apply the masklevel after
  #  we get back the results now for all engines.
  #
  my $cmMaskLevel = 101;

  if ( $DEBUG ) {
    print "Saving pre-stage maskfile to: $maskfile-$stage\n";

    # List form: no shell involved, so paths containing spaces or
    # shell metacharacters are copied correctly.
    system( "cp", $maskfile, "$maskfile-$stage" );
  }

  # search() hands back the minmatch / bandwidth values along with the
  # results; they are stored in the existing variables rather than
  # re-declared ( which would mask the declarations above ).
  my $resultsCollection;
  ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           $options,          $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $cmMaskLevel,      $searchEngine,     $wordraw,
                           $raw
  );

  if ( $filterContained ) {
    $resultsCollection->filterContainedResults( value => $masklevel );
  }

  # shortcutlib & shortlib & masklib
  if ( $stage == 401 || $stage == 501 || $stage == 502 || $stage == 452 ) {
    &preMaskLevelFilter( $resultsCollection );
  }

  #
  # Mask level filtering
  #
## TODO: This may not be necessary for rmblast....validate and remove to
##       improve performance.  Check preMaskLevelFilter as well.
  $resultsCollection->maskLevelFilter( value => $masklevel );

  &filterResults( $options, $filterType, $fragCnt, $lib, $resultsCollection,
                  $tax );

  &postProcessSearch(
                      $options,           $resultsCollection,
                      $excisions,         $excise,
                      $numX,              $seqDB,
                      $options->{'inv'},  $outfile,
                      $refineableHashRef, $refinementHash,
                      $batchNum,          $stage
  );

  return ( $resultsCollection->size() );
}

##-------------------------------------------------------------------------##
## Use:  &runLowComplexTests (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef,
##           $batcher );
##
##      Runs a fixed sequence of runTestStage() searches against the
##      custom library named by $options{'lib'}.  Works on a local
##      copy of the options hash in which 'species' is forced to
##      "human".
##
##      The library's sequence descriptions are scanned for "[S:n,...]"
##      tags; each stage number maps to a library name ( sinecutlib,
##      shortcutlib, cutlib, shortlib, longlib, mirslib, ... ) and most
##      stages only run if at least one sequence carries the matching
##      tag.  Sequences without a tag count as 'specieslib'.
##
##      Stage order:
##          - general_search_parameters        ( specieslib )
##          Primates:
##            - mask_young_sines_in_primates   ( sinecutlib )
##            - cut_young_sines_in_primates    ( sinecutlib )
##            - cut_young_sines_in_primates    ( always )
##            - mask_sines_in_primates         ( sinecutlib )
##          Otherwise:
##            - mask_sines_in_non_primate_mammals ( sinecutlib )
##          - general_full_length_repeats      ( shortcutlib )
##          - complete_3end_of_young_line1s    ( cutlib )
##          Primates:
##            - older_ALUs_in_primates         ( sinecutlib )
##            - more_ALUs_in_primates          ( sinecutlib )
##          - short_repeats_and_satellites[_rodents] ( shortlib )
##          - long_interspersed_repeats        ( longlib )
##          - ancient_repeats                  ( mirslib )
##          - tough_ancient_repeats            ( always )
##
##      Each stage writes "$file.tmp.t<N>" ( N counts up from 0 ); at
##      the end any such files are concatenated into "$file.cat".
##
##  Returns
##      Nothing.
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runLowComplexTests {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;           # not used by this routine
  my $file      = shift;
  my $maskfile  = shift;

  my $generalLibDir       = shift;    # not used by this routine
  my $speciesLibDir       = shift;    # not used by this routine
  my $fragCnt             = shift;    # not used by this routine
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;    # not used by this routine
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $stage          = 0;
  my %refinementHash = ();
  my $lib;

  # reused to hold overlaplist
  my $excisions = [];

  #
  # Run one test stage.  Every stage shares the same batch / library
  # arguments and differs only in banner text, recipe and filter type.
  #
  #   $stageText  - banner handed to runTestStage ( may be "" )
  #   $recipeName - key into the search recipes
  #   $filterType - filter type handed to runTestStage
  #   $idPrefix   - optional stage id prefix; defaults to
  #                 "<recipeName>-"
  #
  # The stage counter is appended to both the output file name and
  # the stage id, then incremented.
  #
  my $runTest = sub {
    my ( $stageText, $recipeName, $filterType, $idPrefix ) = @_;
    $idPrefix = $recipeName . "-" unless ( defined $idPrefix );

    my $stageOutfile = "$file.tmp.t$stage";
    my $stageId      = $idPrefix . $stage;
    $stage++;

    return &runTestStage(
                          \%options,            $stageText,
                          $batchIdentifierText, $searchRecipes->{$recipeName},
                          $lib,                 $stageOutfile,
                          $maskfile,            $searchEngine,
                          $batchNum,            $batcher,
                          $tax,                 $excisions,
                          $numX,                $refineableHashRef,
                          \%refinementHash,     $stageId,
                          $filterType,          $DIRECTORY,
                          $seqDB
    );
  };

  # Stages block.
  {

    # Only the local copy of the options is changed.
    $options{'species'} = "human";
    my ( $custLibVol, $custLibDir, $custLibFile ) =
        File::Spec->splitpath( $options{'lib'} );
    $lib = "$customLibDir/$custLibFile";

    my $db = FastaDB->new(
                           fileName    => $options{'lib'},
                           openMode    => SeqDBI::ReadOnly,
                           maxIDLength => 50
    );

    # >SINEC_Fc#SINE/tRNA-Lys @Felis @Carnivora  [S:40,50]

    my %stageToLibName = (
                           35 => "sinecutlib",
                           40 => "shortcutlib",
                           45 => "cutlib",
                           50 => "shortlib",
                           55 => "longlib",
                           60 => "mirslib",
                           65 => "mirlib",
                           70 => "retrolib",
                           75 => "l1.lib",
                           80 => "specieslib",
                           85 => "refinelib"
    );

    # Which library classes are represented in the custom library.
    my %inStage = ();
    foreach my $seqID ( $db->getIDs() ) {
      my $desc = $db->getDescription( $seqID );

      if ( $desc =~ /\[S:([\d,]+)\]/ ) {
        foreach my $value ( split( /,/, $1 ) ) {

          # Ignore stage numbers that have no library name.
          next unless ( exists $stageToLibName{$value} );
          $inStage{ $stageToLibName{$value} } = 1;
        }
      }
      else {
        $inStage{'specieslib'} = 1;
      }
    }

    # all species but mammals are currently treated in a naive fashion
    $runTest->(
                "Testing using species_specific params",
                "general_search_parameters",
                "masking"
        )
        if ( $inStage{'specieslib'} );

    if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
      $runTest->(
                  "Testing using full-length ALUs (masking) params",
                  "mask_young_sines_in_primates",
                  "alumask"
          )
          if ( $inStage{'sinecutlib'} );

      $runTest->(
                  "Testing using full-length ALUs (cut) params",
                  "cut_young_sines_in_primates",
                  "alu"
          )
          if ( $inStage{'sinecutlib'} );

      # Second pass with the same recipe; not gated on sinecutlib.
      $runTest->( "", "cut_young_sines_in_primates", "alu" );

      $runTest->(
                  "Testing using remaining ALU params",
                  "mask_sines_in_primates",
                  "alumask"
          )
          if ( $inStage{'sinecutlib'} );
    }
    else {
      ##
      ## Non-primate mammals
      ##
      $runTest->(
                  "Testing using young abundant SINEs params",
                  "mask_sines_in_non_primate_mammals",
                  "cut1"
          )
          if ( $inStage{'sinecutlib'} );
    }

    ##
    ## shortcut lib exists
    ##
    $runTest->(
                "Testing using full-length interspersed repeats params",
                "general_full_length_repeats",
                "cut1"
        )
        if ( $inStage{'shortcutlib'} );

    $runTest->( "", "complete_3end_of_young_line1s", "cut2" )
        if ( $inStage{'cutlib'} );

    ##
    ##  primates
    ##
    if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
      ######  mask more alus #####
      $runTest->(
                  "Testing using remaining ALUs params",
                  "older_ALUs_in_primates",
                  "alumask"
          )
          if ( $inStage{'sinecutlib'} );

      ######  mask even more alus #####
      $runTest->( "", "more_ALUs_in_primates", "alumask" )
          if ( $inStage{'sinecutlib'} );
    }

    if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {
      $runTest->(
                  "Testing using most interspersed repeats params",
                  "short_repeats_and_satellites_rodents",
                  "sines"
          )
          if ( $inStage{'shortlib'} );
    }
    else {

      # NOTE(review): unlike every other stage, this stage id has no
      # "-" before the counter.  Kept as-is in case something depends
      # on the exact id; confirm whether it is a typo.
      $runTest->(
                  "Testing using most interspersed repeats params",
                  "short_repeats_and_satellites",
                  "sines",
                  "short_repeats_and_satellites"
          )
          if ( $inStage{'shortlib'} );
    }

    # currently long and short together for
    # non-primate/non-rodent mammals
    ##### mask longer rep seqs  #####
    $runTest->(
                "identifying long interspersed repeats",
                "long_interspersed_repeats",
                "longlib"
        )
        if ( $inStage{'longlib'} );

    $runTest->( "identifying ancient repeats", "ancient_repeats", "mirs" )
        if ( $inStage{'mirslib'} );

    # NOTE(review): the "if ( $inStage{'mirlib'} )" guard for this
    # stage is commented out in the original source, so it runs
    # unconditionally; confirm whether that is intended.
    $runTest->( "", "tough_ancient_repeats", "masking" );

    # Disabled stages ( were commented out in the original source ):
    #   - "identifying retrovirus-like sequences": recipe
    #     'retroviruses', filter "masking", gated on
    #     $inStage{'retrolib'}.
    #   - "identifying tough LINE1s": recipe
    #     'tough_line1s_in_eutheria', filter "l1", gated on
    #     $inStage{'l1.lib'}, only for eutheria ( these LINEs are not
    #     scanned in marsupials ).
  }

  # Collect the per-stage outputs into a single file.  glob() is used
  # in list context on purpose: in scalar context it is an iterator
  # that keeps its position between calls and would not re-evaluate
  # the pattern on later invocations of this routine.
  my @stageOutputs = glob( "$file.tmp.t*" );
  if ( @stageOutputs ) {
    systemint( "cat $file.tmp.t* > $file.cat" );
  }

  return;
}
##-------------------------------------------------------------------------##
## Use:  &runHMMERSearchStages (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef,
##           $batcher );
##
##  Returns
##
##         Phases of a search:
##          - Search for simple repeats ( simple.lib )
##
##          Species Specific:
##            - Search user supplied library
##
##          Mammal Specific:
##          - Search sinecutlib if it exists ( full length abundant SINEs )
##          - Search shortcutlib if it exists ( full-length IRs )
##          - Search cutlib if it exists
##
##          Homo:
##              - Search sinecutlib again
##
##          - Search shortlib if it exists
##          - Search longlib if it exists
##          - Search mirs.lib
##          - Search mir.lib
##          - Search retro.lib ***if it exists***
##          - Search l1.lib
##
##          - Search simple.lib (again?)
##          - Search at.lib
##
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runHMMERSearchStages {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;
  my $file      = shift;
  my $maskfile  = shift;

  #  my $sinecutlib          = shift;
  #  my $cutlib              = shift;
  #  my $shortcutlib         = shift;
  #  my $shortlib            = shift;
  #  my $longlib             = shift;
  #  my $retrolib            = shift;
  my $generalLibDir       = shift;
  my $speciesLibDir       = shift;
  my $fragCnt             = shift;
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $repBoundRef;
  my $resultsCollection;
  my %refinementHash = ();
  my $cutAlus        = 0;

  #
  # Data structures which help in recording the locations
  # of repeat excision.
  #
  my $excisions = {};

  my (
       $minscore,         $minmatch,      $lib,
       $matrix,           $gap_initValue, $ins_gap_extValue,
       $del_gap_extValue, $bandwidth,     $masklevel,
       $raw,              $wordraw,       $outfile
      )
      = ();

  # Stages block.
  {
    ##
    ## Simple Repeats
    ##    - excise almost perfect simple repeats
    ##
    # 11/9/2015
    #unless ( $options{'nocut'} || $options{'low'} || $options{'alu'} ) {
    unless ( $options{'nolow'} || $options{'alu'} ) {
      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "PERFECT",
                    "",                   "$file.tmp.simple1",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "251",
                    "simple",             $DIRECTORY,
                    $seqDB
      );
    }    # Simple repeats

    ##
    ## High complexity repeat searches
    ##
    unless ( $options{'noint'} ) {

      #unless only low complexity DNA is to be masked

      ##
      ## Single species specific library
      ##
      if (    defined $options{'lib'}
           || -s "$speciesLibDir/specieslib"
           || -s "$speciesLibDir/specieslib.bsq"
           || -s "$speciesLibDir/specieslib.hmm"
           || -s "$speciesLibDir/specieslib.xps" )
      {
        my $message = "";
        if ( defined $options{'lib'} ) {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $options{'lib'} );
          $message = "identifying matches to " . $custLibFile . " sequences";
          $lib     = "$customLibDir/$custLibFile";
        }
        else {
          $message =
              "identifying matches to " . $options{'species'} . " sequences";
          $lib = "$speciesLibDir/specieslib";
        }

        # all species but mammals are currently treated in a naive fashion
        my $searchParams = $searchRecipes->{'general_search_parameters'};

        # BAD BAD BAD...this changes the DS.
        $searchParams->{'minscore'} = $options{'cutoff'} if $options{'cutoff'};
        &runStage(
                   \%options,            $message,
                   $batchIdentifierText, $searchParams,
                   $lib,                 "$file.tmp.custom",
                   $maskfile,            $searchEngine,
                   $batchNum,            $batcher,
                   $tax,                 $excisions,
                   $numX,                $refineableHashRef,
                   \%refinementHash,     "001",
                   "masking",            $DIRECTORY,
                   $seqDB
        );
        last if ( $seqDB->getSubtLength() < 15 );
      }    # User supplied lib

      ##
      ## RepeatMasker specific libraries
      ##
      else {
        ##
        ## Primates
        ##
        my $aluIdx = 1;
        print "is a $options{'species'} a primate?" .  $tax->isA( $options{'species'}, "primates" ) . "\n";
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {

          # 11/9/2015
          #if ( $options{'nocut'} ) {
          if ( 0 ) {
            &runStage(
                       \%options,
                       "identifying full-length ALUs",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_young_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu0",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "351",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
          }
          else {
            $cutAlus = &runStage(
                                \%options,
                                "identifying full-length ALUs",
                                $batchIdentifierText,
                                $searchRecipes->{'cut_young_sines_in_primates'},
                                "$speciesLibDir/sinecutlib",
                                "$file.tmp.alu0",
                                $maskfile,
                                $searchEngine,
                                $batchNum,
                                $batcher,
                                $tax,
                                $excisions,
                                $numX,
                                $refineableHashRef,
                                \%refinementHash,
                                "352",
                                "alu",
                                $DIRECTORY,
                                $seqDB
            );

            if ( $cutAlus ) {

              # Any following full-length Alus only were
              # exposed after excising previous Alus.
              &runStage(
                         \%options, "",
                         $batchIdentifierText,
                         $searchRecipes->{'cut_young_sines_in_primates'},
                         "$speciesLibDir/sinecutlib",
                         "$file.tmp.alu1", $maskfile, $searchEngine,
                         $batchNum, $batcher, $tax, $excisions,
                         $numX, $refineableHashRef, \%refinementHash,
                         "353", "alu", $DIRECTORY, $seqDB
              );
            }    # if ( $cutAlus )
          }

          if ( $options{'alu'} ) {
            ######  mask remaining Alus  #####
            &runStage(
                       \%options,
                       "",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu4",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "354",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
            last;
          }    # if ( $options{'alu'} )

          ##
          ## Non-primate mammals
          ##
        }
        elsif (    -s "$speciesLibDir/sinecutlib"
                || -s "$speciesLibDir/sinecutlib.bsq"
                || -s "$speciesLibDir/sinecutlib.hmm"
                || -s "$speciesLibDir/sinecutlib.xps" )
        {
          &runStage(
                     \%options,
                     "identifying young abundant SINEs",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_sines_in_non_primate_mammals'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "355",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );
        }

        ##
        ## shortcut lib exists
        ##
        if (    -s "$speciesLibDir/shortcutlib"
             || -s "$speciesLibDir/shortcutlib.bsq"
             || -s "$speciesLibDir/shortcutlib.hmm"
             || -s "$speciesLibDir/shortcutlib.xps" )
        {
          ###### excise all short full-length elements ######
          &runStage(
                     \%options,
                     "identifying full-length interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'general_full_length_repeats'},
                     "$speciesLibDir/shortcutlib",
                     "$file.tmp.cut1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "401",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );

          if (    -s "$speciesLibDir/cutlib"
               || -s "$speciesLibDir/cutlib.bsq"
               || -s "$speciesLibDir/cutlib.hmm"
               || -s "$speciesLibDir/cutlib.xps" )
          {

            # cut out complete 3' ends of young L1
            # elements (same parameters)
            &runStage(
                       \%options, "",
                       $batchIdentifierText,
                       $searchRecipes->{'complete_3end_of_young_line1s'},
                       "$speciesLibDir/cutlib",
                       "$file.tmp.cut2", $maskfile, $searchEngine,
                       $batchNum, $batcher, $tax, $excisions,
                       $numX, $refineableHashRef, \%refinementHash,
                       "451", "cut2", $DIRECTORY, $seqDB
            );
          }    # if ( -s $cutlib )
        }    # if ( $shortcublib

        if ( $options{'cut'} && !$fragCnt ) {
          my $cutfile = $file . ".cut";
          copy( $maskfile, $cutfile );
        }

        ##
        ##  primates
        ##
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
          ######  mask more alus #####
          &runStage(
                     \%options,
                     "identifying remaining ALUs",
                     $batchIdentifierText,
                     $searchRecipes->{'older_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu2",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "356",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );

        }

        ##
        ##  Most interspersed repeats
        ##
        if ( -s "$speciesLibDir/masklib.hmm" ) {
          &runStage(
                     \%options,
                     "identifying most interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'long_interspersed_repeats'},
                     "$speciesLibDir/masklib",
                     "$file.tmp.reps",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "452",
                     "longlib",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }
      }    # if ( not user supplied lib or species only search
      last if ( $seqDB->getSubtLength() < 15 );
    }    # unless only low complexity sequences are masked

    unless ( $options{'nolow'} ) {

      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "DIVERGED",
                    "",                   "$file.tmp.simple2",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "252",
                    "masking",            $DIRECTORY,
                    $seqDB
      );

    }    # unless low complexity masking is skipped
  }    # end of stages block

  if (
       keys( %refinementHash )
       && (    -s "$speciesLibDir/refinelib"
            || -s "$speciesLibDir/refinelib.bsq"
            || -s "$speciesLibDir/refinelib.hmm"
            || -s "$speciesLibDir/refinelib.xps" )
      )
  {

    # The design goal is for each repeat class to have its own refinement
    # library.  For now we just search against the same library.
    foreach my $refinementClass ( keys( %refinementHash ) ) {
      print "refining $refinementClass elements";
      if ( $batchIdentifierText ne "" ) {
        print " in " . $batchIdentifierText . "\n";
      }
      else {
        print "\n";
      }

      open OUT, ">$file.unRefinedEles.fa";
      my $index = 0;

      my $refIdx = 0;
      foreach my $ele ( @{ $refinementHash{$refinementClass} } ) {
        print OUT ">ref" . $refIdx++ . "\n";
        $index++;
        my $seq = $ele->{'seq'};
        $seq =~ s/(\S{50})/$1\n/g;
        $seq .= "\n"
            unless ( $seq =~ /.*\n+$/s );
        print OUT $seq;
      }
      close OUT;

      # At some point we may make each repeat class have
      # its own refinement lib
      $lib       = "$speciesLibDir/refinelib";
      $minscore  = 180;                          # N/A
      $bandwidth = 40;                           # N/A
      $masklevel = "-masklevel 101";
      $raw       = "-raw";                       # N/A
      $wordraw   = "-word_raw";                  # N/A

      $minmatch = selectParameter( \%options, [ 7, 8, 9, 11 ] );

      # TODO...fix the refinement section
      my $GC = &chooseMatrices( $GC_frac );      # N/A
      $matrix = "18p" . "$GC" . ".matrix";       # N/A
      ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) =
          ( -30, -6, -5 );                       # N/A

      $maskfile = "$file.unRefinedEles.fa";
      $outfile  = "$file.tmp.ref";
      ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
      );

      ## Adjust positions
      for ( my $i = $resultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
        my $current = $resultsCollection->get( $i );
        my $seqID   = $current->getQueryName();
        my $refIdx;
        if ( $seqID =~ /ref(\d+)/ ) {
          $refIdx = $1;
        }
        else {
          warn "Something went wrong with refinement ids $seqID\n";
        }
        my $refEntry   = $refinementHash{$refinementClass}->[ $refIdx ];
        my $origID     = $refEntry->{'sSeqID'};
        my $start      = $refEntry->{'qStart'};
        my $end        = $refEntry->{'qEnd'};
        my $origLen    = $refEntry->{'len'};
        my $parentID   = $refEntry->{'id'};
        my $refinedLen =
            $current->getQueryEnd() - $current->getQueryStart() + 1;

        if ( $origLen - $refinedLen > 10 ) {

          # Unlikely candidate -- does not fully align
          #print "Delete: $origLen $refinedLen\n";
          $resultsCollection->remove( $i );
        }
        else {
          $current->setQueryName( "refinement" );
          $current->setId( "[$parentID]" );
        }
      }

      $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
      unlink( $maskfile ) unless($DEBUG);
      unlink( "$maskfile.log" );

    }
  }

  if ( <$file.tmp.*> ) {
    systemint( "cat $file.tmp.* > $file.cat" );
  }

  if ( -s "$file.cat" ) {

    # Once and for all let's create a sorted cat file...what do you say?
    my $combinedSearchResults =
        CrossmatchSearchEngine::parseOutput( searchOutput => "$file.cat" );
    $combinedSearchResults->sort(
      sub ($$) {
        (    ( !( $_[ 0 ]->getQueryName() cmp "refinement" ) )
          || ( $_[ 0 ]->getQueryName() cmp $_[ 1 ]->getQueryName() )
          || ( $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart() )
          || ( $_[ 0 ]->getLineageId() cmp $_[ 1 ]->getLineageId() ) );
      }
    );

    $combinedSearchResults->write( "$file.cat",
                                   SearchResult::AlignWithQuerySeq );
  }

  return;
}

##-------------------------------------------------------------------------##
## Use:  &runSearchStages (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef,
##           $batcher );
##
##  Returns
##
##         Phases of a search:
##          - Search for simple repeats ( simple.lib )
##
##          Species Specific:
##            - Search user supplied library
##
##          Mammal Specific:
##          - Search sinecutlib if it exists ( full length abundant SINEs )
##          - Search shortcutlib if it exists ( full-length IRs )
##          - Search cutlib if it exists
##
##          Homo:
##              - Search sinecutlib again
##
##          - Search shortlib if it exists
##          - Search longlib if it exists
##          - Search mirs.lib
##          - Search mir.lib
##          - Search retro.lib ***if it exists***
##          - Search l1.lib
##
##          - Search simple.lib (again?)
##          - Search at.lib
##
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runSearchStages {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;
  my $file      = shift;
  my $maskfile  = shift;

  #  my $sinecutlib          = shift;
  #  my $cutlib              = shift;
  #  my $shortcutlib         = shift;
  #  my $shortlib            = shift;
  #  my $longlib             = shift;
  #  my $retrolib            = shift;
  my $generalLibDir       = shift;
  my $speciesLibDir       = shift;
  my $fragCnt             = shift;
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $repBoundRef;
  my $resultsCollection;
  my %refinementHash = ();
  my $cutAlus        = 0;

  #
  # Data structures which help in recording the locations
  # of repeat excision.
  #
  my $excisions = {};

  my (
       $minscore,         $minmatch,      $lib,
       $matrix,           $gap_initValue, $ins_gap_extValue,
       $del_gap_extValue, $bandwidth,     $masklevel,
       $raw,              $wordraw,       $outfile
      )
      = ();

  # Stages block.
  {
    ##
    ## Simple Repeats
    ##    - delete almost perfect simple repeats
    ##
    # 11/9/2015
    #unless ( $options{'nocut'} || $options{'low'} || $options{'alu'} ) {
    unless ( $options{'nolow'} || $options{'alu'} ) {
      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "PERFECT",
                    "",                   "$file.tmp.simple1",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "251",
                    "simple",             $DIRECTORY,
                    $seqDB
      );

    }    # Simple repeats

    ##
    ## High complexity repeat searches
    ##
    unless ( $options{'noint'} ) {

      #unless only low complexity DNA is to be masked

      ##
      ## Single species specific library
      ##
      if (    defined $options{'lib'}
           || -s "$speciesLibDir/specieslib"
           || -s "$speciesLibDir/specieslib.bsq"
           || -s "$speciesLibDir/specieslib.hmm"
           || -s "$speciesLibDir/specieslib.xps" )
      {
        my $message = "";
        if ( defined $options{'lib'} ) {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $options{'lib'} );
          $message = "identifying matches to " . $custLibFile . " sequences";
          $lib     = "$customLibDir/$custLibFile";
        }
        else {
          $message =
              "identifying matches to " . $options{'species'} . " sequences";
          $lib = "$speciesLibDir/specieslib";
        }

        # all species but mammals are currently treated in a naive fashion
        my $searchParams = $searchRecipes->{'general_search_parameters'};

        # BAD BAD BAD...this changes the DS.
        $searchParams->{'minscore'} = $options{'cutoff'} if $options{'cutoff'};
        &runStage(
                   \%options,            $message,
                   $batchIdentifierText, $searchParams,
                   $lib,                 "$file.tmp.custom",
                   $maskfile,            $searchEngine,
                   $batchNum,            $batcher,
                   $tax,                 $excisions,
                   $numX,                $refineableHashRef,
                   \%refinementHash,     "001",
                   "masking",            $DIRECTORY,
                   $seqDB
        );
        last if ( $seqDB->getSubtLength() < 15 );
      }    # User supplied lib

      ##
      ## RepeatMasker specific libraries
      ##
      else {
        ##
        ## Primates
        ##
        my $aluIdx = 1;
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {

          # 11/9/2015
          #if ( $options{'nocut'} ) {
          if ( 0 ) {
            &runStage(
                       \%options,
                       "identifying full-length ALUs",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_young_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu0",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "351",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
          }
          else {
            $cutAlus = &runStage(
                                \%options,
                                "identifying full-length ALUs",
                                $batchIdentifierText,
                                $searchRecipes->{'cut_young_sines_in_primates'},
                                "$speciesLibDir/sinecutlib",
                                "$file.tmp.alu0",
                                $maskfile,
                                $searchEngine,
                                $batchNum,
                                $batcher,
                                $tax,
                                $excisions,
                                $numX,
                                $refineableHashRef,
                                \%refinementHash,
                                "352",
                                "alu",
                                $DIRECTORY,
                                $seqDB
            );

            if ( $cutAlus ) {

              # Any following full-length Alus only were
              # exposed after excising previous Alus.
              &runStage(
                         \%options, "",
                         $batchIdentifierText,
                         $searchRecipes->{'cut_young_sines_in_primates'},
                         "$speciesLibDir/sinecutlib",
                         "$file.tmp.alu1", $maskfile, $searchEngine,
                         $batchNum, $batcher, $tax, $excisions,
                         $numX, $refineableHashRef, \%refinementHash,
                         "353", "alu", $DIRECTORY, $seqDB
              );
            }    # if ( $cutAlus )
          }

          if ( $options{'alu'} ) {
            ######  mask remaining Alus  #####
            &runStage(
                       \%options,
                       "",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu4",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "354",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
            last;
          }    # if ( $options{'alu'} )

          ##
          ## Non-primate mammals
          ##
        }
        elsif (    -s "$speciesLibDir/sinecutlib"
                || -s "$speciesLibDir/sinecutlib.bsq"
                || -s "$speciesLibDir/sinecutlib.hmm"
                || -s "$speciesLibDir/sinecutlib.xps" )
        {
          &runStage(
                     \%options,
                     "identifying young abundant SINEs",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_sines_in_non_primate_mammals'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "355",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );
        }

        ##
        ## shortcut lib exists
        ##
        if (    -s "$speciesLibDir/shortcutlib"
             || -s "$speciesLibDir/shortcutlib.bsq"
             || -s "$speciesLibDir/shortcutlib.hmm"
             || -s "$speciesLibDir/shortcutlib.xps" )
        {
          ###### excise all short full-length elements ######
          &runStage(
                     \%options,
                     "identifying full-length interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'general_full_length_repeats'},
                     "$speciesLibDir/shortcutlib",
                     "$file.tmp.cut1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "401",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );

          # for testing
          #copy( $maskfile, "/tmp/backup1" );

          if (    -s "$speciesLibDir/cutlib"
               || -s "$speciesLibDir/cutlib.bsq"
               || -s "$speciesLibDir/cutlib.hmm"
               || -s "$speciesLibDir/cutlib.xps" )
          {

            # cut out complete 3' ends of young L1
            # elements (same parameters)
            &runStage(
                       \%options, "",
                       $batchIdentifierText,
                       $searchRecipes->{'complete_3end_of_young_line1s'},
                       "$speciesLibDir/cutlib",
                       "$file.tmp.cut2", $maskfile, $searchEngine,
                       $batchNum, $batcher, $tax, $excisions,
                       $numX, $refineableHashRef, \%refinementHash,
                       "451", "cut2", $DIRECTORY, $seqDB
            );
          }    # if ( -s $cutlib )
        }    # if ( $shortcublib

        if ( $options{'cut'} && !$fragCnt ) {
          my $cutfile = $file . ".cut";
          copy( $maskfile, $cutfile );
        }

        ##
        ##  primates
        ##
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
          ######  mask more alus #####
          &runStage(
                     \%options,
                     "identifying remaining ALUs",
                     $batchIdentifierText,
                     $searchRecipes->{'older_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu2",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "356",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );

          ######  mask even more alus #####
          &runStage(
                     \%options,
                     "",
                     $batchIdentifierText,
                     $searchRecipes->{'more_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu3",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "357",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }

        # for testing
        #copy( $maskfile, "/tmp/backup" );

        ######  mask the remaining other short repeats and satellites#####
        if (    -s "$speciesLibDir/shortlib"
             || -s "$speciesLibDir/shortlib.bsq"
             || -s "$speciesLibDir/shortlib.hmm"
             || -s "$speciesLibDir/shortlib.xps" )
        {

          if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {

            # TODO: Test that this isn't true when -lib is used!!!!
            &runStage(
                    \%options, "identifying most interspersed repeats",
                    $batchIdentifierText,
                    $searchRecipes->{'short_repeats_and_satellites_rodents'},
                    "$speciesLibDir/shortlib",
                    "$file.tmp.sines", $maskfile, $searchEngine,
                    $batchNum, $batcher,           $tax,       $excisions,
                    $numX,     $refineableHashRef, \%refinementHash,
                    "501",     "sines",            $DIRECTORY, $seqDB
            );
          }
          else {
            &runStage(
                       \%options,
                       "identifying most interspersed repeats",
                       $batchIdentifierText,
                       $searchRecipes->{'short_repeats_and_satellites'},
                       "$speciesLibDir/shortlib",
                       "$file.tmp.sines",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "502",
                       "sines",
                       $DIRECTORY,
                       $seqDB
            );
          }
          last if ( $seqDB->getSubtLength() < 15 );
        }    # if ( -s $shortlib )

        if (    -s "$speciesLibDir/longlib"
             || -s "$speciesLibDir/longlib.bsq"
             || -s "$speciesLibDir/longlib.hmm"
             || -s "$speciesLibDir/longlib.xps" )
        {

          # currently long and short together for
          # non-primate/non-rodent mammals
          ##### mask longer rep seqs  #####
          &runStage(
                     \%options,
                     "identifying long interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'long_interspersed_repeats'},
                     "$speciesLibDir/longlib",
                     "$file.tmp.reps",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "551",
                     "longlib",
                     $DIRECTORY,
                     $seqDB
          );
          ## I could run bodies and UTRs separately, ensuring
          ## better coverage at overlapping regions
          ## and allowing to set masklevel to 80 or so for UTRs,
          ## so that oddment extensions are avoided
          last if ( $seqDB->getSubtLength() < 15 );
        }

        if (    -s "$speciesLibDir/mirslib"
             || -s "$speciesLibDir/mirslib.bsq"
             || -s "$speciesLibDir/mirslib.hmm"
             || -s "$speciesLibDir/mirslib.xps" )
        {
          ##### mask MIRs #####
          &runStage(
                  \%options,                "identifying ancient repeats",
                  $batchIdentifierText,     $searchRecipes->{'ancient_repeats'},
                  "$speciesLibDir/mirslib", "$file.tmp.mirs",
                  $maskfile,                $searchEngine,
                  $batchNum,                $batcher,
                  $tax,                     $excisions,
                  $numX,                    $refineableHashRef,
                  \%refinementHash,         "601",
                  "mirs",                   $DIRECTORY,
                  $seqDB
          );

          last if ( $seqDB->getSubtLength() < 15 );
        }    # if ( -s "$speciesLibDir/mirslib....

        #if (    -s "$speciesLibDir/mirlib"
        #     || -s "$speciesLibDir/mirlib.bsq"
        #     || -s "$speciesLibDir/mirlib.hmm"
        #     || -s "$speciesLibDir/mirlib.xps" )
        #{
        #
        #          # This has to do with trying to distinguish
        #          # very low scoring MIRs which get complexity adjusted
        #          # away by previous search.
        #          ##### mask MIRs #####
        #          &runStage(
        #                     \%options,
        #                     "",
        #                     $batchIdentifierText,
        #                     $searchRecipes->{'tough_ancient_repeats'},
        #                     "$speciesLibDir/mirlib",
        #                     "$file.tmp.mir",
        #                     $maskfile,
        #                     $searchEngine,
        #                     $batchNum,
        #                     $batcher,
        #                     $tax,
        #                     $excisions,
        #                     $numX,
        #                     $refineableHashRef,
        #                     \%refinementHash,
        #                     "651",
        #                     "masking",
        #                     $DIRECTORY,
        #                     $seqDB
        #          );
        #          last if ( $seqDB->getSubtLength() < 15 );
        #        }    # if ( -s "$speciesLibDir/mirlib...

        if (    -s "$speciesLibDir/retrolib"
             || -s "$speciesLibDir/retrolib.bsq"
             || -s "$speciesLibDir/retrolib.hmm"
             || -s "$speciesLibDir/retrolib.xps" )
        {
          ##### mask retroviral internal sequences #####
          &runStage(
                     \%options,
                     "identifying retrovirus-like sequences",
                     $batchIdentifierText,
                     $searchRecipes->{'retroviruses'},
                     "$speciesLibDir/retrolib",
                     "$file.tmp.retro",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "701",
                     "masking",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }

        #if ( $tax->isA( $options{'species'}, "eutheria" ) == 1 ) {

        # these LINEs are not scanned in marsupials; perhaps will
        # find LINEs to put in later
        ##### mask undetected LINE1 bodies #####
        #  &runStage(
        #             \%options,
        #             "identifying tough LINE1s",
        #             $batchIdentifierText,
        #             $searchRecipes->{'tough_line1s_in_eutheria'},
        #             "$generalLibDir/l1.lib",
        #             "$file.tmp.l1",
        #             $maskfile,
        #             $searchEngine,
        #             $batchNum,
        #             $batcher,
        #             $tax,
        #             $excisions,
        #             $numX,
        #             $refineableHashRef,
        #             \%refinementHash,
        #             "751",
        #             "l1",
        #             $DIRECTORY,
        #             $seqDB
        #  );
        #}    # if eutheria
      }    # if ( not user supplied lib or species only search
      last if ( $seqDB->getSubtLength() < 15 );
    }    # unless only low complexity sequences are masked

    unless ( $options{'nolow'} ) {

      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "DIVERGED",
                    "",                   "$file.tmp.simple2",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "252",
                    "masking",            $DIRECTORY,
                    $seqDB
      );

    }    # unless low complexity masking is skipped

  }    # end of stages block

  if (
       keys( %refinementHash )
       && (    -s "$speciesLibDir/refinelib"
            || -s "$speciesLibDir/refinelib.bsq"
            || -s "$speciesLibDir/refinelib.hmm"
            || -s "$speciesLibDir/refinelib.xps" )
      )
  {

    # The design goal is to have each repeat class have its own refinement
    # library.  For now we just search against the same library.
    foreach my $refinementClass ( keys( %refinementHash ) ) {
      print "refining $refinementClass elements";
      if ( $batchIdentifierText ne "" ) {
        print " in " . $batchIdentifierText . "\n";
      }
      else {
        print "\n";
      }

      open OUT, ">$file.unRefinedEles.fa";
      my $index = 0;

      my $refIdx = 0;
      foreach my $ele ( @{ $refinementHash{$refinementClass} } ) {
        print OUT ">ref" . $refIdx++ . " - $ele->{'id'}\n";
        $index++;
        my $seq = $ele->{'seq'};
        $seq =~ s/(\S{50})/$1\n/g;
        $seq .= "\n"
            unless ( $seq =~ /.*\n+$/s );
        print OUT $seq;
      }
      close OUT;

      # At some point we may make each repeat class have
      # its own refinement lib
      $lib       = "$speciesLibDir/refinelib";
      $minscore  = 180;
      $bandwidth = 40;
      $masklevel = "-masklevel 101";
      $raw       = "-raw";
      $wordraw   = "-word_raw";

      $minmatch = selectParameter( \%options, [ 7, 8, 9, 11 ] );

      # TODO...fix the refinement section
      my $GC = &chooseMatrices( $GC_frac );
      $matrix = "18p" . "$GC" . ".matrix";
      ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) =
          ( -30, -6, -5 );

      $maskfile = "$file.unRefinedEles.fa";
      $outfile  = "$file.tmp.ref";
      ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
      );

      ## Adjust positions
      for ( my $i = $resultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
        my $current = $resultsCollection->get( $i );
        my $seqID   = $current->getQueryName();
        my $refIdx;
        if ( $seqID =~ /ref(\d+)/ ) {
          $refIdx = $1;
        }
        else {
          warn "Something went wrong with refinement ids $seqID\n";
        }
        my $refEntry   = $refinementHash{$refinementClass}->[ $refIdx ];
        my $origID     = $refEntry->{'sSeqID'};
        my $start      = $refEntry->{'qStart'};
        my $end        = $refEntry->{'qEnd'};
        my $origLen    = $refEntry->{'len'};
        my $parentID   = $refEntry->{'id'};
        my $refinedLen =
            $current->getQueryEnd() - $current->getQueryStart() + 1;

        if ( $origLen - $refinedLen > 10 ) {
          # Unlikely candidate -- does not fully align
          $resultsCollection->remove( $i );
        }
        else {
          $current->setQueryName( "refinement" );
          $current->setId( "[$parentID]" );
        }
      }

      $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
      unlink( $maskfile ) unless ($DEBUG);
      unlink( "$maskfile.log" );

    }
  }

  if ( <$file.tmp.*> ) {
    systemint( "cat $file.tmp.* > $file.cat" );
  }

  # Once and for all let's create a sorted cat file...what do you say?
  my $combinedSearchResults =
      CrossmatchSearchEngine::parseOutput( searchOutput => "$file.cat" );
  $combinedSearchResults->sort(
    sub ($$) {
      (    ( !( $_[ 0 ]->getQueryName() cmp "refinement" ) )
        || ( $_[ 0 ]->getQueryName() cmp $_[ 1 ]->getQueryName() )
        || ( $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart() )
        || ( $_[ 0 ]->getLineageId() cmp $_[ 1 ]->getLineageId() ) );
    }
  );

  $combinedSearchResults->write( "$file.cat", SearchResult::AlignWithQuerySeq );

  return;
}
##-------------------------------------------------------------------------##
## Use:  my $matrix = &chooseMatrices( $GC_frac );
##
##    Maps a GC level onto the label of one of ten scoring-matrix bins.
##    The refinement step embeds the label in a matrix file name
##    ( "18p" . $label . ".matrix" ).
##
##      $GC_frac : numeric GC level ( presumably a percentage --
##                 confirm against the callers ).
##
##  Returns
##
##    One of "35g", "37g", "39g", ... "51g", "53g".  The bin bounds
##    are inclusive: a value <= 36 gives "35g", a value <= 38 gives
##    "37g" and so on in steps of two; any value above 52 gives "53g".
##
##   Globals Used: None
##-------------------------------------------------------------------------##
sub chooseMatrices {
  my $GC_frac = shift;

  # Inclusive upper bounds of the first nine bins, lowest first.  The
  # label of a bin is its upper bound minus one, followed by "g".
  foreach my $upperBound ( 36, 38, 40, 42, 44, 46, 48, 50, 52 ) {
    return ( ( $upperBound - 1 ) . "g" ) if ( $GC_frac <= $upperBound );
  }

  # Anything above the last bound lands in the top bin.
  return ( "53g" );
}

##-------------------------------------------------------------------------##
## Use:  &addToRefinementCollection( $current, $refinementHash, $seqDB,
##                                   $batchNum, $stage, $elementIdx,
##                                   $adjStart, $adjEnd, $excisions );
##
##    IN DEVELOPMENT.  Records one search result as a candidate for a
##    later refinement search ( another search against a refinement
##    library ).  The result is filed in $refinementHash under the
##    classification parsed from its subject name, which must have
##    the form "name#classification".
##
##      $current        : search result object.  Read through
##                        getSubjName(), getQueryName(),
##                        getQueryStart(), getQueryEnd(), getId() and
##                        getQueryString().
##      $refinementHash : hash reference, modified in place:
##
##          { 'SINE/Alu' => [ { 'seed'   => the $current object,
##                              'qSeqID' => query name,
##                              'qStart' => $adjStart,
##                              'qEnd'   => $adjEnd,
##                              'len'    => length of 'seq',
##                              'sSeqID' => subject name before "#",
##                              'id'     => id of the result,
##                              'seq'    => query string without gap
##                                          characters and without
##                                          excision markers },
##                            { .. } ] }
##
##      $seqDB, $batchNum, $stage, $elementIdx
##                      : accepted but not used by this routine.
##      $adjStart, $adjEnd
##                      : stored as 'qStart' / 'qEnd' of the new entry.
##      $excisions      : hash reference keyed by query name.  Each
##                        value is an array reference of records whose
##                        element [1] is a begin position and whose
##                        element [2] is a length.  The scan stops at
##                        the first record beginning after the query
##                        end, so the records are presumably ordered
##                        by begin position -- confirm with the code
##                        that fills this structure.
##
##  Returns
##
##    The value of the final push ( the number of entries now filed
##    under the classification ).  Dies if the subject name cannot be
##    split into name#classification, or if the result carries no
##    query string.
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub addToRefinementCollection {
  my (
       $current,  $refinementHash, $seqDB,
       $batchNum, $stage,          $elementIdx,
       $adjStart, $adjEnd,         $excisions
  ) = @_;

  # Fields of the element being recorded
  my $subjName = $current->getSubjName();
  my $seqID    = $current->getQueryName();
  my $qStart   = $current->getQueryStart();
  my $qEnd     = $current->getQueryEnd();
  my $parentId = $current->getId();

  # Split the subject name into its name and its Type/Subtype
  # classification.
  my ( $sName, $classification ) = ( $subjName =~ /(.*)\#(.*)/ );
  if ( $classification eq "" || $sName eq "" ) {
    die "RepeatMasker: Error while parsing $subjName into "
        . "( $sName#$classification )\n";
  }

  # Look up the list kept for this classification, starting a new
  # one if this is its first element.
  $refinementHash->{$classification} = []
      unless ( defined $refinementHash->{$classification} );
  my $refColl = $refinementHash->{$classification};

  # The aligned query string is the only source for the sequence:  it
  # can no longer be fetched from the sequence DB because the element
  # may already have been clipped out ( if it's in a cut stage ).
  my $querySeq = $current->getQueryString();
  die "Search engine did not return an alignment for an annotation!\n"
      if ( $querySeq eq "" );

  # Drop the alignment gap characters
  $querySeq =~ s/-//g;

  ##
  ## Remove excision marker "x" from the alignment data.  The marker is
  ## no longer useful in prohibiting overextension and should not be
  ## aligned to the sequence.  Simply deleting every "x" is not possible
  ## because nhmmer/nhmmscan convert the "X" into an "N" before printing
  ## the alignment, so the recorded excisions are consulted instead.
  ##
  if ( $excisions->{$seqID} ) {

    # Offsets ( within $querySeq ) of the marker characters to drop
    my @markerOffsets = ();

    # First query position following the excisions visited so far
    my $newResultBegin = $qStart;

    # Loop over previously cut out elements for this $seqID
    foreach my $excised ( @{ $excisions->{$seqID} } ) {
      my $cutBegin = $excised->[ 1 ];
      my $cutEnd   = $cutBegin + $excised->[ 2 ] - 1;

      # Past the end of this result:  nothing further is examined
      last if ( $cutBegin > $qEnd );

      # Skip excisions starting before this result and the one that
      # coincides exactly with it.
      next if ( $cutBegin < $qStart );
      next if ( $cutBegin == $qStart && $cutEnd == $qEnd );

      # An internal excision.  Its marker is dropped only when it lies
      # beyond the position following the previous excision and the
      # character found there really is an "x" or an "n".
      my $offset = $cutBegin - $qStart;
      if (    ( $cutBegin - 1 ) > $newResultBegin
           && substr( $querySeq, $offset, 1 ) =~ /[xn]/i )
      {
        push @markerOffsets, $offset;
      }
      $newResultBegin = $cutEnd + 1;
    }

    # Delete from the right-most marker leftwards so that the offsets
    # still to be processed remain valid.
    foreach my $offset ( sort { $b <=> $a } @markerOffsets ) {
      substr( $querySeq, $offset, 1, "" );
    }
  }

  # File the element under its classification
  push @{$refColl},
      {
        'seed'   => $current,
        'qSeqID' => $seqID,
        'qStart' => $adjStart,
        'qEnd'   => $adjEnd,
        'len'    => length( $querySeq ),
        'sSeqID' => $sName,
        'id'     => $parentId,
        'seq'    => $querySeq
      };

}

##-------------------------------------------------------------------------##
## Use:  my $refineable = &isRefineable( $searchResult,
##                                       $refineableHashRef );
##
##    This routine returns the refineable status
##    given an element.  A refineable element can
##    be searched against a highly detailed library
##    to return a more refined set of alignments.
##
##      $searchResult      : result object providing getSubjName().
##                           The subject name is expected to have the
##                           form "id#classification".
##      $refineableHashRef : hash reference keyed by element id ( the
##                           part of the subject name in front of the
##                           "#" ).  An id is refineable when its
##                           value in this hash is defined.
##
##  Returns
##
##        1 = true, 0 = false.  A subject name that contains no "#"
##        has no id and is never reported as refineable.
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub isRefineable {
  my $result            = shift;
  my $refineableHashRef = shift;

  my $name = $result->getSubjName();

  # Only the id in front of the "#" is needed here; the classification
  # behind it is ignored.
  my ( $id ) = ( $name =~ /(.*)\#(.*)/ );

  # Without a "#" the match fails and there is no id.  Using the
  # undefined id as a hash key would silently probe the "" key ( and
  # raise an "uninitialized value" warning ), so answer directly.
  return 0 if ( !defined $id );

  if ( defined $refineableHashRef->{$id} ) {
    return 1;
  }
  return 0;
}

##-------------------------------------------------------------------------##
## Use:  &preMaskLevelFilter( $resultsCollection );
##
##    Removes, in place, results which should be filtered out prior
##    to applying the masklevel filter.  Two kinds of results are
##    removed:
##
##      - results whose subject name starts with "SVA" and whose
##        subject range satisfies begin >= 855 && end < 1270, or
##        begin >= 1263 && end < 1372
##      - results whose subject name starts with "LAVA" and whose
##        score is below 500
##
##      $resultsCollection : collection object providing size(),
##                           get( $index ) and remove( $index ).
##
##  Returns
##
##    No intended return value ( the last statement restores $DEBUG ).
##
##    Globals Used: $DEBUG ( forced to 0 while the routine runs and
##                  restored before it returns )
##
##-------------------------------------------------------------------------##
sub preMaskLevelFilter {
  my $searchResults = shift;

  # Debug output is switched off for the duration of this routine;
  # the previous setting is put back on the way out.
  my $saveDEBUG = $DEBUG;
  $DEBUG = 0;
  if ( $DEBUG ) {
    print "preMaskLevelFilter( \$searchResults ); Called\n";
  }

  # DEBUG output ( disabled )
  if ( 0 && $DEBUG ) {
    print "Original Annotations:\n";
    foreach my $i ( 0 .. $searchResults->size() - 1 ) {
      my $annot = $searchResults->get( $i );
      print "#$i:  " . $annot->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }

  #
  # Pass 1: collect the indexes of the results to remove
  #
  my @deleteList = ();
  foreach my $i ( 0 .. $searchResults->size() - 1 ) {

    my $current = $searchResults->get( $i );
    my $name    = $current->getSubjName();
    my $begin   = $current->getSubjStart();
    my $end     = $current->getSubjEnd();

    # SVA contains a bit of Alu.  Originally we searched SVA in
    # sinecutlib along with Alu sequences so that we could
    # compete the Alu with the internal SVA Alu-like sequences
    # ( which are diverged enough to fairly compete ).  We
    # also checked for and removed fragments that only match
    # the SVA region which is nearly identical to LTR5 int-ltr
    # pairs.
    # Now we have SVA ( and its cousin LAVA ) in shortcut lib
    # and buffer the Alu matches in sinecut lib.  In the step
    # below we remove the SVA fragments aligned only to the
    # LTR5-like regions:  the end of the internal LTR5 sequence
    # ( first range ) and the LTR5 LTR sequence ( second range ).
    if (
         $name =~ /^SVA/
         && (    ( $begin >= 855 && $end < 1270 )
              || ( $begin >= 1263 && $end < 1372 ) )
        )
    {
      if ( $DEBUG ) {
        print
"Deleting annotation ($i): preMaskFilter #1 - SVA LTR5 misidentification\n";
      }
      push @deleteList, $i;
    }
    elsif ( $name =~ /^LAVA/ && $current->getScore() < 500 ) {
      if ( $DEBUG ) {
        print
            "Deleting annotation ($i): preMaskFilter #2 - LAVA false positive\n";
      }
      push @deleteList, $i;
    }
  }

  #
  # Pass 2: do the removals.  The indexes were collected in ascending
  # order, so walking the list backwards removes the highest index
  # first and leaves the indexes still to be processed valid.
  #
  if ( @deleteList ) {
    print "Doing actual removals:\n" if ( $DEBUG );
    foreach my $index ( reverse @deleteList ) {
      print "Removing element $index\n" if ( $DEBUG );
      $searchResults->remove( $index );
    }
  }

  # DEBUG output ( disabled )
  if ( 0 && $DEBUG ) {
    print "Final Annotations:\n";
    foreach my $i ( 0 .. $searchResults->size() - 1 ) {
      my $annot = $searchResults->get( $i );
      print "#$i:  " . $annot->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }
  $DEBUG = $saveDEBUG;
}

##-------------------------------------------------------------------------##
## Use:  my filterResults( \%options, $what, $fragCnt, $lib,
##                  $resultsCollection, $tax );
##
##
##  Returns
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub filterResults {
  my %options       = %{ shift() };
  my $chooseClass   = shift;
  my $fragCnt       = shift;
  my $lib           = shift;
  my $searchResults = shift;
  my $tax           = shift;

  my $saveDEBUG = $DEBUG;
  $DEBUG = 0;
  print "filterResults( \%options, $chooseClass, $fragCnt,"
      . " $lib, \$searchResults ); Called\n"
      if ( $DEBUG );

  my (
       $printed_one,   $lastoneisfineorf2, $lastoneisfine3utr,
       $lastonecut,    $delayprint,        $lastlineName,
       $lastlinebegin, $lastlineend,       $lastlineleft
  );

  my $undelPrevious = undef;
  my $previous;
  my $current;
  my $next;

  my @deleteList = ();

  # DEBUG output
  if ( $DEBUG ) {
    print "Original Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }

  #
  # Modify search results and flag annotations for deletion
  #
  for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {

    $current = $searchResults->get( $i );

    my $name = $current->getSubjName();

    # Look for "#buffer" annotations and remove them.  Buffer
    # sequences are sequence (fragments) thrown into a library
    # to protect a sequence from being matched.  This is often
    # used when there is some overlap between libraries.  Matches
    # to #buffer sequences are more likely elements which will
    # be checked at a later stage.
    if ( $name =~ /\#buffer/ ) {    # like MT2B in rodcutsines.lib
      print "Deleting annotation ($i): It's a #buffer sequence.\n"
          if ( $DEBUG );
      push @deleteList, $i;
      next;
    }

    # Grab the current line of data into variables for easy reference
    my $score = $current->getScore();

    # This change works around a problem with NHMMSCAN allowing huge
    # insertions in an alignment.  The percIns goes through the roof
    # and we got division by zero errors here.
    my $diverge;
    if ( ( 100 - $current->getPctInsert() ) <= 0 ) {
      $diverge = $current->getPctDiverge();
    }
    else {
      $diverge = 100 *
          ( $current->getPctDiverge() / ( 100 - $current->getPctInsert() ) );
    }

    my $gaps          = $current->getPctDelete() + $current->getPctInsert();
    my $queryleftover = $current->getQueryRemaining();
    my $reverse       = 0;
    if ( $current->getOrientation() eq "C" ) {
      $reverse = 1;
    }
    my $begin = $current->getSubjStart();
    my $end   = $current->getSubjEnd();
    my $left  = $current->getSubjRemaining();

    #
    # Simple filter on divergence level
    #
    if ( $options{'div'} && $diverge > $options{'div'} ) {
      print "Deleting annotation ($i): Divergence too high.\n" if ( $DEBUG );
      push @deleteList, $i;
      next;
    }

    # Only consider previous elements if they are
    # contained in the same sequence.  Also do not
    # consider previous elements if they were marked
    # for deletion.
    my $havePreviousElement = 0;
    if ( $i > 0 ) {
      $havePreviousElement = 1;
      $previous            = $searchResults->get( $i - 1 );
      if ( $current->getQueryName() ne $previous->getQueryName()
           || ( @deleteList && $deleteList[ $#deleteList ] == $i - 1 ) )
      {
        $havePreviousElement = 0;
        $previous            = undef;
      }
    }

    # Similarly for next elements
    # except elements can't have been deleted yet
    my $haveNextElement = 0;
    if ( $i < $searchResults->size() - 1 ) {
      $next = $searchResults->get( $i + 1 );
      $haveNextElement = 1 if $current->getQueryName() eq $next->getQueryName();
    }

    #
    # Class cut2 lib : CASE #2
    #
    if ( $chooseClass eq 'cut2' ) {

      # cut2.lib
      # cutting out youngish LINE1 3' ends; a tricky business
      # but can have a large (positive) effect only the younger
      # subset of elements in the repeat library are cut; the
      # other consensus seqs are only there to correctly classify
      # matches

# this (high) limit prevents cutting out ancient elements
# misassigned as young elements
#
# WARNING: This only looks back one.  So...in a case like this:
#
#  Original Annotations:
#  #0:  511 15.96 0.00 0.00 CebAlb_disco_line_24154 693 786 (615) C L1PA8_3end#LINE/L1 (1) 918 825
#  #1:  341 24.37 9.92 0.84 CebAlb_disco_line_24154 693 811 (590) C L1PA8A_3end#LINE/L1 (0) 878 748
#  #2:  508 19.81 0.00 0.00 CebAlb_disco_line_24154 693 798 (603) C L1PA7_3end#LINE/L1 (1) 900 795
#
#  Deleting annotation (1): Filter #9 - < 75% identical.
#  Doing actual removals:
#  Removing element 1
#  Final Annotations:
#  #0:  511 15.96 0.00 0.00 CebAlb_disco_line_24154 693 786 (615) C L1PA8_3end#LINE/L1 (1) 918 825
#  #1:  508 19.81 0.00 0.00 CebAlb_disco_line_24154 693 798 (603) C L1PA7_3end#LINE/L1 (1) 900 795
#
# This should be fixed.
#
      if ( $diverge + $gaps < 25 ) {

        if ( $name =~ /_3end\#/ && $left < 20 ) {

          # If we overlap with the previous element by more than 20bp
          if (    $havePreviousElement
               && $previous->getQueryEnd() > $current->getQueryStart() + 20 )
          {

            # Delete this element if we are contained by the previous element
            # or if the previous element was cut.
            # not necessary to look at next element:
            # longer extending element is always presented first
            if (    $previous->getQueryEnd() >= $current->getQueryEnd()
                 || $lastonecut )
            {
              $lastoneisfine3utr = "";
              $lastonecut        = 0;
              print "Deleting annotation ($i): Filter #3 - Contained\n"
                  . "by another annot.\n"
                  if ( $DEBUG );
              push @deleteList, $i;
            }
            ## Overlap of body and 3' end
            # If the previous element was a good body (orf2) and this is
            # in the correct orientation (forward).
            elsif ( $lastoneisfineorf2 && !$reverse ) {
              print "Modifying annotation ($i): Filter #4 - Extending \n"
                  . "line 3' end to include body.\n"
                  if ( $DEBUG );
              if ( $previous->getScore() > $score ) {

                # body scored better so use it's score and pctSub
                $current->setScore( $previous->getScore() );
                $current->setPctDiverge( $previous->getPctDiverge() );
              }

              # Include previous element boundries
              $current->setQueryStart( $previous->getQueryStart() );
              my $tmpName = $name;
              $tmpName =~ s/\#/extended\#/;
              $current->setSubjName( $tmpName );    # used in ProcessRepeats
              $current->setSubjBegin( $lastlinebegin );
              if ( $lastoneisfineorf2 eq 'L1P_orf2' ) {
                $current->setSubjEnd( $end + 3144 );
              }
              else {  # currently only rodent as alternative; need to generalize
                $current->setSubjEnd( $end + 4384 );
              }

              # Remove the body annotation and keep the 3' extended annotation.
              $lastonecut        = 1;
              $lastoneisfine3utr = "";
              print "Deleting annotation ($i-1): Filter #4 - Body assimilated\n"
                  . " by 3' end annot.\n"
                  if ( $DEBUG );
              push @deleteList, $i - 1;
            }
            else {

              # Some freakish combination remove it
              $lastonecut        = 0;
              $lastlinebegin     = "";
              $lastoneisfineorf2 = "";
              print "Deleting annotation ($i-1): Filter #5 - Unknown 3' "
                  . "overlap combination.\n"
                  if ( $DEBUG );
              push @deleteList, $i;
            }
          }    # If we overlap with the previous element by more than 20bp...
               # Overlap <= 20 and several other factors.
               # L1 ORFs and body consensus sequences are built with a 150bp
               # overlap.  If the alignment starts at > 150bp then it truely
               # matches the 3'end part.
          elsif (
            ( $begin > 150 || !$reverse && $name =~ /L1PA\d\#|L1Hs|L1Rn|L1Md/ )

            # L1PA, L1Hs, L1Rn...are young elements. It's safe to assume
            # that no extensions will be found with a shorter wordlength.
            # Therefore clip just the 3' consensus portion out as an
            # independent 3' end.
            && (    !$fragCnt
                 || $current->getQueryStart() > 50 && !$reverse
                 || $queryleftover > 50            && $reverse )

            # Handle the case where this end could be extended but its
            # to close to a boundry ( of a fragmented sequence ) to tell.
              )
          {

         # at the edge of query fragments (< 50 bp left); extension may be found
         # in overlapping fragment
            if (    $havePreviousElement
                 && $previous->getQueryEnd() >= $current->getQueryEnd() )
            {

              #accidental overlap < 20 bp
              if ( !$reverse || $lastonecut ) {
                print "Modifying annotation ($i): Filter #5a - 3' end "
                    . "start adjustment\n"
                    . " (accidental overlap)\n"
                    if ( $DEBUG );
                $current->setQueryStart( $previous->getQueryEnd() + 1 );
              }
            }
            $lastonecut        = 1;
            $lastoneisfine3utr = "";
          }
          else {

            # Ok....this is strange.  If this is set....and for some
            # reason a body is not quickly located...this annotation
            # will never be clipped.
            $lastoneisfine3utr = 1 if $reverse;
            $lastonecut        = 0;

            # Strange that we don't keep it if its a singleton
            print "Deleting annotation ($i): Filter #6 - "
                . "Assimilation or singleton\n"
                if ( $DEBUG );
            push @deleteList, $i;
          }
        }    # if ($name =~ /_3end\#/...
        elsif (
             $name =~ /_orf2/
          && $begin > 100
          && $left < 20

          # only the younger L1 body consensi are suffixed '_orf2';
          # they are all 5' shortened in part because no 5' ends
          # (which overlap with the full consensi) are included
          # in this comparison only matches labeled 'extended'
          # are clipped, wich are treated separately in ProcessRepeats
          && ( !$fragCnt || $current->getQueryStart() > 50 )
            )
        {

          # forward: may extend into previous frag; reverse: lack of
          # complete 3' UTR guaranteed
          if (    $havePreviousElement
               && $lastoneisfine3utr
               && $previous->getQueryEnd() > $current->getQueryStart()
               && $reverse
               && ( !$fragCnt || $queryleftover > 50 ) )
          {
            print "Modifying annotation ($i): Filter #7 - Reverse "
                . "body can be combined with 3'.\n"
                if ( $DEBUG );
            if ( $previous->getScore() > $score ) {
              $current->setScore( $previous->getScore() );
              $current->setPctDiverge( $previous->getPctDiverge() );
            }
            $current->setQueryStart( $previous->getQueryStart() );
            $current->setSubjName( ( $lastlineName =~ s/\#/extended\#/ ) );
            $current->setSubjLeft( $lastlineleft );
            if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
              $current->setSubjEnd( $lastlineend + 3144 );
            }
            else {    #right now only alternative is rodents;
              $current->setSubjEnd( $lastlineend + 4384 );
            }
            $lastonecut        = 1;
            $lastoneisfineorf2 = "";
          }
          else {
            $lastonecut        = 0;
            $lastoneisfineorf2 = $name if !$reverse;
            $lastlinebegin     = $begin;
            print "Deleting annotation ($i): Filter #7 - Not extended (yet).\n"
                if ( $DEBUG );
            push @deleteList, $i;
          }
          $lastoneisfine3utr = "";
        }
        else {

          # I'm not cutting out 3' UTRs on opposite strand that begin < 150
          # and do not overlap with ORF2
          $lastoneisfine3utr = $lastoneisfineorf2 = "";
          $lastonecut = 0;
          print "Deleting annotation ($i): Filter #8 - A bodyless 3' UTR.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {    # if <75% identical
        $lastonecut = 0;
        print "Deleting annotation ($i): Filter #9 - < 75% identical.\n"
            if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # Class cut2.lib
         #
         # Class mirs
         #
    elsif ( $chooseClass eq 'mirs' ) {

      my $classnow  = $name;
      my $classthen = $lastlineName;
      $classnow  =~ s/.+\#//;
      $classthen =~ s/.+\#//;

      # Mirslib:
      # The MIR and MIR3 SINEs share 51 and 78 bp (inexactly) with the L2
      # and L3 LINEs, respectively. Occasionally, a fragment is matched to
      # both a SINE and a LINE, confusing ProcessRepeats and often extending
      # the match (and the masking) too far.  This happens quite often
      # > 1,000 times in HG19.
      #
      # Here is an example:
      # 392  28.7 19.6  3.2  chr1 65671617 65671936 +  L2a  3022 3393   (33)
      # 191  25.0  0.0  0.0  chr1 65671907 65671942 +  MIR   206  241   (21)
      #
      # Ratios appear to hold for nhmmer ( see linesine.fa example sequence ).
      #
      if (    $name =~ /^L2a|^L2b|^L3|^MIR/i
           && $havePreviousElement
           && $previous->getQueryEnd() > $current->getQueryStart() + 33
           && $classnow ne $classthen )
      {
        if (
             $current->getQueryEnd() - $previous->getQueryEnd() <= 30
             && (    $previous->getScore() >= 1.05 * $score
                  || $current->getQueryEnd() - $previous->getQueryEnd() <
                  $current->getQueryStart() - $previous->getQueryStart()
                  && $previous->getScore() >= 0.83 * $score )
            )
        {
          print "Deleting annotation ($i): Filter #9a - "
              . "Preceeding L2/L3/Mir better.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif (
                $current->getQueryStart() - $previous->getQueryStart() <= 30
                && (    $score >= 1.05 * $previous->getScore()
                     || $current->getQueryEnd() - $previous->getQueryEnd() >
                     $current->getQueryStart() - $previous->getQueryStart()
                     && $score >= 0.83 * $previous->getScore() )
            )
        {
          print "Deleting annotation ($i-1): Filter #9a - "
              . "Following L2/L3/Mir better.\n"
              if ( $DEBUG );
          push @deleteList, $i - 1;
        }
      }
      else {

        # The crossmatch/blast thresholds for the mirslib are too
        # low for this one element.  NHMMSCAN uses per-model threshold
        # so we should not filter if using NHMMSCAN.
        # NOTE: This element is also in shortlib/shortcutlib
        if (    $options{'engine'} ne "hmmer"
             && $name =~ /MER91/
             && $score < 200 )
        {
          print "Deleting annotation ($i): Filter #9b - A poor MER91.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }    #class mirs
         #
         # Class sines
         #
    elsif ( $chooseClass eq 'sines' ) {

      if ( $name =~ /^MLT2B/ ) {

        # Occasionally the match to Ricksha is broken up by a gap
        # while spanned in one piece  by a (much more diverged) match to MLT2
        if (
             (
                  $havePreviousElement
               && $previous->getSubjName() =~ /^Ricksha/
               && $current->getQueryStart() < $previous->getQueryEnd() - 50
               && $score < $previous->getScore()
             )
             || (    $haveNextElement
                  && $next->getSubjName() =~ /^Ricksha/
                  && $current->getQueryEnd() > $next->getQueryStart() + 50
                  && $score < $next->getScore() )
            )
        {
          print
"Deleting annotation ($i): Filter \#9c - MLT2 really is Ricksha.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      elsif (    $name eq 'MER3'
              && $current->getSubjStart() > 30
              && $current->getSubjEnd() < 125 )
      {

# MER3 and MER33 share TIRs, but MER3 has 47 bp extraTIReal seqeunce at the 5' end
# this sometimes leads to MER3 and MER33 alignments overlapping because
# the alignments can be (falsely) extended a few bases further to the MER3 consensus
        if (
             (
                  $havePreviousElement
               && $previous->getSubjName()   eq 'MER33'
               && $current->getOrientation() eq 'C'
               && $current->getQueryEnd() - $previous->getQueryEnd() < 20
             )
             || (    $haveNextElement
                  && $next->getSubjName()       eq 'MER33'
                  && $current->getOrientation() eq '+'
                  && $next->getQueryStart() - $current->getQueryStart() < 20 )
            )
        {
          print
              "Deleting annotation ($i): Filter \#9d - MER3 really is MER33.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }

      # This was handled before the divergence filter.

      if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {

        #
        # TODO:  For now we don't have a HMM models for rodent.  When
        #        we do...we need to revisit the score ratios used below.
        #
        # Similar to above MER3/MER33 case, B4 contains a B1 and alignments
        # can continue a few meaningless bases beyond a B1
        if (    $havePreviousElement
             && $name                    =~ /^B1/
             && $previous->getSubjName() =~ /^B4/
             && $current->getQueryStart() < $previous->getQueryEnd() - 50
             && $score > 1.5 * $previous->getScore()
             && $previous->getSubjStart() >= 80 )
        {

          # deleted requirement of score > 226; this was in original code
          # as the B4 element would otherwise not be considered anyway
          print "Deleting annotation ($i-1): Filter \#10a.\n" if ( $DEBUG );
          push @deleteList, $i - 1;
        }
        elsif (    $haveNextElement
                && $name                =~ /^B4/
                && $next->getSubjName() =~ /^B1/
                && $next->getQueryStart() < $current->getQueryEnd() - 50
                && $next->getScore() > 1.5 * $score
                && $begin >= 80 )
        {
          print "Deleting annotation ($i): Filter \#10b.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $name =~ /SINE/ && $begin <= 100 )
        {    # check for false positives ID??
              # instead of setting $score < 225" in parameters, here we
              # set the cutoff for all none-SINEs allows SINEs with score
              # 200-225 to get masked
              # TODO: Check...this should never happen these days
          if ( $name eq "RSINE1" && $begin > 50 && $score < 225 ) {
            print "Deleting annotation ($i): Filter \#10bb.\n" if ( $DEBUG );
            push( @deleteList, $i );
          }

          # TODO: Is this even correct...we don't use getOverlap here?
        }
        elsif ( $name =~ /^ORR1.*-int/ && $current->getOverlap() eq '*' ) {

          # location of partially masked B1 in consensus;
          # only skipped if there is a better match overlapping it
          # this may occasionally leave a genuine small bit of
          # ORR1 internal unmasked if it overlaps with something
          # else than a B1
          # (MaLR internals are screened in the shortlib session)
          if (    $name =~ /^ORR1A1-int/ && $begin > 530 && $end < 680
               || $name =~ /^ORR1A3-int/ && $begin >= 595 && $end <= 910
               || $name =~ /^ORR1B1-int/ && $begin >= 585 && $end <= 860 )
          {
            print "Deleting annotation ($i): Filter #11.\n" if ( $DEBUG );
            push @deleteList, $i;
          }
        }
        elsif (    $name eq "RatSatRep1"
                && $begin > 1150
                && $end < 1750
                && $diverge > 16 )
        {

# this fragment matches a retroviral internal sequence (screened only in the next step)
          print "Deleting annotation ($i): Filter #12.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $score < 225 ) {
          print "Deleting annotation ($i): Filter #13.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $score <= 350 ) {

       # bits of low complexity in consensi that lead to matches in reversed DNA
          if (
               $name =~ /^RLTR21/   && $begin > 1090 && $end < 1365
            || $name =~ /^RMER12\#/ && $begin > 750  && $end < 1025
            || $score < 310 && (
              $name =~ /^RMER17/ && (
                   $name =~ /^RMER17A\#/  && $begin > 205 && $end < 460
                || $name =~ /^RMER17A2\#/ && $begin > 290 && $end < 495
                || $name =~ /^RMER17B\#/  && $begin > 840 && $end < 940
                ||    # that's an unexpected one
                   $name =~ /^RMER17B\#/  && $begin > 250 && $end < 445
                || $name =~ /^RMER17C\#/  && $begin > 80  && $end < 250
                || $name =~ /^RMER17D\#/  && $begin > 245 && $end <= 500
                || $name =~ /^RMER17D2\#/ && $begin > 255 && $end <= 525
              )
              || $name =~ /^RNLTR15A/
              && (    $name =~ /^RNLTR15A\#/ && $begin > 1440 && $end < 1550
                   || $name =~ /^RNLTR15A2\#/ && $begin > 1310 && $end < 1420 )
            )
              )
          {
            print "Deleting annotation ($i): Filter #14.\n" if ( $DEBUG );
            push @deleteList, $i;
          }
        }
      }
      elsif ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
        if (  $name =~ /^LTR66/ && $begin > 380 && $end < 470
           || $name =~ /^MER45R/ && $score < 275 && $begin > 310 && $end < 400 )
        {
          print "Deleting annotation ($i): Filter #15.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }    # class sines
         #
         # Class longlib
         #
         # Low complexity regions of LINEs.  NHMMSCAN masks out these
         # sections of the model so we do not need to filter out
         # hits like this.
    elsif ( $options{'engine'} ne "hmmer" && $chooseClass eq 'longlib' ) {

      if (
              $name =~ /L1MC3_3end/ && $begin > 1310 && $end < 1400
           || $name =~ /L1MC4_3end/ && $begin > 1325 && $end < 1475
           || $name =~ /^Lx9/       && $begin > 1860 && $end < 1940
           || $score <= 300
           && (    $name =~ /^L1_Mur2\#/ && $begin > 825 && $end < 932
                || $name =~ /L1MCa_5end/ && $begin > 1880 && $end < 1980
                || $name =~ /^L1_Mur3\#/ && $begin > 835  && $end < 905 )
          )
      {
        print "Deleting annotation ($i): Filter #16.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class longlib
         #
         # Class l1
         #
         # Low complexity regions of LINEs.  NHMMSCAN masks out these
         # sections of the model so we do not need to filter out
         # hits like this.
    elsif (    $options{'engine'} ne "hmmer"
            && $chooseClass eq 'l1'
            && $score < 425 )
    {    # run with -raw

      if (
           $name =~ /^HAL1\#/    && $begin > 900  && $end < 1350 && $score < 375
        || $name =~ /^HAL1b\#/   && $begin > 695  && $end < 910  && $score < 350
        || $name =~ /L1M4_orf2/  && $begin > 700  && $end < 1340 && $score < 375
        || $name =~ /L1MC3_3end/ && $begin > 1275 && $end < 1430 && $score < 350
        || $name =~ /L1MC4_3end/ && $begin > 1260 && $end < 1490 && $score < 400
        || $name =~ /L1MC4_5end/ && $begin > 850  && $end < 1340 && $score < 415
        || $name =~ /L1MCa_5end/ && $begin > 1855 && $end < 1995 && $score < 350
        || $name =~ /L1M4b_5end/ && (    $begin > 2800 && $end < 3350
                                      || $begin > 2180 && $end < 2300 )
        && $score < 400
        || $name =~ /L1MDa_5end/ && $begin > 2420 && $end < 2505 && $score < 350
        || $name =~ /L1ME4a_3end/ && $begin > 475 && $end < 725 && $score < 350
        || $name =~ /L1MEb_5end/  && $begin > 620 && $end < 800 && $score < 350
        || $name =~ /L1MEc_5end/ && $begin > 2100 && $end < 2500 && $score < 360
          )
      {
        print "Deleting annotation ($i): Filter #17.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class l1
         #
         # Class simple
         #
    elsif ( $chooseClass eq "simple" ) {

      # TODO: Check that now in the TRF era we are handling this correctly.
      #       My feeling is that we need to check for TTTT|(T)n now as well.
      if ( $name !~ /AAAAA|\(A\)n/ && $name !~ /Satellite/ ) {

        # avoid cutting out polyA tails
        # decision to cut out simple repeat dependent on the
        # complexity of the unit
        my ( $length, $complex ) = &calcSimpleRepeatComplexityFromName( $name );
        my $cutoff = 16 - 3 * ( $length - $complex );
        if (    $havePreviousElement
             && $previous->getQueryEnd() >= $current->getQueryStart() )
        {

# cutting out overlapping simple repeats leads to problems
# as some nucleotides are cut out twice (some flanking DNA
# will be deleted instead)
# masklevel is set to 1; masklevel 0 would avoid any
# overlaps, but behaves oddly ( only in crossmatch and not our implementation? )
          my $newQueryStart = $previous->getQueryEnd() + 1;
          $current->setQueryStart( $newQueryStart );
        }
        unless (    $name !~ /\([GA]*\)/ && $diverge + $gaps / 1.5 <= $cutoff
                 || $diverge + $gaps < ( $score - 200 ) / 50
                 || $diverge + $gaps <= 5 )
        {
          print "Deleting annotation ($i): Filter #18.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {
        print "Deleting annotation ($i): Filter #19.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class simple
         #
         # Class alu
         #
    elsif ( $chooseClass eq "alu" ) {
      if (    $havePreviousElement
           && $previous->getQueryEnd() >= $current->getQueryStart() )
      {

        #we're stuck with a masklevel 1; 0 behaves odd
        my $newfield5 = $previous->getQueryEnd() + 1;

        # Again...see above....but why?
        #$current->setQueryName( quotemeta $current->getQueryName() );
        $current->setQueryStart( $newfield5 );

        #s/(.*$fields[4]\s+)$fields[5](\s$fields[6].*)/$1$newfield5$2/;
      }

      #
      # Ensure the element is full length
      #   For ALUs we are tolerant of a missing Poly-A tail.  The
      #   general case uses consensus remaining to ensure it's near
      #   full length.
      #
      #   FLAM/FRAM/FAM in primates are special cases
      #      They represent the monomers of the Alu sequence.
      #      These models should *not* be clipped out as removing full
      #      length Alus might succeed in uncovering two arms of
      #      another Alu that will score better than two F[L/R]AM
      #      elements. This is avoided by using a buffer for FLAM/FRAM
      #      in sinecutlib.
      #
      #   In non-primate species there are also sines we attempt
      #   to clip out.  Since we do not have the length of these
      #   elements hardcoded we use the begin/left fields to define
      #   what we mean by near-full length.
      #
      unless (    $name =~ /^Alu/ && ( $begin < 70 && $end > 185 )
               || $begin < 6 && $left < 5 )
      {
        print "Deleting annotation ($i): Filter #20.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class alu
         #
         # Class cut1
         #
    elsif ( $chooseClass eq "cut1" ) {

# for non-LINEs criteria are only that 5 bp or less are missing from ends;
# no need to require a maximum divergence, since element is complete (can't get better)
      if (    $begin < 6 && $left < 5 && $name !~ /LINE/
           || $name =~ /SINE/ && $begin < 6 && $left < 20 )
      {
        ## used to be restricted to Alu and SINE/B..;
        if (    $havePreviousElement
             && $previous->getQueryEnd() >= $current->getQueryStart() )
        {
          $current->setQueryStart( $previous->getQueryEnd() + 1 );

          # since both last and current are full length, no overlaps
          #of more than a few bases occur
        }

        # TODO: This looks like we are trying to buffer BC[12]* elements in
        # this stage.  Perhaps it would be better to just buffer them?  See
        # similar comment under the "alu" filter for FAM/FLAM/FRAM.
        if ( ( $name =~ /RNA$/ || $name =~ /^7SL/ ) && $options{'norna'}
             || $name =~ /^BC[12]\S+SINE/ )
        {

          #fixed feb 03; was RNA &&
          print "Deleting annotation ($i): Filter #21. Avoid cutting out RNAs"
              . "when the user has shut this off.  Also if this is a BC[12] SINE "
              . "we don't wish to cut this out in the cut1 phase???\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {
        print "Deleting annotation ($i): Filter #22: Non-complete elements "
            . "could overlap with full length elements.  We want to avoid "
            . "shortening full length elements when the evidence is weak.\n"
            if ( $DEBUG );
        push @deleteList, $i;
      }
    }    #class cut1
         #
         # Class alumask
         #
    elsif ( $chooseClass eq "alumask" ) {

      # NO LONGER USED
    }    # class alumask
         #
         # Catchall class
         #
    else {    # anything that give false positives in the other libraries
              # Arian: it's possible that this doesn't matter
              # TODO: Check this.
              # NHMMSCAN certainly doesn't need this
      if ( $options{'engine'} ne "hmmer" && $score < 300 ) {
        if (    $name =~ /PRIMA4-int/ && $begin > 475 && $end < 575
             || $name eq "LTR39-int" && $begin > 3900 && $end < 3990
             || $name eq "LTR38-int" && $begin > 1350 && $end < 1450 )
        {
          print "Deleting annotation ($i): Filter #24.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }

    if ( $name =~ /RNA$/ && $options{'norna'} ) {
      print "Deleting annotation ($i): Filter #25.\n" if ( $DEBUG );
      push @deleteList, $i;
    }

    #
    # Contained elements surviving to this point can cause problems in
    # the excision routine.  Ie.
    #    Excising repeat ( step = 4 ): 53276 - 53394
    #    Excising repeat ( step = 4 ): 53262 - 53394
    #       FastaDB::substr - Error index out of bounds!
    #             at ./RepeatMasker line 4589
    # Delete only if in a cut class, has not already been deleted,
    # is in the same sequence, and overlaps at all with the previous
    # undeleted element.
    #
    if (
         $chooseClass =~ /simple|alu|cut1|cut2/
         && ( !@deleteList
              || $deleteList[ $#deleteList ] != $i )
         && $undelPrevious
         && $current->getQueryName() eq $undelPrevious->getQueryName()
         && $undelPrevious->getQueryEnd() >= $current->getQueryEnd()
        )
    {
      print "Deleting annotation ($i): Filter #26 - Contained by filter\n"
          if ( $DEBUG );
      push @deleteList, $i;
    }

    $lastlineName = $name;
    $lastlineleft = $left;
    $lastlineend  = $end;

    $undelPrevious = $current
        if ( !@deleteList
             || $deleteList[ $#deleteList ] != $i );

  }    # for ( my $i = 0 ; $i < $searchResults->size()...

  # Return a boolean indicating if there were hits left
  # after this process completed
  if ( @deleteList ) {

    # Remove duplicates - not sure if the above generates duplicates but
    # to be on the safe side
    my %seen = ();
    my @uniqDeleteList = grep { !$seen{$_}++ } @deleteList;
    print "Doing actual removals:\n" if ( $DEBUG );
    foreach my $index ( sort { $b <=> $a } @uniqDeleteList ) {
      print "Removing element $index\n" if ( $DEBUG );
      $searchResults->remove( $index );
    }
  }

  # DEBUG output
  if ( $DEBUG ) {
    print "Final Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }
  $DEBUG = $saveDEBUG;
}    # sub filterResults

##-------------------------------------------------------------------------##
## Use:  my ( $length, $complex ) =
##                     calcSimpleRepeatComplexityFromName( $name );
##
##    $name : Name of a simple repeat, e.g. "(CA)n" or
##            "(CA)n#Simple_repeat".
##
##  Returns
##
##    A two element list:
##
##      $length  : Number of characters in the repeat unit.  The unit is
##                 the run of word characters directly following a
##                 leading "(".  If the name does not start that way the
##                 whole name is used as the unit.
##      $complex : ( L*ln(L) - sum( n*ln(n) ) ) / ln(4), where L is
##                 $length and n runs over the counts of "A", "C", "G"
##                 and "T" in the unit.  For a unit made up of only
##                 those four (uppercase) letters this is the unit
##                 length times its base-4 Shannon entropy: 0 for a
##                 mononucleotide unit, $length when all four bases
##                 are equally frequent.  Any other character adds to
##                 $length but not to the sum.
##
##    An empty or undefined name returns ( 0, 0 ).
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub calcSimpleRepeatComplexityFromName {
  my ( $name ) = @_;
  $name = "" if ( !defined $name );

  # Strip the name down to the repeat unit: "(CA)n#Simple_repeat" -> "CA"
  $name =~ s/^\((\w+).*/$1/;
  my $length = length $name;

  # log( 0 ) is a fatal error in Perl; an empty unit has no composition
  # to measure so report zero length and zero complexity instead.
  return ( 0, 0 ) if ( $length == 0 );

  # Tally the unit composition
  my %num = ( "A" => 0, "C" => 0, "G" => 0, "T" => 0 );
  foreach my $base ( split( //, $name ) ) {
    ++$num{$base};
  }

  # Sum n*ln(n) over the four bases, skipping absent ones ( again to
  # stay clear of log( 0 ) ).
  my $sumNLogN = 0;
  foreach my $base ( "A", "C", "G", "T" ) {
    $sumNLogN += $num{$base} * log( $num{$base} ) if ( $num{$base} );
  }

  my $complex = ( $sumNLogN - $length * log( $length ) ) / ( -log( 4 ) );
  return ( $length, $complex );
}

##-------------------------------------------------------------------------##
## Use:  my ( $beginRef, $endRef ) = getSegments( $resultsCollection );
##
##    $resultsCollection : Collection of hits.  Only size(), get( $i )
##                         and, on each hit, getQueryName(),
##                         getQueryStart() and getQueryEnd() are used.
##
##  Returns
##
##    Two hash references keyed by query sequence name.  Each value is
##    a list with one entry per hit, in the order the hits appear in
##    the collection:
##
##     %begin = { name1 => [ pos1, pos2, pos3, ... ],
##                name2 => [ pos1, pos2, ... ] };
##     %end   = { name1 => [ pos1, pos2, pos3, ... ],
##                name2 => [ pos1, pos2, ... ] };
##
##    A hit that starts at or before the end of the hit directly
##    preceding it on the same sequence is blunted: its start is moved
##    to one past that end, unless the moved start would no longer lie
##    strictly before the hit's own end, in which case the start is
##    left alone.  End positions are never altered and the hits in the
##    collection are not modified.
##
##  NO Globals Used
##-------------------------------------------------------------------------##
sub getSegments {
  my ( $resultsCollection ) = @_;

  # Per-sequence lists of ( possibly blunted ) starts and of ends
  my ( %begin, %end );

  # End position and sequence name of the hit handled just before
  my ( $prevEnd, $prevSeq );

  foreach my $idx ( 0 .. $resultsCollection->size() - 1 ) {
    my $hit      = $resultsCollection->get( $idx );
    my $seqName  = $hit->getQueryName();
    my $segStart = $hit->getQueryStart();
    my $segEnd   = $hit->getQueryEnd();

    # Blunt end overlapping annotations.  The final test keeps the
    # original start whenever blunting would give a start at or past
    # the end ( i.e. avoids negative lengths ).
    if (    $prevEnd
         && $seqName eq $prevSeq
         && $prevEnd >= $segStart
         && ( $prevEnd + 1 ) < $segEnd )
    {
      $segStart = $prevEnd + 1;
    }

    push @{ $begin{$seqName} }, $segStart;
    push @{ $end{$seqName} },   $segEnd;

    ( $prevSeq, $prevEnd ) = ( $seqName, $segEnd );
  }

  return ( \%begin, \%end );
}

##-------------------------------------------------------------------------##
## Use:  postProcessSearch( $options, $resultsCollection, $excisions,
##                          $cutResults, $sentinelLength, $seqDB,
##                          $optInv, $outfile, $refineableHashRef,
##                          $refinementHash, $batchNum, $stage );
##
##  Walks the hits of one search stage from last to first and, for each:
##
##    - cuts it out of $seqDB ( the stretch is replaced by
##      $sentinelLength 'x' characters and recorded as an excision
##      range ) when $cutResults is set and the 'nocut' option is not,
##      otherwise overwrites it in $seqDB with 'X' characters;
##    - rescales its percent divergence by 100 / ( 100 - %insertion );
##    - assigns an id of the form [c|m]_b<batch>s<stage>i<index>;
##    - translates its query coordinates with queryExcision(), handing
##      the hit to addToRefinementCollection() first if isRefineable()
##      accepts it.
##
##  Afterwards fragmentAnnotations() is run on the collection, the new
##  excision ranges ( if any ) are sorted and merged into $excisions via
##  addExcisionSet(), and the collection is written to $outfile.
##
##  Returns
##
##    Nothing.  $seqDB, $excisions and the hits in $resultsCollection
##    are modified in place.
##
##  Globals Used: %options ( 'nocut' ).
##    NOTE(review): the $options argument is accepted but never
##    dereferenced; the flag is read from the %options hash - confirm
##    this is what callers expect.
##-------------------------------------------------------------------------##
sub postProcessSearch {
  my (
       $options,        $resultsCollection, $excisions,
       $cutResults,     $sentinelLength,    $seqDB,
       $optInv,         $outfile,           $refineableHashRef,
       $refinementHash, $batchNum,          $stage
  ) = @_;

  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  # Are hits excised ( true ) or merely masked ( false ) in this stage?
  my $cutMode = ( $cutResults && !$options{'nocut'} );

  # First part of the ids handed out below: c = Cut, m = Mask
  my $idPrefix = $cutMode ? "c_b" : "m_b";

  #
  # The resultsCollection is expected to arrive sorted by sequence id
  # and ascending query position.  Visiting it back to front therefore
  # yields descending positions per sequence, so altering $seqDB at one
  # hit cannot shift the coordinates of hits still to be visited.
  #
  my $excisionRanges = SearchResultCollection->new();
  my ( $prevBeg, $prevName ) = ( 0, "" );

  foreach my $i ( reverse( 0 .. $resultsCollection->size() - 1 ) ) {
    my $result = $resultsCollection->get( $i );
    my $name   = $result->getQueryName();
    my $begin  = $result->getQueryStart();
    my $end    = $result->getQueryEnd();
    my $len    = $end - $begin + 1;

    if ( !$cutMode ) {

      # Mask: overwrite the hit with X's
      eval { $seqDB->setSubstr( $name, $begin - 1, $len, 'X' x $len ) };
      if ( $@ ) {
        croak "$subroutine: $@\nAttempting to mask $name from "
            . ( $begin - 1 ) . " to "
            . ( $end )
            . " ( len = $len )\n";
      }
    }
    else {

      # Cut: when this hit runs into the one handled just before it
      # ( which lies further along the same sequence ) only the part
      # in front of that hit is still there to be removed.
      my $overlapsLater = ( $prevName eq $name && $prevBeg <= $end );
      my $cutEnd        = $overlapsLater ? $prevBeg - 1 : $end;
      $len = $cutEnd - $begin + 1;

      # An overlapping hit with nothing in front of the later one:
      #        --------->
      #        ----->
      # leaves nothing to cut and is skipped.
      if ( !$overlapsLater || $len > 0 ) {
        $seqDB->setSubstr( $name, $begin - 1, $len, 'x' x $sentinelLength );
        $excisionRanges->add(
                              SearchResult->new(
                                                 queryName  => $name,
                                                 queryStart => $begin,
                                                 queryEnd   => $cutEnd
                              )
        );
      }
    }
    ( $prevBeg, $prevName ) = ( $begin, $name );

    #  Modify divergence
    #   TODO: Expand on the motivation here. Divergence should not
    #         be spread over large insertions.  It should be related
    #         to the aligned bases only.
    my $percDiv = $result->getPctDiverge();
    my $percIns = $result->getPctInsert();
    my $adjDiv  =
        ( $percIns < 100 )
        ? 100 * ( $percDiv / ( 100 - $percIns ) )
        : $percDiv;
    $result->setPctDiverge( sprintf( "%4.2f", $adjDiv ) );

    #
    #  ID Scheme:
    #      action:  m = Mask, c = Cut
    #      batch:   Batch number
    #      stage:   Search Stage
    #      element: Position in search results
    #
    #  i.e The 3rd element coming from batch 2 and search stage 7 and
    #      was cut would be labeled:  c_b2s7i3
    #
    $result->setId( $idPrefix . $batchNum . "s" . $stage . "i" . $i );

    # Adjust coordinates
    my ( $globalBegin, $globalEnd ) =
        &queryExcision( $result->getQueryName(), $result->getQueryStart(),
                        $result->getQueryEnd(), $excisions, $sentinelLength );

    # RMH: 11/26/12
    # Must run before $result is modified and after new coordinates are
    # obtained.
    if ( &isRefineable( $result, $refineableHashRef ) ) {
      addToRefinementCollection(
                                 $result,      $refinementHash,
                                 $seqDB,       $batchNum,
                                 $stage,       $i,
                                 $globalBegin, $globalEnd,
                                 $excisions
      );
    }

    $result->setQueryStart( $globalBegin );
    $result->setQueryEnd( $globalEnd );
  }

  # Fragment annotations
  &fragmentAnnotations( $resultsCollection, $excisions, $sentinelLength );

  # Update excise list.
  if ( $cutMode ) {

    # The ($$) prototype is kept on purpose: Perl's sort hands the two
    # elements to a prototyped comparator in @_ rather than $a/$b.
    # NOTE(review): presumably SearchResultCollection::sort passes this
    # sub straight to the sort builtin - confirm before removing it.
    my $byNameThenStart = sub ($$) {
      my ( $left, $right ) = @_;
      return (    ( $left->getQueryName() cmp $right->getQueryName() )
               || ( $left->getQueryStart() <=> $right->getQueryStart() ) );
    };
    $excisionRanges->sort( $byNameThenStart );

    &addExcisionSet( $excisionRanges, $excisions, $sentinelLength );
  }

  # Write out results
  if ( $optInv ) {
    $resultsCollection->write( $outfile, SearchResult::AlignWithSubjSeq );
  }
  else {
    $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
  }

  return;

}

##-------------------------------------------------------------------------##
## Use:  addExcisionSet( $resultCollection, $excisions, $sentinelLength );
##
##    $resultCollection : Collection of freshly excised ranges.  Only
##                        size(), get( $i ) and, per range,
##                        getQueryName(), getQueryStart() and
##                        getQueryEnd() are used.  Ranges of one
##                        sequence must be adjacent and sorted on
##                        queryStart or overlaps go undetected.
##    $excisions        : Hash reference keyed by sequence id.  Each
##                        value is a list of [ status, position, length ]
##                        records.  Updated in place.
##    $sentinelLength   : Number of characters left behind in place of
##                        each excised stretch.
##
##  Overlapping ranges are merged first.  Each merged range is then
##  inserted into the list for its sequence.  While scanning that list
##  the range coordinates are compared against "position - adjustment",
##  where adjustment accumulates ( length - $sentinelLength ) for every
##  earlier record passed.  Records falling inside the new range are
##  dropped and their length is folded into the new record.
##  NOTE(review): positions stored in the records appear to be in
##  original ( never cut ) sequence coordinates - confirm with
##  queryExcision().
##
##  On return every surviving record has status "old".
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub addExcisionSet {
  my ( $resultCollection, $excisions, $sentinelLength ) = @_;

  my $DEBUG      = 0;
  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  #
  # Step 1: Merge overlapping ranges
  #
  #   A range is merged into the one before it when both are on the
  #   same sequence and it starts before the running end of the merged
  #   range.
  #
  my @ranges     = ();
  my $rangeCount = $resultCollection->size();
  my $cursor     = 0;
  while ( $cursor < $rangeCount ) {
    my $head  = $resultCollection->get( $cursor++ );
    my $id    = $head->getQueryName();
    my $start = $head->getQueryStart();
    my $end   = $head->getQueryEnd();

    # Swallow every directly following range that overlaps
    while ( $cursor < $rangeCount ) {
      my $follower = $resultCollection->get( $cursor );
      last
          unless (    $id eq $follower->getQueryName()
                   && $end > $follower->getQueryStart() );

      print "$subroutine: Overlap detected: $start-$end and "
          . $follower->getQueryStart() . "-"
          . $follower->getQueryEnd() . "\n"
          if ( $DEBUG );
      $end = $follower->getQueryEnd()
          if ( $end < $follower->getQueryEnd() );
      $cursor++;
    }
    push @ranges, [ $id, $start, $end ];
  }

  #
  # Step 2: Add ranges to excision list
  #
  foreach my $range ( @ranges ) {
    my ( $id, $start, $end ) = @{$range};

    print "$subroutine: Adding range: $id:$start-$end ( "
        . ( $end - $start + 1 ) . " )\n"
        if ( $DEBUG );

    # Very first excision for this sequence: nothing to adjust for
    if ( !defined $excisions->{$id} ) {
      print "Adding first\n" if ( $DEBUG );
      push @{ $excisions->{$id} }, [ "new", $start, $end - $start + 1 ];
      next;
    }

    my $known         = $excisions->{$id};
    my $posAdjustment = 0;     # Sum of ( length - sentinel ) passed so far
    my $deleteStart   = -1;    # Index of first record inside the new range
    my $startPosAdj   = -1;    # $posAdjustment as it was at that record
    my $slot          = 0;     # Scan position; doubles as insertion index

    while ( $slot <= $#{$known} ) {
      my $excisionRec = $known->[ $slot ];

      print "$subroutine: Considering excision: $excisionRec->[0]:"
          . "$excisionRec->[1]-$excisionRec->[2] "
          . "( posAdj = $posAdjustment )\n"
          if ( $DEBUG );

      # Records added earlier in this call ( "new" ) are stepped over
      # without contributing to the adjustment.
      if ( $excisionRec->[ 0 ] ne "new" ) {
        my $shifted = $excisionRec->[ 1 ] - $posAdjustment;

        if ( $shifted > $end ) {

          # This excision is beyond us. No need to continue.
          print
              "$subroutine: Left of this one. posAdjustment = $posAdjustment\n"
              if ( $DEBUG );
          last;
        }
        elsif ( $shifted <= $start ) {

          # must account for this previous excision
          print "$subroutine: Accounting for prev..\n" if ( $DEBUG );
          $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
        }
        elsif ( $shifted >= $start && $shifted <= $end ) {
          print "$subroutine: Contained. $posAdjustment makes this x sit at:"
              . ( $shifted ) . "\n"
              if ( $DEBUG );

          # Flag for deletion
          $deleteStart = $slot
              if ( $deleteStart == -1 || $deleteStart > $slot );
          $excisionRec->[ 0 ] = "del";

          $startPosAdj = $posAdjustment if ( $startPosAdj == -1 );
          $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
        }
      }
      $slot++;
    }    # for all excisions

    if ( $startPosAdj == -1 ) {

      # No earlier record sits inside the new range; insert where the
      # scan stopped.
      print "$subroutine: Adding to middle or end ( "
          . ( $start + $posAdjustment ) . ","
          . ( $end - $start + 1 ) . " )\n"
          if ( $DEBUG );
      splice( @{$known}, $slot, 0,
              [ "new", $start + $posAdjustment, $end - $start + 1 ] );
    }
    else {
      print "$subroutine: Splicing out contained startPosAdj = $startPosAdj "
          . "posAdj = $posAdjustment : new "
          . ( $start + $startPosAdj ) . ","
          . ( ( $end + $posAdjustment ) - ( $start + $startPosAdj ) + 1 )
          . "\n"
          if ( $DEBUG );

      # Add just before deletion.  The new record spans the range plus
      # everything the contained records had already removed.
      splice(
              @{$known},
              $deleteStart,
              0,
              [
                "new",
                $start + $startPosAdj,
                ( $end + $posAdjustment ) - ( $start + $startPosAdj ) + 1
              ]
      );
    }
  }    # Results

  #
  # Step 3: Drop the records flagged "del" and promote "new" to "old".
  #         Going back to front keeps the remaining indices valid while
  #         splicing.
  #
  foreach my $idKey ( keys( %{$excisions} ) ) {
    foreach my $j ( reverse( 0 .. $#{ $excisions->{$idKey} } ) ) {
      my $status = $excisions->{$idKey}->[ $j ]->[ 0 ];
      if ( $status eq "del" ) {
        print "Deleting $idKey:$j $excisions->{$idKey}->[$j]->[0] \n"
            if ( $DEBUG );
        splice( @{ $excisions->{$idKey} }, $j, 1 );
      }
      elsif ( $status eq "new" ) {
        $excisions->{$idKey}->[ $j ]->[ 0 ] = "old";
      }
    }
  }
}

##-------------------------------------------------------------------------##
## Use: my ( $newStart, $newEnd ) = &queryExcision( $id, $start, $end,
##                                                  $excisions,
##                                                  $sentinelLength );
##
##         $id             :  Key into the $excisions hash.
##         $start, $end    :  The coordinate range to translate.
##         $excisions      :  Hash reference; each value is a reference
##                            to a list of [ tag, position, length ]
##                            records.  The list is walked in order and
##                            the walk stops at the first record lying
##                            beyond $end.
##         $sentinelLength :  Subtracted from each record's length when
##                            the running offset is accumulated.
##
##  Returns
##
##     The ( start, end ) pair after applying the accumulated offsets.
##     A record positioned at or before $start shifts both coordinates.
##     A record positioned inside the range only extends the end.  The
##     pair is returned unchanged if $excisions has no entry for $id.
##
##-------------------------------------------------------------------------##
sub queryExcision {
  my ( $id, $start, $end, $excisions, $sentinelLength ) = @_;

  my $DEBUG      = 0;
  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  # Nothing was ever excised from this sequence
  return ( $start, $end ) unless ( defined $excisions->{$id} );

  # Offset accumulated from every record examined so far
  my $posAdjustment = 0;

  # Offset that was in force when the first contained record was met
  # ( -1 means that no contained record has been seen )
  my $startPosAdj = -1;

  foreach my $rec ( @{ $excisions->{$id} } ) {

    print
"$subroutine: Considering excision: $rec->[0]:$rec->[1]-$rec->[2] "
        . "( posAdj = $posAdjustment )\n"
        if ( $DEBUG );

    # Record position expressed in the coordinate system of the query
    my $adjPos = $rec->[ 1 ] - $posAdjustment;

    if ( $adjPos > $end ) {

      # This excision is beyond us. No need to continue.
      print
"$subroutine: This excision is beyond us. No need to consider others.\n"
          if ( $DEBUG );
      last;
    }

    if ( $adjPos <= $start ) {

      # Lies before the range: shifts everything that follows
      print "Accounting for prev.. ( " . $adjPos . ")\n"
          if ( $DEBUG );
      $posAdjustment += $rec->[ 2 ] - $sentinelLength;
    }
    elsif ( $start <= $adjPos && $adjPos <= $end ) {

      # Lies inside the range: remember the offset for the start
      # coordinate the first time this happens.
      $startPosAdj = $posAdjustment if ( $startPosAdj == -1 );
      $posAdjustment += $rec->[ 2 ] - $sentinelLength;
      print "Contained.. $startPosAdj, $posAdjustment\n" if ( $DEBUG );
    }
  }

  my $startShift = ( $startPosAdj > -1 ) ? $startPosAdj : $posAdjustment;
  return ( $start + $startShift, $end + $posAdjustment );
}

##-------------------------------------------------------------------------##
## Use: $groupedAnnots = &fragmentAnnotations( $resultsCollection, $excisions,
##                            $numX );
##
##         $resultsCollection :  The annotations to examine.  Modified in
##                               place: fragmented annotations are removed,
##                               their fragments are added and the
##                               collection is re-sorted by query start.
##         $excisions         :  Hash reference keyed by query name; each
##                               value is a reference to a list of
##                               [ tag, begin, length ] records.
##         $numX              :  Handed unchanged to createSubElements().
##
## Returns
##
##    Determine which annotations contain previously excised elements and
##    fragment them.  The return value is a reference to a list of groups
##    ( array references ).  Each annotation whose query name has an entry
##    in $excisions contributes one group: either its fragments or, when
##    nothing needed splitting, the annotation itself.  Annotations whose
##    query name has no entry in $excisions contribute no group.
##
##-------------------------------------------------------------------------##
sub fragmentAnnotations {
  my ( $resultsCollection, $excisions, $numX ) = @_;

  # Debugging output is forced off inside this routine; the previous
  # setting is put back just before returning.
  my $saveDEBUG = $DEBUG;
  $DEBUG = 0;

  my $fragmentPool  = SearchResultCollection->new();
  my %fragmented    = ();
  my @groupedAnnots = ();

  for ( my $idx = 0 ; $idx < $resultsCollection->size() ; $idx++ ) {
    my $annot      = $resultsCollection->get( $idx );
    my $annotBegin = $annot->getQueryStart();
    my $annotEnd   = $annot->getQueryEnd();
    my $seqID      = $annot->getQueryName();

    if ( $DEBUG ) {
      print "Considering fragmenting:\n";
      print ""
          . $annot->toStringFormatted( SearchResult::AlignWithQuerySeq )
          . "\n";
      print "" . $annot->toString . "\n";
    }

    # Sequences without any excision are not touched ( and not grouped )
    next unless ( $excisions->{$seqID} );

    # Query ranges of the pieces that survive around the excisions
    my @pieces = ();

    # Where the next surviving piece would begin
    my $pieceBegin = $annotBegin;

    # Bounds of the most recent excision found within the annotation
    my ( $lastCutBegin, $lastCutEnd ) = ( -1, -1 );

    print "Element boundaries: $annotBegin - $annotEnd\n" if ( $DEBUG );

    foreach my $cut ( @{ $excisions->{$seqID} } ) {
      my $cutBegin = $cut->[ 1 ];
      my $cutEnd   = $cutBegin + $cut->[ 2 ] - 1;

      # Past the annotation: nothing further can be inside it
      last if ( $cutBegin > $annotEnd );

      # Ignore excisions starting before the annotation as well as an
      # excision that coincides exactly with the annotation.
      next
          if ( $cutBegin < $annotBegin
            || ( $cutBegin == $annotBegin && $cutEnd == $annotEnd ) );

      ( $lastCutBegin, $lastCutEnd ) = ( $cutBegin, $cutEnd );

      print "Found internal: $lastCutBegin - $lastCutEnd\n"
          if ( $DEBUG );

      if ( $pieceBegin <= $cutBegin - 1 ) {
        print "  - Pushing $pieceBegin - " . ( $cutBegin - 1 ) . "\n"
            if ( $DEBUG );
        push @pieces,
            {
              'begin' => $pieceBegin,
              'end'   => $cutBegin - 1
            };
      }

      $pieceBegin = $cutEnd + 1;
    }

    # Trailing piece after the last internal excision
    if (    $lastCutBegin > $annot->getQueryStart()
         && $lastCutEnd < $annotEnd )
    {
      print "last Annotation from "
          . ( $lastCutEnd + 1 )
          . " to $annotEnd\n"
          if ( $DEBUG );
      push @pieces,
          {
            'begin' => $lastCutEnd + 1,
            'end'   => $annotEnd
          };
    }

    # Nothing to split: the annotation forms a group of its own
    unless ( @pieces ) {
      push @groupedAnnots, [ $annot ];
      next;
    }

    my $fragments = &createSubElements( $annot, \@pieces, $numX );
    $fragmented{$idx} = 1;    # Signal that this element has been fragmented

    my @fragGroup =
        map { $fragments->get( $_ ) } ( 0 .. $fragments->size() - 1 );
    push @groupedAnnots, \@fragGroup;
    $fragmentPool->addAll( $fragments );

    if ( $DEBUG ) {
      print "Fragmenting Element:\n"
          . $annot->toStringFormatted( SearchResult::AlignWithQuerySeq )
          . "\n";
      for ( my $k = 0 ; $k < $fragments->size() ; $k++ ) {
        print "New Segment:\n";
        print ""
            . $fragments->get( $k )
            ->toStringFormatted( SearchResult::AlignWithQuerySeq ) . "\n";
      }
    }
  }    # for

  # Remove the fragmented parents, highest index first so that the
  # remaining indices stay valid.
  foreach my $parentIdx ( sort { $b <=> $a } keys( %fragmented ) ) {
    $resultsCollection->remove( $parentIdx );
  }

  # Put the fragments in their place
  $resultsCollection->addAll( $fragmentPool );

  # Restore ordering by query start
  my $byQueryStart = sub ($$) {
    $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart();
  };
  $resultsCollection->sort( $byQueryStart );

  $DEBUG = $saveDEBUG;

  return ( \@groupedAnnots );
}

##-------------------------------------------------------------------------##
## Use: my $fragResults = &createSubElements( $parentElement,
##                                            $subSegmentList,
##                                            $numX );
##
##         $parentElement  :  The SearchResult to fragment.  Only getters
##                            and clone() are called on it here.
##         $subSegmentList :  Reference to a list of hashes, each with a
##                            'begin' and an 'end' key giving the query
##                            range of one fragment.
##         $numX           :  Number of alignment columns skipped between
##                            one fragment and the next ( presumably the
##                            width of the X/N placeholder left behind by
##                            an excision -- TODO confirm with callers ).
##
## Returns
##
##   Given a SearchResult and a list of start/end positions fragment
##   the element and return a SearchResultCollection containing the
##   new fragments.  Every fragment is a clone of the parent with its
##   query coordinates, subject coordinates and ( when the parent carries
##   alignment strings ) alignment strings replaced.  When the parent has
##   no alignment strings the subject coordinates are only estimated.
##
##   Globals Used: $DEBUG ( forced to 0 for the duration of the call )
##-------------------------------------------------------------------------##
sub createSubElements {
  my $parentElement  = shift;
  my $subSegmentList = shift;
  my $numX           = shift;

  # Silence debug output while in this routine; restored before returning
  my $saveDEBUG = $DEBUG;
  $DEBUG = 0;

  my $parentQueryStart  = $parentElement->getQueryStart();
  my $parentQueryEnd    = $parentElement->getQueryEnd();
  # NOTE(review): $parentQueryLength is not read anywhere in this routine
  my $parentQueryLength = $parentQueryEnd - $parentQueryStart + 1;

  # Working copies of the alignment strings ( empty when the parent has
  # none ).  They are consumed from the left as fragments are produced.
  my $parentQuerySeq = $parentElement->getQueryString() || "";
  my $parentSubjSeq  = $parentElement->getSubjString()  || "";
  my $parentSubjStart = $parentElement->getSubjStart();
  my $parentSubjEnd   = $parentElement->getSubjEnd();
  # Aligned Length
  my $parentSubjLen   = abs( $parentSubjEnd - $parentSubjStart ) + 1;
  # Total Model Length
  my $parentSubjSize = $parentElement->getSubjRemaining() + $parentElement->getSubjEnd();

  my $newResultColl = SearchResultCollection->new();

  my $numSegments = $#{$subSegmentList} + 1;

  # Running subject coordinates; updated after every fragment
  my $segSubjStart = $parentSubjStart;
  my $segSubjEnd   = $parentSubjEnd;

  # Total number of query bases covered by all requested segments.  Used
  # below to apportion the subject length when no alignment is available.
  my $realQueryLength = 0;
  for ( my $j = 0 ; $j < $numSegments ; $j++ ) {
    $realQueryLength +=
        $subSegmentList->[ $j ]->{'end'} - $subSegmentList->[ $j ]->{'begin'} +
        1;
  }

  for ( my $j = 0 ; $j < $numSegments ; $j++ ) {

    my $segQueryStart = $subSegmentList->[ $j ]->{'begin'};
    my $segQueryEnd   = $subSegmentList->[ $j ]->{'end'};
    my $segQueryLen   = $segQueryEnd - $segQueryStart + 1;
    print "RepeatMasker::createSubElements: segQueryStart = $segQueryStart,"
        . " segQueryEnd = $segQueryEnd, segQueryLen = $segQueryLen, "
        . " segSubjStart = $segSubjStart segSubjEnd = $segSubjEnd\n "
        if ( $DEBUG );

    # Do not produce very small alignments
    # Note: This can create small gaps ( usually in low quality
    #       regions ) that must be taken into account in ProcessRepeats.
    # NOTE: The leading "0 &&" makes this condition always false, so the
    #       whole block below is currently disabled.
    if ( 0 && $segQueryLen < 5 ) {
      print "RepeatMasker::createSubElements: Annotation is to "
          . "small to report ( len = $segQueryLen, numX = $numX, "
          . "len(parentQuerySeq) = "
          . length( $parentQuerySeq ) . " )\n"
          if ( $DEBUG );

      my $adjLen = 0;
      if ( $segQueryLen < 1 ) {

        # No bases...just move down by numX
        $adjLen = $numX;
      }
      else {
        # Find the alignment column at which $segQueryLen query bases
        # ( non-gap characters ) have been passed.
        my $bSeen  = 0;
        my $skipTo = 0;
        while (    $bSeen < $segQueryLen
                && $skipTo < length( $parentQuerySeq ) )
        {
          $bSeen++ if ( substr( $parentQuerySeq, $skipTo++, 1 ) ne "-" );
        }
        print "RepeatMasker::createSubElements: skipTo=$skipTo, bSeen=$bSeen\n"
            if ( $DEBUG );
        $adjLen = $skipTo + $numX;
      }

      if ( $adjLen <= length( $parentQuerySeq ) ) {

        # Adjust subject start
        my $sbjBases = substr( $parentSubjSeq, 0, $adjLen );
        $sbjBases =~ s/-//g;
        $segSubjStart += length( $sbjBases );
        $parentQuerySeq = substr( $parentQuerySeq, $adjLen );
        $parentSubjSeq  = substr( $parentSubjSeq,  $adjLen );
      }

      next;
    }

    # Start from a copy of the parent and overwrite the query coordinates
    my $newSegment = $parentElement->clone();
    $newSegment->setQueryStart( $segQueryStart );
    $newSegment->setQueryEnd( $segQueryEnd );
    $newSegment->setQueryRemaining(
         $parentQueryEnd - $segQueryEnd + $parentElement->getQueryRemaining() );


    # If alignment info
    if ( $parentQuerySeq ne "" ) {

      # Count through query seq until we reach the breakpoint ( discount "-" )
      # The loop stops once $segQueryLen + 1 non-gap characters have been
      # counted, so the first $i - 1 columns hold this segment's bases
      # ( together with any gap columns that directly follow them ).
      my $seqCount = 0;
      my $i        = 0;
      while ( $seqCount <= $segQueryLen ) {
        $seqCount++ unless ( substr( $parentQuerySeq, $i++, 1 ) eq '-' );
      }

      # Don't include the X/N
      my $newQuerySeq = substr( $parentQuerySeq, 0, $i - 1 );
      my $newSubjSeq  = substr( $parentSubjSeq,  0, $i - 1 );

      $newSegment->setQueryString( $newQuerySeq );
      $newSegment->setSubjString( $newSubjSeq );

      # Set parent to remaining sequence if any
      # ( drops this segment's columns plus the $numX columns after it )
      if ( ( $i + $numX ) <= length( $parentQuerySeq ) ) {
        $parentQuerySeq = substr( $parentQuerySeq, $i - 1 + $numX );
        $parentSubjSeq  = substr( $parentSubjSeq,  $i - 1 + $numX );
      }

      # From here on $newSubjSeq holds only subject bases, so its length
      # is the number of subject positions covered by this segment.
      $newSubjSeq =~ s/-//g;

      if ( $parentElement->getOrientation() eq "C" ) {
        # RMH: 8/2025 - One-off bug reported by Pio
        # Orientation "C": the fragment takes the current running subject
        # end; the running coordinates are then updated for the next one.
        $newSegment->setSubjEnd( $segSubjEnd );
        if ( $newSubjSeq eq "" ) {
          $segSubjEnd = $segSubjStart;
        }
        else {
          $segSubjEnd -= (length( $newSubjSeq ) + 1);
          $segSubjStart = $segSubjEnd + 1 + $numX;
        }
        $newSegment->setSubjRemaining( $parentSubjSize - $newSegment->getSubjEnd() );
        $newSegment->setSubjStart( $segSubjStart );
      }
      else {
        # Any other orientation: the fragment takes the current running
        # subject start; its end is derived from the subject base count.
        $newSegment->setSubjStart( $segSubjStart );
        if ( $newSubjSeq eq "" ) {
          $segSubjEnd = $segSubjStart;
        }
        else {
          # RMH: 8/2025 - One-off bug reported by Pio
          $segSubjEnd = $segSubjStart + length( $newSubjSeq );
          $segSubjStart += length( $newSubjSeq ) + 1;
        }
        $newSegment->setSubjEnd( $segSubjEnd );
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
      }

    }
    else {

      # We have no alignment information.  We cannot be sure
      # how many subject characters match this subsegment of
      # the alignment.  Realignment can be too costly so we
      # will guesstimate the number.  This is not really proper
      # and we should make a note of this in the output.  Currently
      # ProcessRepeats is doing this!
      my $segSubjLength = 0;
      if ( $j == ( $numSegments - 1 ) ) {
        # Last segment: give it whatever subject range is left over
        if ( $parentElement->getOrientation() eq "C" ) {
          $segSubjLength = $segSubjEnd - $parentSubjStart;
        }
        else {
          $segSubjLength = $parentSubjEnd - $segSubjStart;
        }
      }
      else {
        # Other segments: a share of the aligned subject length that is
        # proportional to the segment's share of the query bases.
        my $percQuerySegLength =
            ( $segQueryEnd - $segQueryStart + 1 ) / $realQueryLength;
        $segSubjLength = int( $parentSubjLen * $percQuerySegLength );
      }

      if ( $parentElement->getOrientation() eq "C" ) {
        # Orientation "C": walk the running subject end downwards
        $newSegment->setSubjEnd( $segSubjEnd );
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
        $segSubjEnd -= $segSubjLength;
        $newSegment->setSubjStart( $segSubjEnd );
        $segSubjEnd--;
      }
      else {
        # Any other orientation: walk the running subject start upwards
        $newSegment->setSubjStart( $segSubjStart );
        $segSubjEnd = $segSubjStart + $segSubjLength;
        $newSegment->setSubjEnd( $segSubjEnd );
        $segSubjStart = $segSubjEnd + 1;
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
      }

    }
    $newResultColl->add( $newSegment );
  }

  $DEBUG = $saveDEBUG;

  return $newResultColl;

}

##-------------------------------------------------------------------------##
## Use:  &saveOldFiles( $fijl, $fileend, $originaldir, $date, \%options );
##
##         $fijl        :  Path prefix of the existing output files
##                         ( replaced by "<dir>/$fileend" when the 'dir'
##                         option is set ).
##         $fileend     :  File name part used to build the saved names.
##         $originaldir :  Directory that receives the save directory when
##                         the 'dir' option is not set.
##         $date        :  Tag embedded in the ".pre<date>" names.
##         \%options    :  Options hash; the keys read here are 'dir',
##                         'is_only', 'cut', 'a', 'no_is' and 'is_clip'.
##
##  Returns
##
##     Nothing meaningful.  With 'is_only' set an existing non-empty
##     .alert file is renamed to .alert.pre<date> and a non-empty
##     .withoutIS file is deleted.  Otherwise existing non-empty output
##     files are moved into a "<fileend>.pre<date>.RMoutput" directory,
##     which is removed again if it stayed empty.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub saveOldFiles {
  my ( $fijl, $fileend, $originaldir, $date, $optRef ) = @_;
  my %options = %{$optRef};

  my $outdir = $options{'dir'};
  $fijl = "$outdir\/$fileend" if $outdir;

  if ( $options{'is_only'} ) {
    if ( -s "$fijl.alert" ) {
      rename( "$fijl.alert", "$fijl.alert.pre$date" )
          && print "\nOld file $fijl.alert renamed to $fijl.alert.pre$date\n\n";
    }
    unlink "$fijl.withoutIS" if -s "$fijl.withoutIS";
  }
  else {
    my $savebase = $outdir ? $outdir : $originaldir;
    my $savedir  = "$savebase\/$fileend.pre$date.RMoutput";
    mkdir $savedir, 0777;

    # File suffix => whether a non-empty file with that suffix is moved
    my @candidates = (
                       [ 'cat',       1 ],
                       [ 'stderr',    1 ],
                       [ 'out',       1 ],
                       [ 'masked',    1 ],
                       [ 'tbl',       1 ],
                       [ 'cut',       $options{'cut'} ],
                       [ 'align',     $options{'a'} ],
                       [ 'alert',     !$options{'no_is'} ],
                       [ 'withoutIS', $options{'is_clip'} ],
    );
    foreach my $candidate ( @candidates ) {
      my ( $suffix, $wanted ) = @{$candidate};
      next unless ( -s "$fijl.$suffix" && $wanted );
      rename( "$fijl.$suffix", "$savedir\/$fileend.$suffix" );
    }

    # rmdir only succeeds on an empty directory, so the message appears
    # exactly when something was moved into it.
    rmdir $savedir
        || print "\nSome previous RepeatMasker output files were moved to the directory \n$savedir \nin order not to overwrite them.\n\n";
  }
}

##-------------------------------------------------------------------------##
## Use:  &SkipFile( $file );
##
##         $file :  Path of the sequence file that is being skipped.
##
##  Returns
##
##     Nothing meaningful.  Copies $file to "$file.masked", prints an
##     explanation to standard output and writes a one line explanation
##     to "$file.out".  A failure to copy or to write is reported with a
##     warning; the routine never dies.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub SkipFile {
  my $file = shift;

  copy( $file, "$file.masked" )
      or warn "RepeatMasker::SkipFile: Could not copy $file to "
      . "$file.masked: $!\n";

  # File name without its leading directories, for the messages
  ( my $tempfile = $file ) =~ s/.+\///;
  print
"RepeatMasker quit because the file $tempfile only contains ambiguous bases, if any.
To accomodate automated processes the file has been copied to $tempfile.masked and this message has been printed to $tempfile.out\n\n";

  # A lexical handle and three-argument open are used so that a handle
  # named OUT belonging to the caller is left alone and the file name is
  # never interpreted as part of the open mode.
  if ( open( my $outFH, '>', "$file.out" ) ) {
    print $outFH
"RepeatMasker quit because the file $tempfile only contains ambiguous bases, if any.\n";
    close( $outFH )
        or warn "RepeatMasker::SkipFile: Could not finish writing "
        . "$file.out: $!\n";
  }
  else {
    warn "RepeatMasker::SkipFile: Could not write $file.out: $!\n";
  }
}

##-------------------------------------------------------------------------##
## Use:  my ( $tempdir, $runnumber ) = &createTempDir( \%options, $date,
##                                                     $file );
##
##         \%options :  Options hash; only the 'dir' key is read.
##         $date     :  Tag appended to the process id to form the run
##                      number.
##         $file     :  Query file; only used to test whether its
##                      directory can be written to.
##
##  Returns
##
##     The path of the temporary directory and the run number
##     ( "<pid>.<date>" ).  The directory "RM_<runnumber>" is created in
##     the current directory if possible, otherwise in $options{'dir'}
##     ( when set ), otherwise in the directory part of $ARGV[0].  Dies
##     when none of these can be used.
##
##   TODO: Clean this up!!!  It uses globals and makes assumptions
##         about how files are passed to repeatmasker etc..
##
##   Globals Used: ARGV[0]
##-------------------------------------------------------------------------##
sub createTempDir {
  my %options = %{ shift() };
  my $date    = shift;
  my $file    = shift;

  my $curdir = cwd();

  # To make cygwin happy - Contributed by Mike Seivers of TimeLogic
  $curdir =~ s/ /\\ /go;

  my ( $querydir, $fileendname ) =
      ( File::Spec->splitpath( $ARGV[ 0 ] ) )[ 1, 2 ];
  $querydir = "." if ( $querydir eq "" );

  # Used to avoid including existing files in output even
  # if $options{'dir'} chosen, preferred to write the temporary
  # files to a temporary subdirectory of the home directory, as
  # $options{'dir'} may be across system boundaries:
  my $runnumber = "$$" . ".$date";

  # First choice: a subdirectory of the current directory
  my $tempdir = "$curdir\/RM_$runnumber";
  unless ( -r "$tempdir\/$fileendname" || mkdir $tempdir, 0777 ) {
    if ( $options{'dir'} ) {

      # Second choice: the directory requested with -dir
      $tempdir = $options{'dir'} . "\/RM_$runnumber";
      die "Can't write to " . $options{'dir'} . "\n"
          unless mkdir $tempdir, 0777;
    }
    else {    # no writing to current directory
      $tempdir = "$querydir\/RM_$runnumber";
      if ( mkdir $tempdir, 0777 ) {

        # Make sure a full size file can be written next to the query.
        # $options{'dir'} is known to be unset in this branch, so the
        # message names the option itself.
        my $temptestfile = "$file" . "_$runnumber";
        copy( $file, "$temptestfile" )
            || die "Can not create a $curdir subdirectory nor write "
            . "full output to $querydir.\n Change operating "
            . "directory or use the option -dir"
            . " to indicate where files should be written.\n";
        unlink $temptestfile;
      }
      else {
        die "There is no writing access to the current directory "
            . "($curdir) nor to the directory containing the query "
            . "sequence.\nConsider using \"-dir\" or changing "
            . "current directory.";
      }
    }
  }
  return ( $tempdir, $runnumber );
}

##-------------------------------------------------------------------------##
## Use:  &handler( $sig );
##
##         $sig :  Name of the signal that was received ( without the
##                 "SIG" prefix ).
##
##  Returns
##
##     Does not return.  Prints a message naming the signal and exits
##     with the status -1.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
###  Interrupt handler used by systemint() ###
sub handler {
  my $sig = shift;

  print "\nAborting with a SIG" . $sig . "\n";
  exit( -1 );
}

##-------------------------------------------------------------------------##
## Use:  my $status = &systemint( $cmd );
##
##         $cmd :  The command line to run; handed to exec() as a single
##                 string.
##
##  Returns
##
##     The exit status of the command when it exited by itself, or 1
##     ( after printing a message ) when it was stopped or terminated by
##     a signal.  While the command is running the INT, QUIT, TERM and
##     HUP signals are routed to handler().
##
##  Globals Used: None
##-------------------------------------------------------------------------##
###  systemint -- Interruptible system call routine.  ###
sub systemint {
  my $cmd = shift;
  my $childPid;

  # Becomes true in the parent once the child exists; before that the
  # signal handlers below do nothing.
  my $childStarted = 0;

  my $relay = sub { &handler( @_ ) if ( $childStarted ) };
  local $SIG{INT}  = $relay;    #^C
  local $SIG{QUIT} = $relay;    #^\
  local $SIG{TERM} = $relay;    #kill command or system crash
  local $SIG{HUP}  = $relay;

  #    local $SIG{CHLD} = 'IGNORE';

  # Common report for a child that was stopped or killed by a signal
  my $reportSignal = sub {
    my $signal = shift;
    print "\nforksys:  Program terminated by a signal $signal.\n";
    print "The executing command was:  $cmd\n";
    return 1;
  };

FORK:
  {
    $childPid = fork;

    unless ( defined $childPid ) {

      # The fork failed.  Retry after a pause when the system ran out of
      # processes, give up for any other reason.
      die "Can't fork!  Errorcode: $!\n"
          unless ( $! =~ /No more process/o );
      print "$!\n";
      sleep 5;
      redo FORK;
    }

    if ( $childPid == 0 ) {

      # Child: become the command
      exec( "$cmd" ) or die "Exec $cmd failed\n";
    }

    # Parent
    $childStarted = 1;
    waitpid( $childPid, 0 );    #Waits for child to finish...
    my $status = $?;

    return $reportSignal->( WSTOPSIG( $status ) )
        if ( WIFSTOPPED( $status ) );

    if ( WIFEXITED( $status ) ) {
      my $exitCode = WEXITSTATUS( $status );
      return $exitCode;
    }

    return $reportSignal->( WTERMSIG( $status ) )
        if ( WIFSIGNALED( $status ) );
  }
}

##-------------------------------------------------------------------------##
## Use:  &cleanUp( \%options, $runnumber, $tempdir, $fileori,
##                 $fileend, $file, $originaldir, $compressed );
##
##         \%options    :  Options hash; only the 'dir' key is read.
##         $runnumber   :  Run identifier; used to name a probe file.
##         $tempdir     :  Temporary directory holding the output files.
##         $fileori     :  Original query file; recompressed at the end
##                         when $compressed says so.
##         $fileend     :  File name prefix of the output files to move.
##         $file        :  Query file path inside the temporary directory.
##         $originaldir :  Default destination of the output files.
##         $compressed  :  'zipped' ( run gzip ), 'Zed' ( run compress )
##                         or a false value.
##
##  Returns
##       --
##       Expects $options{'dir'} ( if set ) to already exist.
##
##       Moves every file in $tempdir whose name starts with $fileend to
##       $options{'dir'} ( if set ) or else to $originaldir.  Dies when a
##       file can not be copied.  Only prints a notice when the
##       destination is not writable at all.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub cleanUp {
  my %options     = %{ shift() };
  my $runnumber   = shift;
  my $tempdir     = shift;
  my $fileori     = shift;
  my $fileend     = shift;
  my $file        = shift;
  my $originaldir = shift;
  my $compressed  = shift;

  unlink "$tempdir\/$fileend";    # eq $file, but unlinking $file seems scary
                                  # copying it to originaldirectory would
                                  # change the date, priviliges, etc.
  unlink "$file.masked.log";      # pretty darn useless little file

  # An unreadable temporary directory simply yields no files to move.
  # Lexical handles keep this routine from closing handles named TEMP or
  # TEMP2 that may be open elsewhere in the program.
  my @outputfiles = ();
  if ( opendir( my $tempDH, $tempdir ) ) {
    @outputfiles = readdir $tempDH;
    closedir $tempDH;
  }

  # default is writing output to query directory
  my $targetdir = $originaldir;
  if ( $options{'dir'} ) {
    $targetdir = $options{'dir'};
  }

  # Probe the destination with a scratch file before moving anything
  my $probefile = "$targetdir\/temp.$runnumber";
  if ( open( my $probeFH, '>', $probefile ) ) {
    close $probeFH;
    unlink $probefile;
    $fileend = quotemeta( $fileend );
    foreach my $outputfile ( @outputfiles ) {
      next unless $outputfile =~ /^$fileend/;

      #rename doesn't cross system boundaries
      copy( "$tempdir\/$outputfile", "$targetdir\/$outputfile" )
          || die "Can't write all output files to $targetdir "
          . "(over quota?).  Files can be found in $tempdir "
          . "(and perhaps a few in $targetdir).  Run "
          . "\"ProcessRepeats\" on the .cat file or redo "
          . "analysis.\n\n";
      unlink "$tempdir\/$outputfile";
    }
  }
  else {
    print "\nOutput files can not be written to $targetdir. "
        . "They can be found in the directory $tempdir instead. \n"
        . "Consider using the -dir option.\n\n";
  }

  # Put the original query file back into its compressed form
  if ( $compressed ) {
    &systemint( "gzip $fileori" )     if $compressed eq 'zipped';
    &systemint( "compress $fileori" ) if $compressed eq 'Zed';
  }
}

##----
# DEPRECATED
##----
#sub getDBStats {
#  my $options     = %{ shift() };
#  my $db          = shift;
#  my $specPattern = shift;
#  my $tax         = shift;
#
#  print "RepeatMasker::getDBStats( \$db, " . "$specPattern, \$tax );\n"
#      if ( $DEBUG );
#
#  my $seqCount = $db->getRecordCount();
#
#  my $cladeCnt  = 0;
#  my $ancestCnt = 0;
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $record = $db->getRecord( $i );
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      my $isDescendant = $tax->isA( $name,        $specPattern );
#      my $isAncestor   = $tax->isA( $specPattern, $name );
#
#      if ( $isDescendant == 1 ) {
#        $cladeCnt++;
#        last;
#      }
#      else {
#        if ( $isAncestor ) {
#          $ancestCnt++;
#          last;
#        }
#      }
#    }
#  }
#
#  print
#      "   - $ancestCnt ancestral and ubiquitous sequence(s) for $specPattern\n";
#  print "   - $cladeCnt lineage specific sequence(s) for $specPattern\n";
#}


#### DEPRECATED
##-------------------------------------------------------------------------##
## Use:  my $libSize = createLib( \%options, $db, $libName, $specPattern,
##                                $stageNum, $tax);
##
##         \%options     :  RepeatMasker options hash
##         $db           :  A FastaDB or EMBL object open to
##                          the repeat database.
##         $libName      :  The name of the library to create.
##         $specPattern  :  The name of the species to include seqs for.
##         $stageNum:  The name of the old RM database.  Used to
##                          screen the repeats ( will generalize in the
##                          future ).
##         $tax          :  The Taxonomy.pm object.
##
##  Returns
##
##     Creates a library by filtering the RepeatMasker.lib file
##     given specific filtering parameters ( specPattern and stageNum).
##     If wublast is being used it also creates the binary versions
##     of the fasta library.  Returns the number of sequences stored
##     in the library.  Removes the library file if there are no
##     matching sequences.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
#sub createLib {
#  my $options      = %{ shift() };
#  my $db           = shift;
#  my $libName      = shift;
#  my $specPattern  = shift;
#  my $stageNum     = shift;
#  my $tax          = shift;
#  my $searchEngine = shift;
#
#  print "RepeatMasker::createLib( \$db, "
#      . "$libName, $specPattern, $stageNum, \$tax );\n"
#      if ( $DEBUG );
#
#  my @ids;
#  my @descs;
#  my $seqCount = $db->getRecordCount();
#
#  my $outFile = $libName;
#  $outFile = "$libName-wublast"
#      if ( $searchEngine->isa( "WUBlastSearchEngine" ) );
#
#  $outFile .= ".hmm" if ( $searchEngine->isa( "HMMERSearchEngine" ) );
#
#  open OUT, ">$outFile"
#      or die "RepeatMasker::createLib(): Could "
#      . "not open library file $outFile!\n";
#
#  # The number of sequences stored in this library
#  my $librarySize = 0;
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $match   = 0;
#    my @buffers = ();
#    my $seq     = "";
#    my $id      = "";
#    my $type    = "";
#    my $desc    = "";
#    my $record  = $db->getRecord( $i );
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      if (    $tax->isA( $name, $specPattern ) > 0
#           || $tax->isA( $specPattern, $name ) > 0 )
#      {
#        if ( $stageNum == 80 ) {
#
#          # For the specieslib it is sufficient to be in
#          # the clade or in the ancestral species.  No
#          # need to breakout into seperate search stages
#          # yet.
#          $match = 1;
#        }
#        else {
#
#          # Full length sequence non-buffered
#          my @stages = $record->getRMSearchStagesArray();
#          foreach my $stage ( @stages ) {
#            if (
#                 $stage eq $stageNum
#                 || (
#                      $stageNum == 95
#                      && (    $stage == 35
#                           || $stage == 50
#                           || $stage == 55
#                           || $stage == 60
#                           || $stage == 65
#                           || $stage == 70
#                           || $stage == 75 )
#                 )
#                )
#            {
#              $match = 1;
#            }
#          }
#
#          # Buffered Sequence
#          @stages = $record->getRMBufferStagesArray();
#          foreach my $stage ( @stages ) {
#            if ( $stage =~ /(\d+)\[(\d+)\-(\d+)\]/ ) {
#              if ( $1 == $stageNum ) {
#                push @buffers, "$2-$3";
#              }
#            }
#            elsif ( $stage =~ /(\d+)/ ) {
#              if ( $1 == $stageNum ) {
#                push @buffers, "full";
#              }
#            }
#            else {
#              print "RepeatMasker::createLib: Warning buffer stage $stage "
#                  . "understood!\n";
#            }
#          }
#        }
#        last if ( $match == 1 );
#      }
#    }
#
#    if ( $match > 0 || @buffers ) {
#      $librarySize++;
#      $id = $record->getId();
#      if ( $id =~ /D[FR]\d+/ && defined $record->getName() ) {
#        $id = $record->getName();
#      }
#      $type = "#" . $record->getRMType();
#      if ( $record->getRMSubType() ne "" ) {
#        $type .= "/" . $record->getRMSubType();
#      }
#
#      if ( $match > 0 ) {
#        if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#
#          # Use species-specific thresholds if supplied
#          my @thresholds    = $record->getThreshArray();
#          my %subThresholds = ();
#          foreach my $thresh ( @thresholds ) {
#            if ( $tax->isSpecies( $thresh->{'taxname'} ) eq
#                 $tax->isSpecies( $specPattern ) )
#            {
#              $subThresholds{'GA'} = $thresh->{'hit_ga'};
#              $subThresholds{'TC'} = $thresh->{'hit_tc'};
#              $subThresholds{'NC'} = $thresh->{'hit_nc'};
#              last;
#            }
#          }
#          if ( exists $subThresholds{'GA'}
#               && $subThresholds{'GA'} > 0 )
#          {
#            my $recordLines = $record->getRecordLines();
#            while ( $recordLines =~ /(.*)[\n\r]+/ig ) {
#              my $line = $1;
#              if ( $line =~ /^(GA|TC|NC) / ) {
#                print OUT "$1   " . $subThresholds{$1} . ";\n";
#              }
#              else {
#                print OUT $line . "\n";
#              }
#            }
#          }
#          else {
#            print OUT $record->getRecordLines();
#          }
#        }
#        else {
#          $desc = $record->getDescription();
#          $seq  = $record->getSequence();
#          if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#            my $rseq = uc( $seq );
#            $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
#            $rseq = reverse $rseq;
#
#            die "Repeat consensus ( $id ) contains the "
#                . "word \"anti\" in it's name.  This will cause "
#                . "incorrect orientation calls in the output when "
#                . "running with wublast."
#                if ( $id =~ /anti/ );
#
#            print OUT ">$id" . $type;
#            print OUT " (anti)\n";
#            $rseq =~ s/(\S{50})/$1\n/g;
#            $rseq .= "\n"
#                unless ( $rseq =~ /.*\n+$/s );
#            print OUT $rseq;
#          }
#
#          print OUT ">" . $id . "$type\n";
#          $seq =~ s/(\S{50})/$1\n/g;
#          $seq .= "\n"
#              unless ( $seq =~ /.*\n+$/s );
#          print OUT $seq;
#        }
#      }
#
#      if ( @buffers ) {
#        foreach my $buffer ( @buffers ) {
#          if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#
#            # TODO: Consider need for buffering with HMMs
#            #warn "Currently we do not support sequence "
#            #    . "buffers for HMMs $id ( $buffer )\n";
#          }
#          else {
#            $desc = $record->getDescription();
#            $seq  = $record->getSequence();
#            if ( $buffer eq "full" ) {
#              $type = "#buffer";
#            }
#            elsif ( $buffer =~ /(\d+)-(\d+)/ ) {
#              $seq = substr( $seq, $1 - 1, $2 - $1 + 1 );
#              $type = "_$1" . "_$2#buffer";
#            }
#            if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#              my $rseq = uc( $seq );
#              $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
#              $rseq = reverse $rseq;
#
#              die "Repeat consensus ( $id ) contains the "
#                  . "word \"anti\" in it's name.  This will cause "
#                  . "incorrect orientation calls in the output when "
#                  . "running with wublast."
#                  if ( $id =~ /anti/ );
#
#              print OUT ">" . $id . "$type";
#              print OUT " (anti)\n";
#              $rseq =~ s/(\S{50})/$1\n/g;
#              $rseq .= "\n"
#                  unless ( $rseq =~ /.*\n+$/s );
#              print OUT $rseq;
#            }
#
#            print OUT ">" . $id . "$type\n";
#            $seq =~ s/(\S{50})/$1\n/g;
#            $seq .= "\n"
#                unless ( $seq =~ /.*\n+$/s );
#            print OUT $seq;
#          }
#        }
#      }
#    }
#  }
#  close OUT;
#
#  if ( $librarySize == 0 ) {
#    unlink( $outFile );
#  }
#  else {
#    my ( $outFileVol, $outFileDir, $outFileBasename ) =
#        File::Spec->splitpath( $outFile );
#    $outFileDir = "." if ( $outFileDir eq "" );
#
#    if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#      system(   "$SETDB_PRGM $outFile > "
#              . "$outFileDir/setdb.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking setdb on file "
#          . "$outFile.  We tried using the setdb program ( "
#          . "$SETDB_PRGM ).\n";
#      unlink( $outFile ) unless ( $DEBUG );
#      move( "$outFile.ahd", "$libName.ahd" );
#      move( "$outFile.atb", "$libName.atb" );
#      move( "$outFile.bsq", "$libName.bsq" );
#    }
#    elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
#      system(   "$NCBIBLASTDB_PRGM -dbtype nucl "
#              . "-in $outFile > $outFileDir/rmblastdb.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking "
#          . "$NCBIBLASTDB_PRGM"
#          . " on file $outFile.\n";
#    }
#    elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#      system(   "$HMMPRESS_PRGM"
#              . " $outFile > $outFileDir/hmmPress.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking "
#          . "$HMMPRESS_PRGM on file "
#          . "$outFile.\n";
#    }
#  }
#
#  return ( $librarySize );
#
#}

#sub createValidIDList {
#  my $db          = shift;
#  my $specPattern = shift;
#  my $speciesDir  = shift;
#  my $tax         = shift;
#
#  print "RepeatMasker::createValidIDList( \$db, "
#      . "$specPattern, $speciesDir, \$tax );\n"
#      if ( $DEBUG );
#
#  open OUT, ">$speciesDir/speciesMeta.pm"
#      or die "RepeatMasker::createValidIDList(): Could "
#      . "not open library file $speciesDir/speciesMeta.pm!\n";
#
#  my %validIDs = ();
#  my $seqCount = $db->getRecordCount();
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $record = $db->getRecord( $i );
#    my $id     = $record->getId();
#
#    my $match = 0;
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      if (    $tax->isA( $name, $specPattern ) > 0
#           || $tax->isA( $specPattern, $name ) > 0 )
#      {
#        $match = 1;
#        last;
#      }
#    }
#    if ( $match ) {
#      $validIDs{ lc( $id ) } = 1;
#    }
#  }
#
#  ##
#  ##
#  ##
#  print OUT "package speciesMeta;\n";
#  print OUT "require Exporter;\n";
#  print OUT "\@EXPORT_OK   = qw( \%validIDs );\n";
#  print OUT "\%EXPORT_TAGS = ( all => [ \@EXPORT_OK ] );\n";
#  print OUT "\@ISA         = qw(Exporter);\n";
#  print OUT "BEGIN {\n";
#  print OUT "  \%validIDs = (\n";
#  my $output = Dumper( \%validIDs );
#  $output =~ s/^\$VAR1 = \{//;
#  $output =~ s/\};//;
#  print OUT $output;
#  print OUT "\n";
#  print OUT "  ); }\n";
#  close OUT;
#
#}
#
##-------------------------------------------------------------------------##
## Use:  &processCustomLib( $libFile, $tempdir, $searchEngine );
##
##         $libFile      :  Path to the custom library supplied by the
##                          user ( "-lib" option ).
##         $tempdir      :  The temporary directory for this run.  Every
##                          file generated here is written to this
##                          directory using the base name of $libFile.
##         $searchEngine :  The searchEngine being used
##
##  Returns
##
##     Nothing meaningful.  Dies if any of the external indexing
##     programs ( setdb, makeblastdb, hmmpress ) or the library copy
##     fails.
##
##     Prepares a custom library supplied by the user for the requested
##     search engine:
##
##       WUBlast   : Writes "$tempdir/<lib>.anti" containing every
##                   sequence followed by its reverse complement
##                   ( labeled "(anti)" ) and runs setdb on it.
##       NCBIBlast : Runs makeblastdb, writing the database to
##                   "$tempdir/<lib>".
##       HMMER     : Copies the library to "$tempdir/<lib>" and runs
##                   hmmpress on the copy.
##
##     Other search engines are left untouched.
##
##  Globals Used: $DEBUG, $SETDB_PRGM, $NCBIBLASTDB_PRGM, $HMMPRESS_PRGM
##-------------------------------------------------------------------------##
sub processCustomLib {
  my $libFile      = shift;
  my $tempdir      = shift;
  my $searchEngine = shift;

  print "RepeatMasker::processCustomLib()\n" if ( $DEBUG );

  # Only the file name portion is used below; all generated files are
  # placed in $tempdir.
  my ( $custLibVol, $custLibDir, $custLibFile ) =
      File::Spec->splitpath( $libFile );

  # TODO: Check name syntax

  if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {

    my $libDB = FastaDB->new(
                              fileName    => $libFile,
                              openMode    => SeqDBI::ReadOnly,
                              maxIDLength => 80
    );

    my $antiFile = "$tempdir/$custLibFile.anti";
    open( my $antiFH, ">", $antiFile )
        or die "RepeatMasker::processCustomLib(): Could "
        . "not create wublast compatable library file "
        . "$antiFile! $!\n";

    foreach my $seqID ( $libDB->getIDs() ) {

      # The "(anti)" label written below is how reverse strand entries
      # are marked, so an ID containing "anti" would be ambiguous.
      die "Repeat consensus ( $seqID ) contains the "
          . "word \"anti\" in it's name.  This will cause "
          . "incorrect orientation calls in the output when "
          . "running with wublast."
          if ( $seqID =~ /anti/ );

      my $seq  = $libDB->getSequence( $seqID );
      my $rseq = uc( $seq );

      # Forward strand, wrapped at 50 characters per line
      print $antiFH ">$seqID\n";
      $seq =~ s/(.{50})/$1\n/g;
      print $antiFH "$seq\n";

      # Reverse complement ( IUPAC aware ), wrapped the same way
      $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
      $rseq = reverse $rseq;

      print $antiFH ">$seqID (anti)\n";
      $rseq =~ s/(.{50})/$1\n/g;
      print $antiFH "$rseq\n";
    }
    close( $antiFH )
        or die "RepeatMasker::processCustomLib(): Error writing "
        . "$antiFile: $!\n";

    # setdb is run from inside $tempdir so that the database files and
    # the log are created there.  The input file is referenced by its
    # base name because, once inside $tempdir, a relative $tempdir prefix
    # would no longer resolve.
    my $currdir = cwd();
    chdir( $tempdir )
        or die "RepeatMasker::processCustomLib(): "
        . "Cannot change directory to $tempdir";
    system(   "$SETDB_PRGM -o $custLibFile "
            . "$custLibFile.anti "
            . " > setdb.log 2>&1" ) == 0
        or die "RepeatMasker::processCustomLib(): Error invoking setdb "
        . "on file $antiFile.  We tried using "
        . "the setdb program ( $SETDB_PRGM ).\n";
    chdir( $currdir )
        or die "RepeatMasker::processCustomLib(): "
        . "Cannot change directory to $currdir";

    undef $libDB;

  }
  elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
    system(   "$NCBIBLASTDB_PRGM -out $tempdir/$custLibFile "
            . "-dbtype nucl -in $libFile > "
            . "$tempdir/makeblastdb.log 2>&1" ) == 0
        or die "RepeatMasker::processCustomLib(): Error invoking "
        . "$NCBIBLASTDB_PRGM on file $libFile.  See "
        . "$tempdir/makeblastdb.log for details.\n";
  }
  elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {

    # hmmpress writes its index files next to its input, so work on a
    # copy inside $tempdir rather than on the user's file.
    system( "cp $libFile $tempdir/$custLibFile" ) == 0
        or die "RepeatMasker::processCustomLib(): Could not copy "
        . "$libFile to $tempdir/$custLibFile.\n";
    system(   "$HMMPRESS_PRGM "
            . " $tempdir/$custLibFile > $tempdir/hmmPress.log 2>&1" ) == 0
        or die "RepeatMasker::processCustomLib(): Error invoking "
        . "$HMMPRESS_PRGM on file "
        . "$tempdir/$custLibFile.\n";
  }
}

##-------------------------------------------------------------------------##
##  my ( $refineableHashRef ) = buildRefineableHash( $db );
##
##      $db : A database object.  Only objects which are an "EMBL" or
##            a "DFAM" ( as reported by isa() ) are scanned.
##
##  Returns
##            A hash reference: $refineableHashRef = { 'AluJo' => 1,
##                                                     'AluSc' => 1, .. }
##            keyed by the id of every record whose getRMRefineable()
##            value is true.  The hash is empty for any other type
##            of database object.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub buildRefineableHash {
  my ( $db ) = @_;

  my %idIsRefineable;

  # Databases of any other type contribute nothing.
  return \%idIsRefineable
      unless ( $db->isa( "EMBL" ) || $db->isa( "DFAM" ) );

  # Visit each record of the master library by index
  my $recordCount = $db->getRecordCount();
  foreach my $index ( 0 .. $recordCount - 1 ) {
    my $entry = $db->getRecord( $index );
    next unless ( $entry->getRMRefineable() );
    $idIsRefineable{ $entry->getId() } = 1;
  }

  return \%idIsRefineable;
}

##-------------------------------------------------------------------------##
##  my $formatted = commify( $number );
##
##  Returns
##     A copy of $number with a comma inserted between each group of
##     three digits, e.g. 1234567 => "1,234,567".  Digits which follow a
##     decimal point are left ungrouped.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub commify {
    my ( $number ) = @_;

    # Operate on the mirrored string: groups of three digits can then be
    # marked left-to-right, and the second lookahead skips any digit run
    # that is followed by the (mirrored) decimal point.
    my $mirrored = scalar reverse( $number );
    $mirrored =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $formatted = scalar reverse( $mirrored );
    return $formatted;
}

##-------------------------------------------------------------------------##
##  my ( $species, $generalCacheDir, $speciesCacheDir,
##       $customCacheDir, $dbLabel ) =
##          initLibrariesFromFamdb( $species, $customLibFile, $rmLibDir,
##                                  $workingDir, \@libraryPath,
##                                  $searchEngine );
##
##      species       : "-species" option provided by the user or "".
##      customLibFile : "-lib" option provided by the user or "".
##      rmLibDir      : Directory containing the famdb/ subdirectory.
##      workingDir    : The programs working directory (e.g "RM_#####..").
##      libraryPath   : Reference to the search path ( list of
##                      directories ) for cached RM libraries.
##      searchEngine  : The search engine object.
##
##  Initialize and collect data on libraries for use by RepeatMasker.
##  The expectation is that libraries now come in two forms either
##  a simple FASTA/HMM file provided directly by the user with the
##  -lib option or a FAMDB (hdf5) library with taxonomic labels for
##  multi-purpose searching.
##
##  With -lib the custom library is validated and, unless pre-built
##  index files are found next to it, indexed into workingDir using
##  processCustomLib().  Otherwise famdb.py is queried for the database
##  details and the lineage of the species ( "homo sapiens" if none was
##  given ), and a summary is printed.  In both cases the library search
##  path is then scanned for cached libraries and any missing cache is
##  built with createLibFromFamdb() in the first writable directory of
##  the search path.
##
##  Returns
##     species         : The species searched ( defaulted if necessary ).
##     generalCacheDir : Directory holding the cached general libraries.
##     speciesCacheDir : Directory holding the cached species libraries
##                       ( "" if none was located or built ).
##     customCacheDir  : workingDir, or the directory of the custom
##                       library if pre-built index files were found.
##     dbLabel         : Label derived from the famdb title and version;
##                       undefined when a custom library is used.
##
##     Or it fails validation and exits the program.
##
##  Globals Used: $REPEATMASKER_DIR, %options, $DEBUG
##-------------------------------------------------------------------------##
sub initLibrariesFromFamdb {
  my $species       = shift;
  my $customLibFile = shift;
  my $rmLibDir      = shift;
  my $workingDir    = shift;
  my @libraryPath   = @{ shift() };
  my $searchEngine  = shift;

  if ( $species ne "" && $customLibFile ne "" ) {
    die "The custom library (-lib) option may not be combined with the\n" .
        "species (-species) option.\n\n";
  }

  my $speciesCacheDir = "";          # The directory where we will find the
                                     #   cache files for species spec. libraries
  my $generalCacheDir = "";          # The directory where we will find the
                                     #   cache files for general libraries
  my $customCacheDir  = "$workingDir";  # The directory where we will find the
                                     #   "-lib" custom library.

  my $orgFlag;
  my $dbLabel;

  ##
  ## User supplied FASTA/HMM library
  ##
  if ( $customLibFile ne "" ) {
    unless ( -s $customLibFile ) {
      die "RepeatMasker::setspecies: Could not find user specified library "
          . $customLibFile . ", or the file is empty.\n";
    }

    my ( $custLibVol, $custLibDir, $custLibFile ) =
        File::Spec->splitpath( $customLibFile );
    my $prebuiltDir = $custLibDir || ".";

    # Validate/Generate required pre-processed files for
    # the search engine requested.
    my $havePrebuilt = 0;
    if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
      if (    -s "$customLibFile.bsq"
           || -s "$customLibFile.xps" )
      {
        $havePrebuilt = 1;
        warn "NOTE: Compressed versions of your custom library were\n"
            . "found in the $prebuiltDir directory.  The program will\n"
            . "use these by default. If these databases do not contain\n"
            . "reverse complemented copies of your sequences the reverse\n"
            . "strand hits will not be returned!";
      }
    }
    elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
      $havePrebuilt = 1
          if (    -s "$customLibFile.nsq"
               || -s "$customLibFile.nhr" );
    }
    elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
      # mylib.hmm.h3f
      # mylib.hmm.h3i
      # mylib.hmm.h3m
      # mylib.hmm.h3p
      $havePrebuilt = 1
          if (    -s "$customLibFile.h3f"
               && -s "$customLibFile.h3i"
               && -s "$customLibFile.h3m"
               && -s "$customLibFile.h3p" );
    }
    else {
      # No pre-processing is performed here for any other engine; the
      # library is used from where it resides.
      $havePrebuilt = 1;
    }

    if ( $havePrebuilt ) {
      $customCacheDir = $prebuiltDir;
    }
    else {
      &processCustomLib( $customLibFile, $workingDir, $searchEngine );
    }
    print "Using Custom Repeat Library: $customLibFile\n\n";
  }
  else {

    # Default species if none given.
    $species = "homo sapiens" if ( $species eq "" );

    # The species name is placed inside single quotes on the famdb.py
    # command lines; escape any embedded single quote so that such
    # names reach famdb.py intact.
    my $shellSpecies = $species;
    $shellSpecies =~ s/'/'\\''/g;

    # TODO: This should be moved above...because even with a -lib the general directory
    # needs to be created.
    my $modelPrefix = "CONS-";
    $modelPrefix = "HMM-" if ( $searchEngine->isa( "HMMERSearchEngine" ) );
    my $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/famdb info ";
    open( my $infoFH, "-|", $famdbCmd )
        or die "Could not execute famdb.py using: $famdbCmd\n";
    my $dbTitle;
    my $dbVersion;
    my $dbDate;
    my $dbConsCnt;
    my $dbHMMCnt;
    while ( my $line = <$infoFH> ) {
      # Example of the fields parsed below ( the two totals are only
      # recognized in their "... present:" form ):
      #
      # Database: Dfam
      # Version: 3.1
      # Date: 2019-06-20
      #
      # Dfam - A database of transposable element (TE) sequence alignments and HMMs.
      #
      # Total consensus sequences present: 273655
      # Total HMMs present: 273655
      if ( $line =~ /^Database\s*:\s*(\S.*)/ ) {
        $dbTitle = $1;
      }elsif ( $line =~ /^Version\s*:\s*(\S.*)/ ) {
        $dbVersion = $1;
      }elsif ( $line =~ /^Date\s*:\s*(\S.*)/ ) {
        $dbDate = $1;
      }elsif ( $line =~ /^Total consensus sequences present\s*:\s*(\d+)/ ) {
        $dbConsCnt = $1;
      }elsif ( $line =~ /^Total HMMs present\s*:\s*(\d+)/ ) {
        $dbHMMCnt = $1;
      }
    }
    close $infoFH;
    if ( $? ) {
      die "Could not execute famdb.py using: $famdbCmd\n";
    }

    # Build the cache label: brackets/braces/parentheses are dropped and
    # every other run of non-alphanumeric characters becomes a single "_".
    my $sanitizedTitle = $dbTitle;
    $sanitizedTitle =~ s/[\[\(\{\]\)\}]+//g;
    $sanitizedTitle =~ s/[^a-zA-Z\d]+/_/g;
    $dbLabel = $modelPrefix . $sanitizedTitle . "_" . $dbVersion;
    print "\nUsing Master RepeatMasker Database: $rmLibDir/famdb\n";
    print "  Title    : $dbTitle\n";
    print "  Version  : $dbVersion\n";
    print "  Date     : $dbDate\n";
    if ( $dbConsCnt > $dbHMMCnt ) {
      print "  Families : " . commify($dbConsCnt) . "\n\n";
    }else {
      print "  Families : " . commify($dbHMMCnt) . "\n\n";
    }

    ## For minimal DB installs, -species is not an option.
    if ( $dbTitle eq "Sequencing_artifacts_only" ) {
      die "\nERROR: A database of TE families is not present.\n" .
          "         Either install the Dfam famDB root partition and additional\n" .
          "         optional partitions or use the -lib option and provide your\n" .
          "         own custom TE FASTA library file.\n\n".
          "  To install Dfam FamDB partitions, simply download the root ('0') partition\n" .
          "  from https://www.dfam.org/releases/current/families/FamDB/ into your\n".
          "  RepeatMasker/Libraries/famdb folder and rerun the RepeatMasker 'configure'\n" .
          "  tool to complete the installation. See the FamDB release notes and partition\n" .
          "  descriptions https://www.dfam.org/releases/current/families/FamDB/README.txt\n" .
          "  for further details.\n\n";
    }

    # TODO Print total Taxa count
    # Lookup species in famdb and resolve orgFlag
    # NOTE: orgFlag is a bit of a legacy concept that will probably disappear
    #       in a future RM version.
    # TODO: Should REPEATMASKER_DIR be global???
    $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/famdb lineage '" .
                    $shellSpecies . "' -f semicolon";
    #print "FamdbCMD: $famdbCmd\n";
    open( my $lineageFH, "-|", $famdbCmd )
        or die "Could not execute famdb.py using: $famdbCmd\n";
    my $lineage = "";
    my $NCBITaxID;
    my $NCBITaxName;
    my $msg = "";
    while ( my $line = <$lineageFH> ) {
      # current famdb lineage output:
      # 370040(0): root;cellular organisms;Eukaryota;Opisthokonta;Metazoa;Eumetazoa;Bilateria;Deuterostomia;Chordata;Craniata;Vertebrata;Gnathostomata;Teleostomi;Euteleostomi;Actinopterygii;Actinopteri;Neopterygii;Teleostei;Osteoglossocephalai;Clupeocephala;Euteleosteomorpha;Neoteleostei;Eurypterygia;Ctenosquamata;Acanthomorphata;Euacanthomorphacea;Percomorphaceae;Ovalentaria;Mugilomorphae;Mugiliformes;Mugilidae;Planiliza;Planiliza haematocheilus [1469]
      #1299 entries in ancestors; 8 lineage-specific entries
      #
      # The assignments are ordered so that the last matching clade wins
      # ( primates over rodentia over mammalia ).
      $orgFlag = "mammalia" if ( $line =~ /^\d+\(\d+\):\s.*;Mammalia(;| \[)/ );
      $orgFlag = "rodentia" if ( $line =~ /^\d+\(\d+\):\s.*;Rodentia(;| \[)/ );
      $orgFlag = "primates" if ( $line =~ /^\d+\(\d+\):\s.*;Primates(;| \[)/ );

      # The final component of the lineage is the taxon itself; everything
      # before it is kept as the lineage.
      if ( $line =~ /^(\d+)\(\d+\):\s+(root.*)\;(\S.*)\s+\[.*/ ) {
        $NCBITaxID = $1;
        $lineage = $2;
        $NCBITaxName = $3;
      }
      # Capture messages in case name is not identified.
      $msg .= $line;
    }
    close $lineageFH;
    if ( $? ) {
      die "Could not execute famdb.py using: $famdbCmd\n";
    }

    if ( $lineage eq "" ) {
      if ( $msg =~ /Taxon in Partition\s+(\d+)/ ) {
        die  "\nTaxon \"$species\" is in partition $1 of the current FamDB however,\n"
           . "this partition is absent.  Please download this file from the original\n"
           .  "source and rerun configure to proceed.\n\n";
      }else {
        die "\nTaxon \""
          . $species
          . "\" is not defined in the current FamDB partitions\n"
          . "present.  There may not be any TE families defined in the\n"
          . "database for this taxon or there may be an error in the spelling.\n"
          . "Please check your entry against the NCBI Taxonomy database\n"
          . "and/or try using a broader clade or related species instead.\n"
          . "The full list of species/clades defined in the library may be\n"
          . "obtained using the famdb.py script.\n\n"
          . "For example, to search for particular taxa use:\n\n"
          . "    % ./famdb.py names Heterodontus\n\n"
          . "    Exact Matches\n"
          . "    =============\n"
          . "    Taxon: 7791, Partition: 0, Names: Heterodontus (scientific name)\n\n"
          . "    Non-exact Matches\n"
          . "    =================\n"
          . "    Taxon: 95544, Partition: 0, Names: Heterodontus japonicus (scientific name), Japanese bullhead shark (genbank common name)\n\n"
          . "Or to obtain the full taxanomic lineage to a particular entry:\n\n"
          . "    % ./famdb.py lineage \"Heterodontus japonicus\" -ad -f semicolon\n\n"
          . "    95544(0): root;cellular organisms;Eukaryota;Opisthokonta;Metazoa;Eumetazoa;Bilateria;Deuterostomia;Chordata;Craniata <chordates>;Vertebrata <vertebrates>;Gnathostomata <vertebrates>;Chondrichthyes;Elasmobranchii;Selachii;Galeomorphii;Heterodontoidea;Heterodontiformes;Heterodontidae;Heterodontus;Heterodontus japonicus [0]\n\n"
          . "In this last example, if the '-f semicolon' is omitted the output will\n"
          . "also contain the counts of families labeled with each taxon in the lineage\n\n\n";
      }
    }

    # Tabulate the curated and the uncurated family counts
    my %ancestorCount = ( 'curated' => 0, 'uncurated' => 0 );
    my %lineageCount  = ( 'curated' => 0, 'uncurated' => 0 );
    foreach my $curation ( 'curated', 'uncurated' ) {
      $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/famdb lineage '" .
                      $shellSpecies . "' --$curation --ancestors --descendants -f totals";
      open( my $totalsFH, "-|", $famdbCmd )
          or die "Could not execute famdb.py using: $famdbCmd\n";
      while ( my $line = <$totalsFH> ) {
        #1299 entries in ancestors; 8 lineage-specific entries
        if ( $line =~ /^(\d+)\s+entries in ancestors;\s*(\d+)\s+lineage-specific/ ) {
          $ancestorCount{$curation} = $1;
          $lineageCount{$curation}  = $2;
        }
      }
      close $totalsFH;
      if ( $? ) {
        die "Could not execute famdb.py using: $famdbCmd\n";
      }
    }

    print "Species/Taxa Search:\n";
    print "  $NCBITaxName [NCBI Taxonomy ID: $NCBITaxID]\n";

    # Wrap the lineage at roughly 60 characters per line.  The first
    # line carries the "Lineage:" label, continuation lines are indented
    # to line up with it.
    my $lineageBlock = "";
    my $lineageLine = "";
    foreach my $component ( split(/;/,$lineage) ){
      if ( length($lineageLine . ";" . $component) > 60 ) {
        $lineageBlock .= ( $lineageBlock eq "" ? "  Lineage: " : "           " )
                         . $lineageLine . "\n";
        $lineageLine = "";
      }
      $lineageLine .= "$component;";
    }
    # Flush the components still pending after the last wrap so that the
    # tail of the lineage is printed as well.
    if ( $lineageLine ne "" ) {
      $lineageBlock .= ( $lineageBlock eq "" ? "  Lineage: " : "           " )
                       . $lineageLine;
    }
    $lineageBlock =~ s/;$//;
    $lineageBlock .= "\n"
            unless ( $lineageBlock =~ /.*\n+$/s );
    print "$lineageBlock";

    if ( $options{'uncurated'} ) {
       print "Including curated and uncurated families:\n";
       my $tot_ancestral = $ancestorCount{'uncurated'} + $ancestorCount{'curated'};
       my $tot_lineage = $lineageCount{'uncurated'} + $lineageCount{'curated'};
       print "  $tot_ancestral families in ancestor taxa; $tot_lineage lineage-specific families\n";
       print "    ** NOTE: No uncurated families were found for this taxon **\n"
           if ( ($ancestorCount{'uncurated'} + $lineageCount{'uncurated'}) == 0 );
       print "\n";
    }else{
       print "Including only curated families:\n";
       print "  $ancestorCount{'curated'} families in ancestor taxa; $lineageCount{'curated'} lineage-specific families\n\n";
    }
  }

  # Check library search path for cached versions
  # of libraries.  NOTE: cached versions are
  # stored as follows:
  #
  #     @libraryPath/$dbLabel/general/foo.lib
  #     @libraryPath/$dbLabel/$species/foolib
  #
  # Every directory in the search path is examined and there is
  # no early exit, so when several directories contain the desired
  # library type the one listed last is used.
  #
  my $speciesWord = lc($species || "");
  $speciesWord =~ s/\s+/_/g;
  # 2023/11/14
  #   New "uncurated" flag requires suffixes for cached libraries
  #   to ensure we reproduce the same set of families for each query.
  $speciesWord .= "_wunc" if ( $options{'uncurated'} );

  # The file pattern which indicates that a species cache directory has
  # been prepared for a given search engine.
  my @cacheSignatures = (
                          [ "WUBlastSearchEngine",    "*.ahd" ],
                          [ "NCBIBlastSearchEngine",  "*.nhr" ],
                          [ "CrossmatchSearchEngine", "*lib" ],
                          [ "HMMERSearchEngine",      "*hmm" ],
  );

  foreach my $path ( @libraryPath ) {
    if ( -d "$path/$dbLabel" ) {
      if ( -d "$path/$dbLabel/$speciesWord" ) {
        foreach my $signature ( @cacheSignatures ) {
          my ( $engineClass, $filePattern ) = @{$signature};
          next unless ( $searchEngine->isa( $engineClass ) );
          my @pathFiles = glob( "$path/$dbLabel/$speciesWord/$filePattern" );
          if ( @pathFiles ) {
            $speciesCacheDir = "$path/$dbLabel/$speciesWord";
            last;
          }
        }
      }elsif ( -d "$path/$dbLabel/$speciesWord" . ".working" ) {
        # Make sure checkpointing name has been cleared in previous run
        print "\n  It appears that RepeatMasker attempted to generate a cached library for this\n"
              ."  species before but didn't complete it.  Attempting to remove and rebuild this\n"
              ."  cache: $path/$dbLabel/$speciesWord.working\n\n";
        rmtree("$path/$dbLabel/$speciesWord" . ".working",0,1);
      }
      if ( -d "$path/$dbLabel/general" ) {
        if ( $searchEngine->isa( "NCBIBlastSearchEngine" ) &&
             -s "$path/$dbLabel/general/is.lib" &&
             ! -s "$path/$dbLabel/general/is.lib.nhr") {
             # Must build frozen version of is.lib
             $generalCacheDir = "";
        }else {
          # Fallback on .working to makes sure this gets built for hmmer/crossmatch
          $generalCacheDir = "$path/$dbLabel/general";
        }
      }elsif ( -d "$path/$dbLabel/general" . ".working" ){
        # Make sure checkpointing name has been cleared in previous run
        print "\n  It appears that RepeatMasker attempted to generate a cached general library\n"
              ."  before but didn't complete it.  Attempting to remove and rebuild this\n"
              ."  cache: $path/$dbLabel/general.working\n\n";
        rmtree("$path/$dbLabel/general.working",0,1);
      }
    }
  }

  #
  # If we could not find either the general library cache
  # or the species library cache we need to build them.
  #
  if ( $generalCacheDir eq "" || $speciesCacheDir eq "" ) {
    # Cached libraries are missing -- generate them
    #
    # Determine the highest level writable directory.  Writability is
    # probed by creating ( and removing ) a scratch file.
    #
    my $writableCacheDir = "";
    foreach my $path ( @libraryPath ) {
      if ( -d $path ) {
        my $pathProbe = "$path/rmwritetest.deleteme";
        if ( open( my $pathProbeFH, ">", $pathProbe ) ) {
          close $pathProbeFH;
          unlink $pathProbe;

          # Just in case there is a read-only version of the
          # db already extracted by the installer.
          if ( -d "$path/$dbLabel" ) {
            my $dbProbe = "$path/$dbLabel/rmwritetest.deleteme";
            if ( open( my $dbProbeFH, ">", $dbProbe ) ) {
              close $dbProbeFH;
              unlink $dbProbe;
              $writableCacheDir = $path;
              last;
            }
          }
          else {
            $writableCacheDir = $path;
            last;
          }
        }
      }
      elsif ( mkdir "$path", 0777 ) {
        $writableCacheDir = $path;
        last;
      }
    }

    if ( $generalCacheDir eq "" ) {
      # Without a writable directory the paths below would be rooted
      # at "/"; stop with an explicit message instead.
      die "RepeatMasker::initLibrariesFromFamdb(): Could not find a writable "
          . "directory in the library search path ( "
          . join( ", ", @libraryPath )
          . " ) in which to build the general libraries.\n"
          if ( $writableCacheDir eq "" );

      # Need to build libraries:   at.lib,  simple.lib
      #                            l1.lib, mirs.lib, mir.lib, is.lib
      print "Building general libraries in: "
          . "$writableCacheDir/$dbLabel/general\n";

      # Make cache dir.  The build happens in a ".working" directory
      # which is only moved into place once the build has finished.
      if ( !-d "$writableCacheDir/$dbLabel/general.working" ) {
        eval { mkpath( "$writableCacheDir/$dbLabel/general.working" ) };
        if ( $@ ) {
          die "RepeatMasker::setspecies: Can't create dir path "
              . "$writableCacheDir/$dbLabel/general.working! $@\n";
        }
      }

      &createLibFromFamdb( $rmLibDir,
                           "$writableCacheDir/$dbLabel/general.working/is.lib",
                           'root', 10, $searchEngine );
      # Rename or copy upon successful completion
      if ( -d "$writableCacheDir/$dbLabel/general" ) {
        # Directory exists but setup for a different search engine - copy files over
        system("cp $writableCacheDir/$dbLabel/general.working/* $writableCacheDir/$dbLabel/general");
        rmtree("$writableCacheDir/$dbLabel/general.working",0,1);
      }else {
        rename("$writableCacheDir/$dbLabel/general.working", "$writableCacheDir/$dbLabel/general");
      }
      $generalCacheDir = "$writableCacheDir/$dbLabel/general";
    }
    else {
      print STDERR "Using general libraries in:\n  $generalCacheDir\n"
          if ( $DEBUG );
    }

    if ( !$customLibFile ) {
      if ( $speciesCacheDir eq "" ) {
        die "RepeatMasker::initLibrariesFromFamdb(): Could not find a writable "
            . "directory in the library search path ( "
            . join( ", ", @libraryPath )
            . " ) in which to build the species libraries.\n"
            if ( $writableCacheDir eq "" );

        my $speciesWorkDir = "$writableCacheDir/$dbLabel/$speciesWord.working";

        # Need to build species specific libraries
        print "Building species libraries in: "
            . "$writableCacheDir/$dbLabel/$speciesWord\n";

        # Make cache dir
        if ( !-d $speciesWorkDir ) {
          eval { mkpath( $speciesWorkDir ) };
          if ( $@ ) {
            die "RepeatMasker::setspecies: Can't create dir path "
                . "$speciesWorkDir! $@\n";
          }
        }

        # Build the cached refineable elements hash.  These are the
        # IDs of sequences which can be refined by searching against
        # the refinelib.
        my $shellSpecies = $species;
        $shellSpecies =~ s/'/'\\''/g;
        my $cOpt = "--curated";
        $cOpt = "" if ( $options{'uncurated'} );
        my $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/famdb families '" .
                       $shellSpecies . "' $cOpt --ancestors --descendants -f embl_meta";
        open( my $metaFH, "-|", $famdbCmd )
            or die "Could not execute famdb.py using: $famdbCmd\n";
        my $acc;
        my $name;
        my $ref;
        my %validIDs = ();
        my %refineableHash = ();
        while ( my $line = <$metaFH> ) {
           # "//" terminates a record: file it under its name if it has
           # one and under its accession otherwise.
           if ( $line =~ /^\/\// ) {
             if ( $name ) {
               $validIDs{ lc( $name ) } = 1;
               $refineableHash{ $name } = 1 if ( $ref );
             }else {
               $validIDs{ lc( $acc ) } = 1;
               $refineableHash{ $acc } = 1 if ( $ref );
             }
             $acc = undef;
             $name = undef;
             $ref = 0;
           }
           #ID   DF0000003; SV 4; linear; DNA; STD; UNC; 309 BP.
           $acc = $1 if ( $line =~ /^ID\s+(\S+)\;/ );
           #NM   AluSc
           $name = $1 if ( $line =~ /^NM\s+(\S+)/ );
           #CC        Refineable
           $ref = 1 if ( $line =~ /^CC\s+Refineable/ );
        }
        close $metaFH;
        if ( $? ) {
          die "Could not execute famdb.py using: $famdbCmd\n";
        }
        nstore \%refineableHash,
            "$speciesWorkDir/refineableHash.dat";

        ##
        ## Write the valid ID list as a small perl module
        ##
        open( my $metaOut, ">", "$speciesWorkDir/speciesMeta.pm" ) or
            die "Could not open $speciesWorkDir/speciesMeta.pm " .
                "for writing!\n";
        print $metaOut "package speciesMeta;\n";
        print $metaOut "require Exporter;\n";
        print $metaOut "\@EXPORT_OK   = qw( \%validIDs );\n";
        print $metaOut "\%EXPORT_TAGS = ( all => [ \@EXPORT_OK ] );\n";
        print $metaOut "\@ISA         = qw(Exporter);\n";
        print $metaOut "BEGIN {\n";
        print $metaOut "  \%validIDs = (\n";
        # Strip the "$VAR1 = {" / "};" wrapper so that only the key/value
        # list of the dump is written between the parentheses.
        my $output = Dumper( \%validIDs );
        $output =~ s/^\$VAR1 = \{//;
        $output =~ s/\};//;
        print $metaOut $output;
        print $metaOut "\n";
        print $metaOut "  ); }\n";
        close $metaOut;

        # Each entry: [ library name, search stage, addUndefined flag ]
        my @stageLibs = ();
        # NOTE(review): an orgFlag of "rodentia" is not matched here and
        # therefore takes the single "specieslib" branch -- confirm this
        # is intended.
        if ( defined( $orgFlag )
             && ( $orgFlag eq "mammalia" || $orgFlag eq "primates" ) )
        {
          push @stageLibs, (
            # alu.lib, rodcutsines.lib => sinecutlib
            [ "sinecutlib", 35, 0 ],
            # cut1.lib, rodcut.lib => shortcutlib
            [ "shortcutlib", 40, 0 ],
            # cut2.lib, rodcut2.lib, cetartiocut.lib => cutlib
            [ "cutlib", 45, 0 ],
          );

          unless ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
            push @stageLibs, (
              # humsines.lib, rod1.lib, cetartio1.lib => shortlib
              [ "shortlib", 50, 1 ],
              # humlines.lib, rod2.lib => longlib
              [ "longlib", 55, 0 ],
              # mirs.lib => mirslib
              [ "mirslib", 60, 0 ],
              # mir.lib => mirlib
              [ "mirlib", 65, 0 ],
              # retrovirus.lib => retrolib
              [ "retrolib", 70, 0 ],
            );
          }
          else {
            # HMM search combines 35,50,60,65,70 into one search lib
            push @stageLibs, [ "masklib", 95, 1 ];
          }

          # refinelib
          push @stageLibs, [ "refinelib", 85, 0 ];
        }
        else {
          # Need to separate into a species.lib
          push @stageLibs, [ "specieslib", 80, 0 ];
        }

        foreach my $stageLib ( @stageLibs ) {
          my ( $libBaseName, $stageNum, $addUndefined ) = @{$stageLib};
          &createLibFromFamdb( $rmLibDir,
                               "$speciesWorkDir/$libBaseName",
                               $species, $stageNum, $searchEngine,
                               $addUndefined );
        }

        # Rename or copy upon successful completion
        if ( -d "$writableCacheDir/$dbLabel/$speciesWord" ) {
          # Directory exists but setup for a different search engine - copy files over
          system("cp $writableCacheDir/$dbLabel/$speciesWord.working/* $writableCacheDir/$dbLabel/$speciesWord");
          rmtree("$writableCacheDir/$dbLabel/$speciesWord.working",0,1);
        }else {
          rename("$writableCacheDir/$dbLabel/$speciesWord.working", "$writableCacheDir/$dbLabel/$speciesWord");
        }
        $speciesCacheDir = "$writableCacheDir/$dbLabel/$speciesWord";
      }
      else {
        print STDERR "Using species libraries in:\n  $speciesCacheDir\n"
            if ( $DEBUG );
      }
    }
  }

  return ( $species, $generalCacheDir, $speciesCacheDir, $customCacheDir, $dbLabel );

}

##-------------------------------------------------------------------------##
## Use:  createLibFromFamdb( $rmLibDir, $libName, $species,
##                           $stageNum, $searchEngine, $addUndefined );
##
##         $rmLibDir     :  The directory containing the famdb database
##                          ( queried as "$rmLibDir/famdb" ).
##         $libName      :  The name of the library to create.
##         $species      :  The name of the species to include seqs for.
##         $stageNum     :  The search stage used to screen the repeats;
##                          passed to famdb.py as its "--stage" value
##                          ( will generalize in the future ).
##         $searchEngine :  The search engine to prepare the library for.
##         $addUndefined :  If true, add the undefined search stage
##                          ( stage 0 ) sequences to the library.
##
##  Returns
##
##     Creates a library by querying the famdb database with famdb.py
##     given specific filtering parameters ( species and stageNum ).
##     The library is then prepared for the search engine: for wublast
##     the binary versions of the fasta library are created with setdb,
##     for ncbi/rmblast the blast database is built, and for hmmer any
##     missing GA/TC/NC thresholds are added before running hmmpress.
##     Removes the library file if there are no matching sequences.
##
##  Globals Used: $DEBUG, %options, $REPEATMASKER_DIR, $SETDB_PRGM,
##                $NCBIBLASTDB_PRGM, $HMMPRESS_PRGM
##-------------------------------------------------------------------------##
sub createLibFromFamdb {
  my $rmLibDir     = shift;
  my $libName      = shift;
  my $species      = shift;
  my $stageNum     = shift;
  my $searchEngine = shift;
  my $addUndefined = shift || 0;

  print "RepeatMasker::createLib( "
      . "$libName, $species, $stageNum );\n"
      if ( $DEBUG );

  my $isHMMER   = $searchEngine->isa( "HMMERSearchEngine" );
  my $isWUBlast = $searchEngine->isa( "WUBlastSearchEngine" );

  # The on-disk name and the famdb export format depend on the engine:
  # HMMER gets "<libName>.hmm", WUBlast gets a temporary "<libName>-wublast"
  # fasta file ( indexed by setdb below ), everything else uses $libName.
  my $outFile = $libName;
  my $format  = "fasta_name";
  if ( $isHMMER ) {
    $outFile .= ".hmm";
    $format = "hmm_species";
  }
  elsif ( $isWUBlast ) {
    $outFile = "$libName-wublast";
  }

  # NOTE: Stage 95 is a catch all for 35 50 55 60 65 70 75....basically
  #       excluding 25/40/45. Stage '80' is all stages.  Currently famdb.py
  #       handles these special case selections internally.
  my $cOpt = $options{'uncurated'} ? "" : "--curated";

  # The requested stage creates the library ( ">" ); the optional
  # undefined stage ( 0 ) is appended to it ( ">>" ).  RMH: 6/27/2025
  # NOTE(review): $species and the paths are interpolated into a shell
  #               command; a species name containing a single quote would
  #               break the quoting -- confirm callers sanitize it.
  my @extractions = ( [ $stageNum, ">" ] );
  push @extractions, [ 0, ">>" ] if ( $addUndefined );
  foreach my $extraction ( @extractions ) {
    my ( $stage, $redirect ) = @{$extraction};
    my $famdbCmd =
        "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/famdb families '"
        . $species
        . "' $cOpt --ancestors --descendants --include-class-in-name"
        . " --stage $stage -f $format";
    $famdbCmd .= " --add-reverse-complement" if ( $isWUBlast );
    $famdbCmd .= " $redirect $outFile";
    system( $famdbCmd );
    if ( $? ) {
      die "Could not execute famdb.py using: $famdbCmd\n";
    }
  }

  # A library is empty if it contains nothing but '#' comment lines.  Only
  # the first non-comment line matters, so stop scanning as soon as one is
  # found rather than counting every line of a potentially large library.
  my $hasContent = 0;
  if ( open( my $libFH, '<', $outFile ) ) {
    while ( my $line = <$libFH> ) {
      next if ( $line =~ /^#/ );
      $hasContent = 1;
      last;
    }
    close( $libFH );
  }
  if ( !$hasContent ) {
    print "createLibFromFamdb: Removing empty library $outFile\n" if ( $DEBUG );
    unlink( $outFile );
    return;
  }

  # Engine log files are written next to the library itself.
  my $outFileDir = ( File::Spec->splitpath( $outFile ) )[ 1 ];
  $outFileDir = "." if ( $outFileDir eq "" );

  if ( $isWUBlast ) {
    system(   "$SETDB_PRGM $outFile > "
            . "$outFileDir/setdb.log 2>&1" ) == 0
        or die "RepeatMasker::createLib(): Error invoking setdb on file "
        . "$outFile.  We tried using the setdb program ( "
        . "$SETDB_PRGM ).\n";

    # The fasta file was only needed to build the index; keep it around
    # when debugging.  The index files are renamed to the library name.
    unlink( $outFile ) unless ( $DEBUG );
    move( "$outFile.ahd", "$libName.ahd" );
    move( "$outFile.atb", "$libName.atb" );
    move( "$outFile.bsq", "$libName.bsq" );
  }
  elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
    system(   "$NCBIBLASTDB_PRGM -dbtype nucl "
            . "-in $outFile > $outFileDir/rmblastdb.log 2>&1" ) == 0
        or die "RepeatMasker::createLib(): Error invoking "
        . "$NCBIBLASTDB_PRGM"
        . " on file $outFile.\n";
  }
  elsif ( $isHMMER ) {
    #
    # With the addition of DR families we now have the potential of
    # generating families without any thresholds set.  The raw famdb
    # export is set aside as "<outFile>.preThresh" and rewritten to
    # $outFile with GA/TC/NC lines added to any family lacking a GA.
    #
    my $preThreshFile = "$outFile.preThresh";
    move( $outFile, $preThreshFile )
        or die "Could not rename library $outFile to $preThreshFile: $!\n";
    _addMissingHmmThresholds( $preThreshFile, $outFile );

    # Only discard the original once the rewritten library has content.
    if ( -s $outFile ) {
      unlink( $preThreshFile );
    }

    system(   "$HMMPRESS_PRGM"
            . " $outFile > $outFileDir/hmmPress.log 2>&1" ) == 0
        or die "RepeatMasker::createLib(): Error invoking "
        . "$HMMPRESS_PRGM on file "
        . "$outFile.\n";
  }
}

##-------------------------------------------------------------------------##
## Use:  _addMissingHmmThresholds( $inFile, $outFile, [$Zval], [$evalue] );
##
##         $inFile   :  The HMM library to read.
##         $outFile  :  The HMM library to write.
##         $Zval     :  Optional genome size in MB using the HMMER
##                      convention.  Default 3000 ( ~3GB, typical for
##                      mammals ).
##         $evalue   :  Optional E-value threshold.  Default 0.02 ( the
##                      Dfam theoretical FDR E-value threshold ).
##
##  Returns
##
##     Copies each "//" terminated record of $inFile to $outFile.  A
##     record without a GA line receives GA/TC/NC lines directly after
##     its CKSUM line, with the score calculated from the record's MAXL
##     and "STATS LOCAL FORWARD" values.  Dies if a file cannot be
##     opened/written or if a record needing thresholds is missing the
##     values required to calculate them.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub _addMissingHmmThresholds {
  my $inFile  = shift;
  my $outFile = shift;
  my $Zval    = shift || 3000;
  my $evalue  = shift || 0.02;

  open( my $inFH, '<', $inFile )
      or die "Could not open library $inFile for reading: $!\n";
  open( my $outFH, '>', $outFile )
      or die "Could not open library $outFile for writing: $!\n";

  my $acc            = "";
  my $preHeader      = "";    # Record lines up to and including CKSUM
  my $postHeader     = "";    # Record lines following CKSUM
  my $beforeChecksum = 1;
  my $hmm_maxl       = 0;
  my $hmm_tau        = 0;
  my $hmm_lambda     = 0;
  my $hasGA          = 0;

  while ( my $line = <$inFH> ) {
    if ( $line =~ /^ACC\s+(\S+)/ ) {
      $acc        = $1;
      $hmm_maxl   = 0;
      $hmm_tau    = 0;
      $hmm_lambda = 0;
    }
    if ( $line =~ /^MAXL\s+(\d+)/ ) {
      $hmm_maxl = $1;
    }
    if ( $line =~ /^STATS\s+LOCAL\s+FORWARD\s+([\-\d\.]+)\s+([\-\d\.]+)/ ) {
      $hmm_tau    = $1;
      $hmm_lambda = $2;
    }
    if ( $line =~ /^GA\s+\d+\.\d+;/ ) {
      $hasGA = 1;
    }

    # Buffer the record in two halves so that new threshold lines can be
    # placed between them once the whole record has been seen.
    if ( $beforeChecksum ) {
      $preHeader .= $line;
    }
    else {
      $postHeader .= $line;
    }
    if ( $line =~ /^CKSUM/ ) {
      $beforeChecksum = 0;
    }

    # End of record: flush it, adding thresholds if none were present.
    if ( $line =~ /^\/\// ) {
      print $outFH $preHeader;
      if ( $hasGA == 0 ) {
        die "HMM lambda is zero or missing for accession $acc!\n"
            if ( $hmm_lambda == 0 );
        die "Could not find HMM MAXL value for $acc!\n"
            if ( $hmm_maxl == 0 );

        # From eval2score() originally in hmmstat and now in dfthresh
        # ( see dfthresh for details )
        my $nseq = ( $Zval * 1000000 ) / $hmm_maxl;
        my $sc   = $hmm_tau - ( ( 1 / $hmm_lambda ) * log( $evalue / $nseq ) );
        my $threshold = sprintf( "%0.2f", $sc );
        print $outFH "GA    $threshold;\n";
        print $outFH "TC    $threshold;\n";
        print $outFH "NC    $threshold;\n";
      }
      print $outFH $postHeader;
      $hasGA          = 0;
      $acc            = "";
      $preHeader      = "";
      $postHeader     = "";
      $beforeChecksum = 1;
    }
  }
  close( $inFH );

  # Buffered write errors ( e.g. a full disk ) only surface at close; do
  # not let a truncated library be handed on silently.
  close( $outFH )
      or die "Could not finish writing library $outFile: $!\n";
}

1;
