#!/usr/bin/env perl
##---------------------------------------------------------------------------##
##  File:
##      @(#) configure
##  Author:
##      Robert Hubley <rhubley@systemsbiology.org>
##  Description:
##      Configuration utility for the RepeatMasker package.
##
#******************************************************************************
#* Copyright (C) Institute for Systems Biology 2003-2024 Developed by
#* Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
###############################################################################

=head1 NAME

configure - Configure the RepeatMasker package

=head1 SYNOPSIS

  perl ./configure [options]

=head1 DESCRIPTION

  Interactive configuration script for RepeatMasker.
  Run from inside the RepeatMasker installation directory.

=head1 OPTIONS

=over 4

=item -version

Print the RepeatMasker version and exit.

=item -perlbin <path>

Path to the perl interpreter to embed in script headers.

=item -trf_prgm <path>

Full path to the TRF (Tandem Repeat Finder) executable.

=item -rmblast_dir <path>

Path to the RMBlast bin directory.

=item -hmmer_dir <path>

Path to the HMMER bin directory.

=item -crossmatch_dir <path>

Path to the Crossmatch bin directory.

=item -abblast_dir <path>

Path to the ABBlast bin directory.

=item -famdb_dir <path>

Path to the FamDB installation directory (containing famdb.py).

=item -default_search_engine <name>

Default search engine: rmblast, hmmer, crossmatch, or abblast.

=back

=head1 SEE ALSO

RepeatMasker

=head1 COPYRIGHT

Copyright 2003-2024 Robert Hubley, Institute for Systems Biology

=head1 AUTHOR

Robert Hubley <rhubley@systemsbiology.org>

=cut

use strict;
use warnings;
use Config;
use Cwd;
use FindBin;
use File::Spec;
use File::Basename;
use Getopt::Long;
use Pod::Text;
use lib $FindBin::Bin;
use RepeatMaskerConfig;

# Autoflush the selected output handle so prompts that do not end in a
# newline are shown before the script blocks reading STDIN.
$| = 1;

# File-scoped values read by the subroutines below.
# $INSTALL_DIR is the directory holding this script as reported by
# FindBin's RealBin (symlinks resolved).
my $VERSION     = $RepeatMaskerConfig::VERSION;
my $INSTALL_DIR = $FindBin::RealBin;

##
## Command-line option processing
##
## Parsed options land in %opts keyed by option name.  Option names are
## matched case-sensitively ("noignorecase").
## NOTE(review): Getopt::Long::config is the legacy spelling of
## Getopt::Long::Configure -- confirm it is still wanted here.
##
my %opts;
Getopt::Long::config( "noignorecase", "bundling_override" );
GetOptions( \%opts,
  'version',
  'perlbin=s',
  'trf_prgm=s',
  'rmblast_dir=s',
  'hmmer_dir=s',
  'crossmatch_dir=s',
  'abblast_dir=s',
  'famdb_dir=s',
  'default_search_engine=s',
) or usage();

# -version short-circuits everything else, including the directory check.
if ( $opts{version} ) {
  print "RepeatMasker $VERSION\n";
  exit 0;
}

##
## Must be run from the installation directory
## (update_shebangs() below refers to the scripts by relative path).
##
if ( getcwd() ne $INSTALL_DIR ) {
  die "\n  ERROR: configure must be run from inside the RepeatMasker\n"
    . "         installation directory:\n\n"
    . "           cd $INSTALL_DIR\n"
    . "           ./configure\n\n";
}

## Apply command-line overrides to configuration before any validation
## $config aliases the module's configuration hash; the subroutines below
## read and write entries of the form $config->{NAME}->{'value'}.
RepeatMaskerConfig::resolveConfiguration( \%opts );
my $config = $RepeatMaskerConfig::configuration;

##
## Search engine definitions (must be declared before any phase that references them)
##
## Per-engine fields:
##   key     - name of the engine's entry in $config
##   label   - human-readable name used in menus and status lines
##   defname - value stored in DEFAULT_SEARCH_ENGINE when this engine is the default
##   probe   - executable name searched for on PATH to suggest a directory
##   url     - project home page shown while configuring the engine
##
my @ENGINES = (
  { key     => 'RMBLAST_DIR',
    label   => 'RMBlast',
    defname => 'rmblast',
    probe   => 'rmblastn',
    url     => 'https://www.repeatmasker.org/rmblast/' },
  { key     => 'HMMER_DIR',
    label   => 'HMMER',
    defname => 'hmmer',
    probe   => 'nhmmer',
    url     => 'http://hmmer.org/' },
  { key     => 'CROSSMATCH_DIR',
    label   => 'Crossmatch',
    defname => 'crossmatch',
    probe   => 'cross_match',
    url     => 'http://www.phrap.org/' },
  { key     => 'ABBLAST_DIR',
    label   => 'ABBlast',
    defname => 'abblast',
    probe   => 'blastp',
    url     => 'https://blast.advbiocomp.com/' },
);

##
## Phase 1: Dependency checks (non-interactive, run before clearing screen)
##
check_dependencies();

##
## Phase 2: Fix perl shebang lines in all RepeatMasker scripts
##
my $perl = resolve_perl( $opts{perlbin} );
update_shebangs( $perl );

##
## Phase 3: Interactive configuration
##
## The screen is cleared here, so the output of phases 1 and 2 scrolls away
## unless one of them aborted first.
##
system("clear");
print_banner();

configure_trf();
configure_search_engines();
configure_famdb();

##
## Phase 4: Save and summarise
##
RepeatMaskerConfig::updateConfigFile();
print_summary();

exit 0;

###########################################################################
## Subroutines
###########################################################################

##
## Render this script's own POD as plain text on STDOUT and exit with
## status 1.  Used when command-line parsing fails.
##
sub usage {
  my $formatter = Pod::Text->new( sentence => 0, width => 78 );
  $formatter->output_fh(*STDOUT);
  $formatter->parse_file($0);
  exit 1;
}

##
## Print the title banner: a rule of '=' characters, the product name and
## version, a second rule and a blank line.
##
sub print_banner {
  my $width = 62;
  my $rule  = ( "=" x $width ) . "\n";

  print $rule;
  printf " %-*s\n", $width - 2, " RepeatMasker v$VERSION  --  Configuration";
  print $rule . "\n";
}

##
## Print a section heading: a blank line, a 62-column rule of dashes, the
## indented title, and a closing rule.
##
sub section_header {
  my ($title) = @_;
  my $rule = "-" x 62;

  print "\n$rule\n";
  print "  $title\n";
  print "$rule\n";
}

## One-line status reporters.  Each prints its single argument after a fixed
## eight-column tag: "[ OK ]", "[    ]" (informational) or "[WARN]".
sub status_ok {
  my ($text) = @_;
  print "  [ OK ]  ", $text, "\n";
}

sub status_info {
  my ($text) = @_;
  print "  [    ]  ", $text, "\n";
}

sub status_warn {
  my ($text) = @_;
  print "  [WARN]  ", $text, "\n";
}

##
## Ask a question on STDOUT and read one line of answer from STDIN.
##
##   $message - question text (printed with a two-space indent)
##   $default - optional; shown in [brackets] when non-empty and returned
##              when the user just presses Enter
##
## Returns the answer with the line ending and any surrounding whitespace
## removed, or $default when the answer is blank and a default was given.
##
## Dies when STDIN is at end-of-file and there is no non-empty default:
## every caller re-prompts on an unusable answer, so returning "" forever
## would turn a closed/exhausted STDIN into an endless loop.
##
sub prompt {
  my ( $message, $default ) = @_;
  my $has_default = defined $default && $default ne "";

  if ($has_default) {
    print "  $message [$default]: ";
  }
  else {
    print "  $message: ";
  }

  my $answer = <STDIN>;
  if ( !defined $answer ) {
    # End of input: fall back to the default if there is one, otherwise
    # give up instead of handing the caller an empty answer it will reject.
    return $default if $has_default;
    die "\n\n  ERROR: Unexpected end of input while waiting for an answer.\n\n";
  }

  # Drop the line ending plus stray blanks, e.g. from a pasted path.
  $answer =~ s/^\s+|\s+$//g;

  return ( $answer eq "" && defined $default ) ? $default : $answer;
}

##
## Ask a yes/no question, repeating it until the reply is a single
## y or n (either case).  A blank reply selects $default, which is "n"
## when not supplied.  Returns 1 for yes, 0 for no.
##
sub prompt_yn {
  my ( $message, $default ) = @_;
  $default = "n" unless defined $default;

  # Capitalise whichever choice Enter will select.
  my $hint = "[y/N]";
  $hint = "[Y/n]" if lc($default) eq "y";

  while (1) {
    print "  $message $hint: ";

    my $reply = <STDIN>;
    $reply = "" unless defined $reply;
    $reply =~ s/[\r\n]+$//;
    $reply = $default if $reply eq "";

    next unless $reply =~ /^[YyNn]$/i;

    return 1 if $reply =~ /^[Yy]$/i;
    return 0;
  }
}

##
## Search the directories listed in PATH for an executable regular file.
##
##   $name - bare program name, e.g. "trf"
##
## Returns the absolute path of the first match, or "" when nothing is
## found (or $name is empty / PATH is unset).
##
## PATH is split with File::Spec->path so that an empty element means the
## current directory, as the shell treats it, instead of being searched
## as "/".  Matches are made absolute because callers store the result
## in the saved configuration.
##
sub find_in_path {
  my ($name) = @_;
  return "" unless defined $name && $name ne "";

  for my $dir ( File::Spec->path ) {
    my $full = File::Spec->catfile( $dir, $name );
    next unless -f $full && -x _;
    return File::Spec->rel2abs($full);
  }
  return "";
}

##
## Resolve the perl interpreter to use for shebang lines.
##
##   $perlbin - optional value of -perlbin: either the interpreter itself
##              or a directory containing "perl" or "bin/perl"
##
## Returns an absolute path to an executable perl.  Without -perlbin the
## running interpreter ($^X) is used, falling back to $Config{perlpath}
## when $^X is not absolute.
##
## Dies when -perlbin does not lead to an executable, or when no absolute
## interpreter path can be determined.  A user-supplied path is never
## silently replaced: a directory without perl is an error, and a relative
## path is made absolute rather than swapped for the system perl.
##
sub resolve_perl {
  my ($perlbin) = @_;

  my $perl = $^X;

  if ( defined $perlbin && $perlbin ne "" ) {
    my $chosen;

    if ( -d $perlbin ) {
      for my $candidate ( File::Spec->catfile( $perlbin, "perl" ),
                          File::Spec->catfile( $perlbin, "bin", "perl" ) ) {
        if ( -f $candidate && -x _ ) {
          $chosen = $candidate;
          last;
        }
      }
      die "\n  ERROR: -perlbin '$perlbin' does not contain a perl executable\n"
        . "         (looked for perl and bin/perl).\n\n"
        unless defined $chosen;
    }
    elsif ( -f $perlbin && -x _ ) {
      $chosen = $perlbin;
    }
    else {
      die "\n  ERROR: -perlbin '$perlbin' does not point to a perl executable.\n\n";
    }

    # Shebang lines need an absolute path; anchor relative input to the
    # current directory instead of discarding the user's choice.
    $perl = File::Spec->rel2abs($chosen);
  }

  # $^X can be relative (e.g. "perl"); use the path recorded at build time.
  unless ( File::Spec->file_name_is_absolute($perl) ) {
    $perl = $Config{perlpath} . ( $Config{_exe} // "" );
  }

  die "\n  ERROR: Cannot determine an absolute path for the perl interpreter.\n"
    . "         Try: ./configure -perlbin /path/to/perl\n\n"
    unless File::Spec->file_name_is_absolute($perl) && -x $perl;

  return $perl;
}

##
## Verify the runtime environment before doing anything else: perl version,
## required modules, a loadable Scalar::Util exporting weaken, and a
## minimum Storable version.  Each passing check prints an OK line; the
## first failing check prints a FAIL line and exits with status 1.
##
sub check_dependencies {
  section_header("Checking Dependencies");

  ## Interpreter version
  unless ( $] >= 5.008 ) {
    print "  [ FAIL ]  Perl $] detected -- version 5.8 or higher is required.\n\n";
    exit 1;
  }
  status_ok( "Perl " . sprintf( "%vd", $^V ) );

  ## Modules that must be loadable
  my @absent;
  for my $module ( qw( Tie::File Getopt::Long POSIX File::Copy
                       File::Path Data::Dumper Cwd Storable ) ) {
    next if eval "require $module; 1";
    push @absent, $module;
  }
  if ( scalar @absent ) {
    print "  [ FAIL ]  Missing Perl modules: " . join( ", ", @absent ) . "\n";
    print "\n  Install via CPAN or your system package manager, then re-run configure.\n\n";
    exit 1;
  }
  status_ok("Required Perl modules present");

  ## Scalar::Util must be able to export weaken
  if ( !eval "use Scalar::Util qw(weaken); 1" ) {
    print "  [ FAIL ]  Scalar::Util is not compiled with XS support.\n";
    print "\n  Please reinstall Scalar::Util from CPAN and re-run configure.\n\n";
    exit 1;
  }
  status_ok("Scalar::Util (XS)");

  ## Storable must be at least 2.06
  require Storable;
  my $have = $Storable::VERSION;
  if ( $have < 2.06 ) {
    print "  [ FAIL ]  Storable $have -- version 2.06 or higher required.\n\n";
    exit 1;
  }
  status_ok("Storable $have");
}

##
## Point the "#!" line of every RepeatMasker script at the chosen perl.
##
##   $perl - absolute path of the interpreter to embed
##
## Script names are relative, so this must run from the installation
## directory.  Dies if any script is missing or empty (checked for the
## whole list before anything is modified, so a broken distribution is
## never left half-updated) or if a script cannot be read or written.
##
sub update_shebangs {
  my ($perl) = @_;

  section_header("Perl Interpreter");
  status_ok($perl);

  my @scripts = qw(
    RepeatMasker
    ProcessRepeats
    RepeatProteinMask
    DupMasker
    util/calcDivergenceFromAlign.pl
    util/createRepeatLandscape.pl
    util/maskFile.pl
    util/rmOutToGFF3.pl
    util/buildRMLibFromEMBL.pl
    util/rmToUCSCTables.pl
  );

  for my $script ( @scripts ) {
    die "\n  ERROR: $script is missing from the RepeatMasker distribution.\n\n"
      unless -s $script;
  }

  for my $script ( @scripts ) {
    _rewrite_shebang( $script, $perl );
  }
}

##
## Replace the first line of $file with "#!$perl" when that line is a
## shebang mentioning perl.  Returns 1 if the file was rewritten, 0 if it
## was left alone.  Done in-process rather than through a shell one-liner
## so that spaces or metacharacters in $perl cannot break the command or
## the substitution.
##
sub _rewrite_shebang {
  my ( $file, $perl ) = @_;

  open( my $in, '<:raw', $file )
    or die "\n  ERROR: Cannot read $file: $!\n\n";
  my $content = do { local $/; <$in> };
  close($in);
  $content = "" unless defined $content;

  # \A anchors to the very first line only; "." stops at the newline, so
  # the rest of the file is untouched.  Anything after the interpreter on
  # the old line (switches) is dropped, as before.
  my $updated = $content;
  return 0 unless $updated =~ s/\A#!.*perl.*/#!$perl/;
  return 0 if $updated eq $content;

  # Writing through the existing file keeps its permissions (execute bit).
  open( my $out, '>:raw', $file )
    or die "\n  ERROR: Cannot write $file: $!\n\n";
  ( print {$out} $updated )
    or die "\n  ERROR: Failed writing $file: $!\n\n";
  close($out)
    or die "\n  ERROR: Failed writing $file: $!\n\n";

  return 1;
}

##
## Establish the TRF (Tandem Repeat Finder) executable.
##
## A -trf_prgm value that passes validation is accepted without asking.
## Otherwise the user is prompted until an executable regular file is
## entered; the suggestion offered is the configured value when it is
## executable, else whatever "trf" is found on PATH.
##
sub configure_trf {
  section_header("TRF - Tandem Repeat Finder");
  print "\n  TRF identifies simple tandem repeats and is required for all runs.\n";
  print "  https://github.com/Benson-Genomics-Lab/TRF\n\n";

  # Command-line value: use it silently when it validates
  if ( $opts{trf_prgm} ) {
    if ( RepeatMaskerConfig::validateParam('TRF_PRGM') ) {
      status_ok( $config->{'TRF_PRGM'}->{'value'} );
      return;
    }
    status_warn("Specified TRF path is not valid: $opts{trf_prgm}");
    print "\n";
  }

  # Pick the suggestion shown in the prompt
  my $suggestion = $config->{'TRF_PRGM'}->{'value'};
  unless ( $suggestion && -x $suggestion ) {
    $suggestion = find_in_path("trf");
  }

  for (;;) {
    my $candidate = prompt( "Full path to TRF executable", $suggestion );

    unless ( -f $candidate && -x $candidate ) {
      print "\n  '$candidate' does not exist or is not executable.\n\n";
      # Offer the rejected entry back so it can be corrected
      $suggestion = $candidate;
      next;
    }

    $config->{'TRF_PRGM'}->{'value'} = $candidate;
    status_ok($candidate);
    return;
  }
}

##
## Configure search engines.
##
## With any engine directory given on the command line the routine is
## non-interactive: it keeps whichever engines validate, fixes up the
## default, reports, and returns (exiting with status 1 if none validate).
## Otherwise it shows a numbered menu of @ENGINES plus a final "Done"
## entry and loops until "Done" is chosen with at least one valid engine.
##
sub configure_search_engines {
  section_header("Search Engines");
  print "\n  At least one search engine is required.\n";
  print "  RMBlast is recommended for most uses.\n";

  # Non-interactive path: one or more engine directories were given on the
  # command line.  Note that -default_search_engine on its own does not
  # trigger this branch.
  if ( $opts{rmblast_dir} || $opts{hmmer_dir} ||
       $opts{crossmatch_dir} || $opts{abblast_dir} ) {
    # Every engine in @ENGINES is validated here, not only those named on
    # the command line.
    my @valid = grep { RepeatMaskerConfig::validateParam( $_->{key} ) } @ENGINES;
    if ( !@valid ) {
      print "\n  ERROR: None of the specified engine directories are valid.\n\n";
      exit 1;
    }
    _auto_set_default( \@valid );
    print "\n";
    _print_engine_status();
    return;
  }

  # Interactive menu
  my $done = 0;
  while ( !$done ) {
    print "\n";
    # $i numbers the menu entries; after the loop it holds the number of
    # the "Done" entry (one past the last engine).
    my $i = 1;
    for my $e ( @ENGINES ) {
      my $val     = $config->{ $e->{key} }->{'value'} // "";
      my $default = $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "";
      # Valid engines are tagged; the one matching the default (compared
      # case-insensitively) gets [DEFAULT].
      my $tag     = RepeatMaskerConfig::validateParam( $e->{key} )
                  ? ( lc($default) eq $e->{defname} ? " [DEFAULT]" : " [configured]" )
                  : "";
      my $display = RepeatMaskerConfig::validateParam( $e->{key} ) ? $val : "(not configured)";
      printf "  %d. %-12s  %s%s\n", $i++, $e->{label}, $display, $tag;
    }
    print "  $i. Done\n";

    my $sel = prompt("\n  Select an engine to configure, or $i to finish", "");
    # Tolerate blanks typed around (or inside) the number.
    $sel =~ s/\s+//g;

    if ( $sel =~ /^\d+$/ && $sel >= 1 && $sel < $i ) {
      _configure_one_engine( $ENGINES[$sel - 1] );
    }
    elsif ( $sel eq "$i" ) {
      # "Done" is only honoured once at least one engine validates;
      # otherwise the menu is shown again.
      my @valid = grep { RepeatMaskerConfig::validateParam( $_->{key} ) } @ENGINES;
      if ( !@valid ) {
        print "\n  At least one search engine must be configured.\n";
        next;
      }
      _ensure_default( \@valid );
      $done = 1;
    }
    else {
      print "\n  Invalid selection.\n";
    }
  }

  print "\n";
  _print_engine_status();
}

##
## Prompt for one engine's bin directory until it validates.
##
##   $e - an entry of @ENGINES (uses key, label, defname, probe, url)
##
## Each entry is stored into the configuration before it is validated.
## On success the engine becomes the default automatically when no default
## is set or no other engine is valid; otherwise the user is asked whether
## to make it the default, unless it already is.  The default name is
## compared case-insensitively, matching the other engine routines, so a
## stored "RMBlast" is recognised as rmblast.
##
sub _configure_one_engine {
  my ($e) = @_;

  # Seed from current config or PATH
  my $current = $config->{ $e->{key} }->{'value'} // "";
  if ( !$current || !RepeatMaskerConfig::validateParam( $e->{key} ) ) {
    my $found = find_in_path( $e->{probe} );
    $current  = dirname($found) if $found;
  }

  print "\n  Configuring $e->{label}\n";
  print "  $e->{url}\n\n";

  # Tolerate a config entry without an expected_binaries list rather than
  # dying on an undefined array reference.
  my $expected = join( ", ", @{ $config->{ $e->{key} }->{'expected_binaries'} // [] } );

  while (1) {
    my $dir = prompt( "Path to $e->{label} bin directory", $current );
    $config->{ $e->{key} }->{'value'} = $dir;
    if ( RepeatMaskerConfig::validateParam( $e->{key} ) ) {
      my $cur_default = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );
      my @already_valid = grep {
        $_->{key} ne $e->{key} && RepeatMaskerConfig::validateParam( $_->{key} )
      } @ENGINES;

      # First engine or no default yet: make it the default automatically
      if ( !$cur_default || !@already_valid ) {
        $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $e->{defname};
      }
      elsif ( $cur_default ne $e->{defname} ) {
        if ( prompt_yn( "Make $e->{label} the default search engine?", "n" ) ) {
          $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $e->{defname};
        }
      }
      return;
    }
    print "\n  Could not find required binaries in '$dir'.\n";
    print "  Expected: $expected\n\n";
  }
}

##
## Keep the configured default engine when it is among the valid engines
## (name compared case-insensitively); otherwise make the first valid
## engine the default.
##
##   $valid_ref - array ref of @ENGINES entries that passed validation
##
sub _auto_set_default {
  my ($valid_ref) = @_;
  my $wanted = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

  for my $engine ( @$valid_ref ) {
    return if $engine->{defname} eq $wanted;
  }

  $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $valid_ref->[0]{defname};
}

##
## Called when the menu is finished: make sure the default engine is one
## of the valid ones.  Nothing happens if it already is.  A single valid
## engine becomes the default silently; with several, the user picks one
## from a numbered list (Enter selects the first).
##
##   $valid_ref - array ref of @ENGINES entries that passed validation
##
sub _ensure_default {
  my ($valid_ref) = @_;
  my $wanted = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

  return if grep { $_->{defname} eq $wanted } @$valid_ref;

  my $count = scalar @$valid_ref;
  if ( $count == 1 ) {
    $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $valid_ref->[0]{defname};
    return;
  }

  print "\n  Multiple engines are configured. Select the default:\n\n";
  for my $idx ( 0 .. $count - 1 ) {
    printf "  %d. %s\n", $idx + 1, $valid_ref->[$idx]{label};
  }

  while (1) {
    my $choice = prompt( "Default engine", "1" );
    unless ( $choice =~ /^\d+$/ && $choice >= 1 && $choice <= $count ) {
      print "  Invalid selection.\n";
      next;
    }
    $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $valid_ref->[$choice - 1]{defname};
    return;
  }
}

##
## Print one OK line per engine that currently validates, showing its
## directory and marking the default engine with [DEFAULT].
##
sub _print_engine_status {
  my $chosen = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

  for my $engine ( @ENGINES ) {
    if ( RepeatMaskerConfig::validateParam( $engine->{key} ) ) {
      my $line = "$engine->{label}: " . $config->{ $engine->{key} }->{'value'};
      $line .= " [DEFAULT]" if $engine->{defname} eq $chosen;
      status_ok($line);
    }
  }
}

##
## Optional FamDB setup.
##
## A -famdb_dir value that validates is accepted without asking; an
## invalid one is reported and the interactive flow takes over.  The user
## may keep a valid existing setting, decline FamDB (which clears the
## setting), or enter a directory, in which case the prompt repeats until
## the directory validates.
##
sub configure_famdb {
  section_header("FamDB  (optional, but highly recommended)");
  print "\n  FamDB provides access to the Dfam database for taxonomy-based\n";
  print "  repeat searches (-species option). Without it, only custom\n";
  print "  library searches (-lib) are supported.\n";
  print "\n  https://github.com/Dfam-consortium/FamDB\n\n";

  # Command-line value: use it silently when it validates
  if ( $opts{famdb_dir} ) {
    if ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
      status_ok( "FamDB: " . $config->{'FAMDB_DIR'}->{'value'} );
      return;
    }
    status_warn("Specified FamDB directory is not valid: $opts{famdb_dir}");
    print "  famdb.py must exist and be executable in that directory.\n\n";
    # Continue with the interactive questions below
  }

  my $suggestion = $config->{'FAMDB_DIR'}->{'value'} // "";
  my $usable     = $suggestion && RepeatMaskerConfig::validateParam('FAMDB_DIR');

  # Decide whether the user wants to enter a directory at all
  my $wants_setup;
  if ( $usable ) {
    print "  Currently configured: $suggestion\n\n";
    if ( prompt_yn( "Keep this FamDB configuration?", "y" ) ) {
      status_ok("FamDB: $suggestion");
      return;
    }
    print "\n";
    $wants_setup = prompt_yn( "Configure a different FamDB installation?", "y" );
  }
  else {
    $wants_setup = prompt_yn( "Configure FamDB now?", "n" );
  }

  if ( !$wants_setup ) {
    $config->{'FAMDB_DIR'}->{'value'} = "";
    status_info("FamDB not configured. Only -lib searches will be available.");
    return;
  }

  print "\n";
  for (;;) {
    my $entered = prompt( "Path to FamDB installation (directory containing famdb.py)", $suggestion );
    $config->{'FAMDB_DIR'}->{'value'} = $entered;

    unless ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
      print "\n  famdb.py not found or not executable in '$entered'.\n\n";
      # Offer the rejected entry back so it can be corrected
      $suggestion = $entered;
      next;
    }

    status_ok("FamDB: $entered");
    return;
  }
}

##
## Print a final summary of the installed configuration: library
## directory, TRF, every engine that validates (default marked), and
## FamDB.  When FamDB validates, "famdb.py info" is run and, if its output
## contains a "Database:" line, the database name, version, date and
## family count are shown as well.
##
sub print_summary {
  my $w = 62;
  print "\n" . "=" x $w . "\n";
  printf " %-*s\n", $w - 2, " Configuration Summary";
  print "=" x $w . "\n\n";

  my $trf     = $config->{'TRF_PRGM'}->{'value'};
  my $default = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

  printf "  %-14s  %s\n", "Libraries:",   "$INSTALL_DIR/Libraries";
  printf "  %-14s  %s\n", "TRF:",         $trf;

  for my $e ( @ENGINES ) {
    next unless RepeatMaskerConfig::validateParam( $e->{key} );
    my $tag = ( $e->{defname} eq $default ) ? "  [DEFAULT]" : "";
    printf "  %-14s  %s%s\n", "$e->{label}:", $config->{ $e->{key} }->{'value'}, $tag;
  }

  if ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
    my $famdb_dir = $config->{'FAMDB_DIR'}->{'value'};
    printf "  %-14s  %s\n", "FamDB:", $famdb_dir;

    # The command goes through the shell (needed for the stderr redirect),
    # so single-quote the program path: directories containing spaces or
    # shell metacharacters must not split or alter the command.
    ( my $quoted = "$famdb_dir/famdb.py" ) =~ s/'/'\\''/g;
    my $info = `'$quoted' info 2>/dev/null` // "";
    if ( $info ) {
      my ($db_name, $db_ver, $db_date, $db_count);
      $db_name  = $1 if $info =~ /^Database\s*:\s*(\S.*)/m;
      $db_ver   = $1 if $info =~ /^Version\s*:\s*(\S.*)/m;
      $db_date  = $1 if $info =~ /^Date\s*:\s*(\S.*)/m;
      # Prefer the consensus-sequence count; fall back to the HMM count.
      $db_count = $1 if $info =~ /^Total consensus sequences present\s*:\s*(\d+)/m;
      $db_count = $1 if !$db_count && $info =~ /^Total HMMs present\s*:\s*(\d+)/m;
      if ( $db_name ) {
        printf "  %-14s  %s %s (%s)\n", "",
          $db_name, $db_ver // "", $db_date // "";
        printf "  %-14s  %s families\n", "", _commify($db_count) if $db_count;
      }
    }
  }
  else {
    printf "  %-14s  (not configured -- only -lib searches available)\n", "FamDB:";
  }

  print "\n" . "=" x $w . "\n";
  printf " %-*s\n", $w - 2, " RepeatMasker v$VERSION is configured and ready.";
  print "=" x $w . "\n\n";
}

##
## Insert thousands separators into a number, e.g. 1234567 -> "1,234,567".
## An undefined argument is treated as 0.
##
sub _commify {
  my ($value) = @_;
  $value = 0 unless defined $value;

  # A comma goes after any digit that is followed by a whole number of
  # three-digit groups running to the end of that digit run.
  ( my $text = "$value" ) =~ s/(?<=\d)(?=(?:\d{3})+(?!\d))/,/g;
  return $text;
}

1;
