#!/usr/bin/perl -X
use strict;
use warnings;
use Bio::SeqIO;
use Getopt::Long;
use Bio::Phylo::Util::Logger ':levels';

# process command line arguments
# default log level; every --verbose on the command line increments it by one
my $verbosity = INFO;
my $infile;
my $outfile;

# minimum sequence length a record must have to be kept (override: --length)
# http://virological.org/t/a-dynamic-nomenclature-for-sars-cov-2-to-assist-genomic-epidemiology/458
my $length = 29_000; 

# GetOptions returns false when it meets an unknown or malformed option; stop
# right away instead of silently filtering with settings the user did not
# intend
GetOptions(
	'infile=s'  => \$infile,
	'outfile=s' => \$outfile,
	'verbose+'  => \$verbosity,
	'length=i'  => \$length,
) or die "Usage: $0 [--infile <fasta>] [--outfile <fasta>] [--length <int>] [--verbose]\n";

# configure the logger with the requested level for class 'main'
Bio::Phylo::Util::Logger->new( 
	'-level' => $verbosity, 
	'-class' => 'main' 
);

# build the FASTA reader: from the named file when --infile was given,
# from STDIN otherwise
my $in = do {
	my @source;
	if ( $infile ) {
		INFO "reading from infile $infile";
		@source = ( '-file' => $infile );
	}
	else {
		INFO "reading from STDIN";
		@source = ( '-fh' => \*STDIN );
	}

	# the format is the same either way, only the source argument differs
	Bio::SeqIO->new( '-format' => 'fasta', @source );
};

# build the FASTA writer: to STDOUT unless --outfile names a file
my $out;
{
	my @writer_args = ( '-format' => 'fasta' );
	if ( not $outfile ) {
		INFO "writing to STDOUT";
		push @writer_args, '-fh' => \*STDOUT;
	}
	else {
		INFO "writing to outfile $outfile";

		# the leading '>' is passed through as part of the -file value
		push @writer_args, '-file' => ">$outfile";
	}
	$out = Bio::SeqIO->new( @writer_args );
}

# start filtering: write every record whose sequence is at least $length
# characters long, and log (at DEBUG level) each record that is dropped
while( my $seq = $in->next_seq ) {
	if ( length( $seq->seq ) >= $length ) {
		$out->write_seq( $seq );
	}
	else {
		# report only the records that are really skipped; previously this
		# message was emitted for every record, including the ones written
		DEBUG "skipping short seq ".$seq->id;
	}
}

