fastq2fasta 转换小脚本

一个小工具,主要用于小 RNA 数据处理:将 fastq reads 转换为 fasta,并合并重复序列(在序列 ID 中记录出现次数)。

#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use File::Basename;

# ---------------------------------------------------------------------------
# Main script: parse the command line, collapse identical reads of a FASTQ
# file and write them out as FASTA together with a small count summary.
#
# Options:
#   -fq   <file>  input FASTQ file (required; passed to reads_counter)
#   -out  <str>   output path prefix             (default "Result/out")
#   -pre  <str>   prefix of generated sequence ids (default "seq")
#   -type <1|2>   id layout: 1 => <pre>_<n>_x<count>
#                            2 => <pre><n><TAB><count>  (default 1)
#   -help | -h    print usage
#
# Outputs:
#   <out>.convert.fa   one FASTA record per distinct read sequence
#   <out>.Reads.stat   header line plus "total<TAB>unique" read counts
# ---------------------------------------------------------------------------

my ( $fastq, $out, $pre, $type, $help );

my $opts_ok = GetOptions(
    "fq=s"    => \$fastq,
    "out:s"   => \$out,
    "pre:s"   => \$pre,
    "type:s"  => \$type,
    "help|h!" => \$help,
);

# Show usage on bad options, a missing input file, or an explicit -help.
# $help is tested for truth (not definedness) so that --nohelp is not
# mistaken for a help request.
if ( !$opts_ok || !defined $fastq || $help ) {
    usage();
    exit 1;
}

$out  ||= "Result/out";
$pre  ||= "seq";
$type ||= "1";

# Validate -type up front, before any input is read or any output file is
# truncated, and report the failure with a non-zero exit status.
unless ( $type =~ /\A\d+\z/ && ( $type == 1 || $type == 2 ) ) {
    print STDERR "ERROR! -type option must be 1 or 2.\n";
    print STDERR "1 for [${pre}_00002_x345] 2 for [${pre}00002\t345]\n";
    exit 1;
}

# Create the output directory. The list form of system() bypasses the shell,
# so unusual characters in the path cannot be interpreted as shell syntax.
my $outdir = dirname($out);
if ( !-d $outdir ) {
    system( 'mkdir', '-p', '--', $outdir ) == 0
        or die "Cannot create output directory '$outdir'\n";
}

## convert fastq to fasta and remove redundancy reads
## seq_id: seq_0000001_x345

my ( $seq, $total, $unique ) = reads_counter($fastq);
$total = 0 unless defined $total;

# Zero-pad the running number to the width of the unique-read count so that
# all ids have the same length.
my $width  = length $unique;
my $id_fmt = "%0${width}d";

my $fa_file = "$out.convert.fa";
open my $fa_fh, '>', $fa_file or die "Cannot write '$fa_file': $!\n";

# Emit reads in a reproducible order (most abundant first, ties broken by
# sequence) so that repeated runs assign the same id to the same sequence;
# plain hash order would differ from run to run.
my @ordered = sort { $seq->{$b} <=> $seq->{$a} || $a cmp $b } keys %$seq;

my $i = 0;
for my $read (@ordered) {
    $i++;
    my $cnt = $seq->{$read};
    my $num = sprintf $id_fmt, $i;
    my $id  = $type == 1 ? "${pre}_${num}_x$cnt" : "$pre$num\t$cnt";

    print {$fa_fh} ">$id\n$read\n";
}
# Buffered write errors only surface at close time, so check it.
close $fa_fh or die "Error closing '$fa_file': $!\n";

my $stat_file = "$out.Reads.stat";
open my $stat_fh, '>', $stat_file or die "Cannot write '$stat_file': $!\n";
print {$stat_fh} "total_reads\tunique_reads\n";
print {$stat_fh} "$total\t$unique\n";
close $stat_fh or die "Error closing '$stat_file': $!\n";

## =============================== SUB MODULE ============================ ##

# reads_counter($infile)
#
# Read a FASTQ file and count how often each read sequence occurs.
#
# Records are taken to be four lines each; the second line of every record
# is used as the sequence and the other three lines are ignored.
#
# Parameters:
#   $infile - path of the FASTQ file; a name ending in ".gz" is read through
#             the PerlIO ":gzip" layer, anything else as plain text.
#
# Returns a three-element list:
#   \%seq          hash ref mapping sequence => number of occurrences
#   $total_reads   number of records read (0 for an empty file)
#   $unique_reads  number of distinct sequences
#
# Dies if the file cannot be opened.
sub reads_counter {
    my ($infile) = @_;

    my $fq;
    if ( $infile =~ /\.gz$/ ) {
        # Load the gzip layer only when it is actually needed, so plain
        # FASTQ files can be processed without PerlIO::gzip installed.
        require PerlIO::gzip;
        open $fq, '<:gzip', $infile
            or die "Cannot open '$infile': $!\n";
    }
    else {
        open $fq, '<', $infile
            or die "Cannot open '$infile': $!\n";
    }

    my %seq;
    my $total_reads = 0;
    my $line        = 0;    # position inside the current 4-line record

    while ( my $row = <$fq> ) {
        # Strip the line ending, tolerating CRLF, so that a trailing "\r"
        # never becomes part of a sequence.
        $row =~ s/\r?\n\z//;
        $line++;

        if ( $line == 2 ) {
            $total_reads++;
            $seq{$row}++;
        }
        elsif ( $line == 4 ) {
            $line = 0;      # record complete; start counting the next one
        }
    }
    close $fq;

    my $unique_reads = scalar keys %seq;

    return ( \%seq, $total_reads, $unique_reads );
}


sub usage{

    my $name = basename($0);

    print STDERR <

你可能感兴趣的:(fastq2fasta 转换小脚本)