TFBS::PatternGen
Elph
Summary
TFBS::PatternGen::Elph - a pattern factory that uses the Elph program
Package variables
No package variables defined.
Included modules
Bio::Seq
Bio::SeqIO
File::Temp qw ( :POSIX )
Inherit
Synopsis
my $patterngen =
TFBS::PatternGen::Elph->new(-seq_file=>'sequences.fa',
-binary => '/Elph/elph',
-motif_length => [8, 9, 10],
-additional_params => '-x -r -e');
my $pfm = $patterngen->pattern(); # $pfm is now a TFBS::Matrix::PFM object
Description
TFBS::PatternGen::Elph builds position frequency matrices
using the motif finding algorithm implemented in the external
Elph program. The input sequences are written to a temporary
FASTA file, Elph is run on that file, and the motif reported in
its output is stored as a pattern.
Methods
Methods description
Title : new Usage : my $db = TFBS::PatternGen::Elph->new(%args); Function: the constructor for the TFBS::PatternGen::Elph object Returns : a TFBS::PatternGen::Elph object Args : This method takes named arguments; you must specify one of the following three -seq_list # a reference to an array of strings # and/or Bio::Seq objects # or -seq_stream # A Bio::SeqIO object # or -seq_file # the name of the fasta file containing # all the sequences Other arguments are: -binary # a fully qualified path to Elph executable # OPTIONAL: default 'elph' -nr_hits # a presumed number of pattern occurrences in the # sequence set: it can be a single integer, e.g. # -nr_hits => 24 , or a reference to an array of # integers, e.g. -nr_hits => [12, 24, 36] -motif_length # an expected length of motif in nucleotides: # it can be a single integer, e.g. # -motif_length => 8 , or a reference to an # array of integers, e.g. -motif_length => [8..12] # OPTIONAL: default 8 -additional_params # a string containing additional # command-line switches for the # Elph program |
Methods code
_parse_elph_output | description | prev | next | Top |
# Parse the text printed by the Elph program and store the motif it
# describes in $self->{'motifs'}.
#
# Args:
#   $resultstring - everything the elph command wrote to standard output
#   $command_line - the command that was run; used only in error messages
#
# Throws (via $self->throw) when the output is undefined, starts with
# "error", has no "Motif counts:" section, or has fewer than four lines
# after that section.
sub _parse_elph_output {
    my ($self, $resultstring, $command_line) = @_;

    if (!defined $resultstring || $resultstring =~ /^error/) {
        $self->throw("Error running elph command:\n $command_line");
        return;
    }

    # The text between "MAP for motif: " and " InfoPar=" becomes the
    # motif's score tag; it stays undef when that line is absent.
    my ($MAP) = $resultstring =~ /MAP for motif: (.*) InfoPar=/;

    # Discard everything up to and including the "Motif counts:" header so
    # that the count rows are the first lines of the remaining text.
    # Without the header there is nothing to build a matrix from (e.g. the
    # binary was not found and printed nothing to standard output).
    unless ($resultstring =~ s/.*Motif counts:\n//s) {
        $self->throw(
            "No motif counts found in output of elph command:\n $command_line");
        return;
    }

    my @lines = split "\n", $resultstring;
    if (@lines < 4) {
        $self->throw(
            "Incomplete motif counts in output of elph command:\n $command_line");
        return;
    }

    # One matrix row per line for the first four lines.
    my @matrix;
    foreach my $row_nr (0 .. 3) {
        my @counts = split(/\s+/, $lines[$row_nr]);
        # The first field is not a count.
        # NOTE(review): presumably the nucleotide label - confirm against
        # real Elph output.
        shift @counts;
        push @matrix, \@counts;
    }

    # The site table is looked up in the text that follows the counts header.
    my $sites = $self->_site_props($resultstring);

    my $motif = TFBS::PatternGen::Elph::Motif->new(
        -tags   => { score => $MAP },
        -sites  => $sites,
        -matrix => \@matrix,
    );
    push @{ $self->{'motifs'} }, $motif;
}
# Write the sequence set to a temporary FASTA file, run the Elph binary on
# it and hand the captured output to _parse_elph_output.
#
# Reads  : $self->{'seq_set'}, $self->{'binary'},
#          $self->{'motif_length_string'}, $self->{'additional_params'}
# Writes : $self->{'additional_params'} (the first "-b" is removed)
# Returns: 1
sub _run_elph {
    my $self = shift;

    my $tmp_file  = tmpnam();
    my $outstream = Bio::SeqIO->new(-file => ">$tmp_file", -format => "fasta");
    foreach my $seqobj (@{ $self->{'seq_set'} }) {
        $outstream->write_seq($seqobj);
    }
    $outstream->close();

    # Removes the first literal "-b" found anywhere in the parameter string.
    # NOTE(review): presumably this keeps Elph from switching to an output
    # format the parser cannot read; it also matches inside longer tokens.
    $self->{'additional_params'} =~ s/-b//;

    # NOTE(review): the command is interpolated into a shell command line
    # without quoting; -binary and -additional_params must be trusted input.
    # Standard error is discarded, so a failing binary yields empty output.
    my $command_line =
        $self->{'binary'} . " "
      . $tmp_file . " "
      . "LEN=" . $self->{'motif_length_string'} . " "
      . $self->{'additional_params'} . " 2>/dev/null";
    my $resultstring = `$command_line`;

    # The temporary file is only needed while Elph runs. Remove it before
    # parsing, because parsing may throw and would otherwise leave the file
    # behind. Removal is best-effort.
    unlink $tmp_file;

    $self->_parse_elph_output($resultstring, $command_line);
    return 1;
}
# Build one Bio::SeqFeature::Generic object per row of the site table found
# in the Elph output.
#
# Args:
#   $resultstring - Elph output text; the table is taken from the text that
#                   follows the first "Seq.no" marker
# Returns: reference to an array of Bio::SeqFeature::Generic objects
sub _site_props {
    my ($self, $resultstring) = @_;

    my @features;
    my @chunks = split(/Seq\.no/, $resultstring);

    foreach my $row (split "\n", $chunks[1]) {
        next if $row =~ /Pos/;    # rest of the table header line
        last if $row eq '';       # first empty line ends the table

        my @fields = split(/\s+/, $row);

        # The motif sequence is read from field 3 when field 2 equals 1,
        # and from field 4 otherwise.
        # NOTE(review): presumably one column is missing from rows whose
        # position is 1 - confirm against real Elph output.
        my $seq_field = $fields[2] == 1 ? 3 : 4;
        my $motif_seq = $fields[$seq_field];

        my $feature = Bio::SeqFeature::Generic->new(
            -start  => $fields[2],
            -end    => $fields[2] + length($motif_seq) - 1,
            -strand => 1,
            -source => 'Elph',
            -score  => $fields[-3],
        );

        # Attach every sequence of the set whose id equals the last field.
        foreach my $seq (@{ $self->{'seq_set'} }) {
            $feature->attach_seq($seq) if $seq->id eq $fields[-1];
        }

        push @features, $feature;
    }

    return \@features;
}
1; } |
# Constructor: stores the run parameters, builds the sequence set and runs
# Elph immediately, so the returned object already holds the parsed motif.
#
# Named arguments read here:
#   -motif_length      - scalar or array reference (joined with ',');
#                        8 is used when the value is false
#   -additional_params - scalar or array reference (joined with ' ');
#                        the empty string is used when the value is false
#   -binary            - name or path of the executable; 'elph' when false
# All arguments are also passed on to _create_seq_set.
#
# Dies when _create_seq_set returns false; throws when _run_elph does.
sub new {
    my ($caller, %args) = @_;
    my $class = ref($caller) || $caller;
    my $self  = bless {}, $class;

    my $length_arg = $args{'-motif_length'};
    if (!$length_arg) {
        $self->{'motif_length_string'} = 8;
    }
    elsif (ref($length_arg)) {
        $self->{'motif_length_string'} = join(',', @{$length_arg});
    }
    else {
        $self->{'motif_length_string'} = $length_arg;
    }

    my $params_arg = $args{'-additional_params'};
    if (!$params_arg) {
        $self->{'additional_params'} = "";
    }
    elsif (ref($params_arg)) {
        $self->{'additional_params'} = join(' ', @{$params_arg});
    }
    else {
        $self->{'additional_params'} = $params_arg;
    }

    $self->{'binary'} = $args{'-binary'} || 'elph';
    $self->{'motifs'} = [];

    unless ($self->_create_seq_set(%args)) {
        die('Error creating sequence set');
    }
    unless ($self->_run_elph()) {
        $self->throw("Error running elph.");
    }

    return $self;
}
General documentation
The three methods listed above are used for the retrieval of patterns,
and are common to all TFBS::PatternGen::* classes. Please
see
TFBS::PatternGen for details.