TFBS::PatternGen
AnnSpec
Summary
TFBS::PatternGen::AnnSpec - a pattern factory that uses the AnnSpec program (version 2.1)
Package variables
No package variables defined.
Included modules
Bio::Seq
Bio::SeqIO
File::Temp qw ( :POSIX )
Inherit
Synopsis
my $patterngen =
TFBS::PatternGen::AnnSpec->new(-seq_file=>'sequences.fa',
-binary => 'ann-spec');
my $pfm = $patterngen->pattern(); # $pfm is now a TFBS::Matrix::PFM object
Description
TFBS::PatternGen::AnnSpec builds position frequency matrices
using an external program AnnSpec (Workman, C. and Stormo, G.D. (2000) ANN-Spec: A method for discovering transcription factor binding sites with improved specificity. Proc. Pacific Symposium on Biocomputing 2000).
Methods
_parse_AnnSpec_output | No description | Code |
_parse_raw_matrix | No description | Code |
_parse_sites | No description | Code |
_run_AnnSpec | No description | Code |
new | Description | Code |
Methods description
Title : new Usage : my $patterngen = TFBS::PatternGen::AnnSpec->new(%args); Function: the constructor for the TFBS::PatternGen::AnnSpec object Returns : a TFBS::PatternGen::AnnSpec object Args : This method takes named arguments; you must specify one of the following three -seq_list # a reference to an array of strings # and/or Bio::Seq objects # or -seq_stream # A Bio::SeqIO object # or -seq_file # the name of the fasta file containing # all the sequences Other arguments are: -binary # a fully qualified path to the AnnSpec executable # OPTIONAL: default 'annspec' (the value the constructor code falls back to) -additional_params # a string (or a reference to an array of strings) # containing additional command-line switches # for the ann-spec program |
Methods code
_parse_AnnSpec_output | description | prev | next | Top |
# _parse_AnnSpec_output($resultstring, $command_line)
#
# Converts the text captured from an AnnSpec run into
# TFBS::PatternGen::AnnSpec::Motif objects and appends them to
# $self->{'motifs'}.
#
#   $resultstring - the captured program output
#   $command_line - the command that produced it; only used in the
#                   error message
#
# Throws (via $self->throw) when there is no output at all.
# Returns nothing.
sub _parse_AnnSpec_output {
    my ($self, $resultstring, $command_line) = @_;

    # Backticks give undef when the command could not be started, so check
    # definedness first rather than comparing undef with eq.
    if (!defined $resultstring || $resultstring eq '') {
        $self->throw("Error running AnnSpec using command:\n $command_line");
        return;    # only reached if throw() itself returns
    }

    # Both helpers return parallel array references, one entry per motif.
    my ($consensus, $matrix) = $self->_parse_raw_matrix($resultstring);
    my ($score,     $sites)  = $self->_parse_sites($resultstring);

    for my $x (0 .. $#{$consensus}) {
        my $motif = TFBS::PatternGen::AnnSpec::Motif->new(
            -tags    => {
                consensus => $consensus->[$x],
                score     => $score->[$x],
            },
            -nr_hits => 1,
            -sites   => $sites->[$x],
            -matrix  => $matrix->[$x],
        );
        push @{ $self->{'motifs'} }, $motif;
    }

    return;
}
# _parse_raw_matrix($string)
#
# Pulls the weight matrices and their consensus strings out of AnnSpec
# output. The text is cut at every "REPORTING" marker and each piece is
# searched for a block of four "ALR" rows; pieces without one are ignored.
#
# Returns two array references of equal length:
#   - the captured WEIGHTS_CONS strings
#   - the matrices, each an array of rows, each row an array holding the
#     whitespace-separated fields from the third field onwards
sub _parse_raw_matrix {
    my ($self, $string) = @_;
    my (@consensus, @pfms);

    for my $report (split /REPORTING/, $string) {
        my ($block) = $report =~ /RUN\s+WEIGHTS_CONS.*ALR\s+\/.*ALR\s+\#.*(ALR.*\nALR.*\nALR.*\nALR.*\s+\d+\n)ALR\s+=+.*/s;
        my ($cons)  = $report =~ /WEIGHTS_CONS\s+(.*)\n/;
        next unless $block;

        # Drop the first two fields of every row and keep the rest.
        my @pfm = map {
            my @fields = split /\s+/, $_;
            [ @fields[ 2 .. $#fields ] ];
        } split("\n", $block);

        push @pfms,      \@pfm;
        push @consensus, $cons;
    }

    return \@consensus, \@pfms;
}
1; } |
# _parse_sites($string)
#
# Extracts the reported binding sites and the run score from AnnSpec output.
# The text is cut at every "REPORTING" marker; pieces without a site table
# (the lines between the "STR n ... seq" header and "RUN ALIGNMENT") are
# ignored.
#
# Each site line is split on whitespace and read positionally:
#   field 2 - score, field 3 - position (an apostrophe marks the reverse
#   strand), field 4 - site sequence, field 6 - ">" followed by the id of
#   the sequence the site was found in.
# Lines without a numeric position (blank or malformed lines) are skipped
# instead of being turned into features with undefined coordinates.
#
# Returns two array references of equal length:
#   - the RUN SCORE value of each piece
#   - for each piece, an array of Bio::SeqFeature::Generic objects, attached
#     to the matching sequence from $self->{'seq_set'} when one is found
sub _parse_sites {
    my ($self, $string) = @_;
    my (@hits, @scores);

    foreach my $substring (split /REPORTING/, $string) {
        my ($sites) = $substring =~ /STR\s+n.*seq\n(.*)RUN\s+ALIGNMENT.*/s;
        my ($score) = $substring =~ /RUN\s+SCORE\s+(\d*\.*\d*)/;
        next unless $sites;

        my @sub_hits;
        foreach my $line (split /\n/, $sites) {
            my @site_array = split /\s+/, $line;

            # Without a numeric position there is nothing to build a
            # feature from.
            my $position = $site_array[3];
            next unless defined $position;
            my ($start) = $position =~ /(\d+)/;
            next unless defined $start;

            my $strand   = $position =~ /\'/ ? -1 : 1;
            my $site_seq = defined $site_array[4] ? $site_array[4] : '';
            my ($seq_id) = defined $site_array[6]
                         ? $site_array[6] =~ />(.*)/
                         : ();

            my $feature = Bio::SeqFeature::Generic->new(
                -start  => $start,
                -end    => $start + length($site_seq) - 1,
                -strand => $strand,
                -source => 'AnnSpec',
                -score  => $site_array[2],
            );

            if (defined $seq_id) {
                foreach my $seq (@{ $self->{'seq_set'} }) {
                    $feature->attach_seq($seq) if $seq->id eq $seq_id;
                }
            }
            push @sub_hits, $feature;
        }

        push @scores, $score;
        push @hits,   \@sub_hits;
    }

    return \@scores, \@hits;
}
# _run_AnnSpec()
#
# Writes the sequences in $self->{'seq_set'} to a temporary FASTA file, runs
# the configured AnnSpec binary on it ("<binary> -f <file> <additional
# params>") and hands the captured output to _parse_AnnSpec_output.
#
# The temporary file is deleted as soon as the program has finished, before
# the output is parsed, so an exception from the parser cannot leave it
# behind.
#
# Returns 1; errors raised by _parse_AnnSpec_output propagate to the caller.
sub _run_AnnSpec {
    my ($self) = @_;

    my $tmp_file  = tmpnam();
    my $outstream = Bio::SeqIO->new(-file => ">$tmp_file", -format => "fasta");
    foreach my $seqobj (@{ $self->{'seq_set'} }) {
        $outstream->write_seq($seqobj);
    }
    $outstream->close();

    # NOTE(review): the command is interpolated into a shell string, so
    # 'binary' and 'additional_params' must come from a trusted caller.
    my $command_line = join ' ',
        $self->{'binary'}, '-f', $tmp_file, $self->{'additional_params'};
    my $resultstring = `$command_line`;

    # The external program is done with its input file.
    unlink $tmp_file;

    # NOTE(review): this echoes the raw program output to STDOUT and looks
    # like leftover debugging; kept so visible behaviour does not change.
    print "$resultstring\n";

    $self->_parse_AnnSpec_output($resultstring, $command_line);
    return 1;
}
# new(%args)
#
# Constructor. Builds the object, records the run configuration, creates
# the sequence set and immediately runs AnnSpec.
#
# Named arguments read here:
#   -seq_file          stored as $self->{'filename'}
#   -additional_params a string, or a reference to an array whose elements
#                      are joined with single spaces; empty string if absent
#   -binary            program to run; falls back to 'annspec'
# The complete argument list is also passed on to _create_seq_set.
#
# NOTE(review): the fallback binary name here is 'annspec', while the
# module documentation speaks of 'ann-spec' - confirm which is intended.
#
# Dies if the sequence set cannot be created; throws if running AnnSpec
# reports failure. Returns the new object.
sub new {
    my ($caller, %args) = @_;
    my $class = ref($caller) || $caller;
    my $self  = bless {}, $class;

    my $extra  = $args{'-additional_params'};
    my $params = "";
    if ($extra) {
        $params = ref($extra) ? join(' ', @{$extra}) : $extra;
    }

    $self->{'filename'}          = $args{'-seq_file'};
    $self->{'additional_params'} = $params;
    $self->{'binary'}            = $args{'-binary'} || 'annspec';

    $self->_create_seq_set(%args) or die ('Error creating sequence set');
    $self->_run_AnnSpec() or $self->throw("Error running AnnSpec.");

    return $self;
}
General documentation
Please send bug reports and other comments to the author.
AUTHOR - Wynand Alkema | Top |
The three methods listed above are used for the retrieval of patterns,
and are common to all TFBS::PatternGen::* classes. Please
see
TFBS::PatternGen for details.