TFBS
Matrix
Summary
TFBS::Matrix - base class for matrix patterns, containing methods common
to all
Package variables
Privates (from "my" definitions)
%dna = ("A" => [12,0,0,0], "C" => [0,12,0,0], "G" => [0,0,12,0], "T" => [0,0,0,12], "U" => [0,0,0,12], "R" => [6,0,6,0], "Y" => [0,6,0,6], "M" => [6,6,0,0], "K" => [0,0,6,6], "W" => [6,0,0,6], "S" => [0,6,6,0], "B" => [0,4,4,4], "D" => [4,0,4,4], "H" => [4,4,0,4], "V" => [4,4,4,0], "N" => [3,3,3,3])
Included modules
Inherit
Synopsis
No synopsis!
Description
TFBS::Matrix is a base class consisting of universal constructor called by
its subclasses (TFBS::Matrix::*), and matrix manipulation methods that are
independent of the matrix type. It is not meant to be instantiated itself.
Methods
Methods description
Title : STAMPprint Usage : my $STAMPstring = $pfm->STAMPprint(); Function: Convert the matrix to STAMP readable format Returns : A string of the matrix in a TRANSFAC like format for STAMP Args : none |
Title : length Usage : my $pattern_length = $pfm->length; Function: gets the pattern length in nucleotides (i.e. number of columns in the matrix) Returns : an integer Args : none |
Title : matrix Usage : my $matrix = $pwm->matrix(); $pwm->matrix( [ [12, 3, 0, 0, 4, 0], [ 0, 0, 0,11, 7, 0], [ 0, 9,12, 0, 0, 0], [ 0, 0, 0, 1, 1,12] ]);
Function: get/set for the matrix data
Returns : a reference to 2D array of integers(PFM) or floats (ICM, PWM)
Args : none for get;
a four line string, reference to 2D array, or a 2D piddle for set |
Title : pdl_matrix Usage : my $pdl = $pwm->pdl_matrix(); Function: access the PDL matrix used to store the actual matrix data directly Returns : a PDL object, aka a piddle Args : none |
Title : prettyprint Usage : my $prettystring = $pfm->prettyprint(); Function: convert matrix data to a human-readable string format Returns : a four-line string with nucleotides and aligned numbers Args : none |
Title : randomize_columns Usage : $pfm->randomize_columns(); Function: Randomizes the columns of a matrix (in place). Returns : Nothing Args : none |
Title : rawprint Usage : my $rawstring = $pfm->rawprint); Function: convert matrix data to a simple tab-separated format Returns : a four-line string of tab-separated integers or floats Args : none |
Title : revcom Usage : my $revcom_pfm = $pfm->revcom(); Function: create a matrix pattern object which is reverse complement of the current one Returns : a TFBS::Matrix::* object of the same type as the one the method acted upon Args : none |
Methods code
sub STAMPprint
{ my $self = shift;
my $pwmstring = sprintf ( $self->pdl_matrix );
$pwmstring =~ s/\[|\]//g; $pwmstring =~ s/\n\s+/\n/g; my @pwmlines = split(/\n+/, $pwmstring);
@pwmlines = map { [ split(/\s+/, $_) ] } @pwmlines;
$pwmstring = "DE ".$self->{'ID'}."\t".$self->{'name'}."\t".$self->{'class'}."\n";
for my $row (0..$#{ $pwmlines[1] }) {
$pwmstring .= sprintf "%02d\t", $row;
$pwmstring .= $pwmlines[$_]->[$row]."\t" for (1..$#pwmlines);
$pwmstring .= "\n";
}
$pwmstring .= "XX\n";
return $pwmstring; } |
sub _load
{ my ($self, $field, $value) = @_;
if (substr(ref($self->{db}),0,5) eq "DBI::") {
}
elsif (-d $self->{dbh}) {
$self->_lookup_in_matrixlist($field, $value)
or do {
warn ("Matrix with $field=>$value not found.");
return undef;
};
my $ID = $self->{ID};
my $DIR = $self->{dbh};
$self->set_matrix(scalar `cat $DIR/$ID.pwm`); # FIXME - temporary
} else { $self->throw("-dbh is not a valid database handle or a directory."); } } |
sub _matrix_from_IUPAC_string
{ my ($self, $matrixstring) = @_;
$matrixstring =~ s/^\s+//;
$matrixstring =~ s/\s+$//;
my @str = split(//, $matrixstring);
my @matrix;
for my $ltr (0..3) {
my @l = map {$dna{uc($_)}->[$ltr]} @str;
push @matrix,\@ l;
}
return\@ matrix; } |
sub _matrix_from_STAMP_string
{ my ($self, $matrixstring) = @_;
my @lines = split("\n", $matrixstring);
my (@a, @c, @g, @t);
shift(@lines);
pop(@lines);
for (@lines) {
my @l = split;
push @a, $l[1];
push @c, $l[2];
push @g, $l[3];
push @t, $l[4];
}
return [\@a,\@ c,\@ g,\@ t]; } |
sub _matrix_from_string
{ my ($self, $matrixstring) = @_;
my @array = ();
foreach ((split "\n", $matrixstring)[0..3]) {
s/^\s+//;
s/\s+$//;
push @array, [split];
}
return\@ array; } |
sub _pdl_to_matrixref
{ my ($matrixdata) = @_;
unless ($matrixdata->isa("PDL")) {
die "A non-PDL object passed to _pdl_to_matrixref";
}
my @list = list $matrixdata;
my @array;
my $matrix_width = scalar(@list) / 4; for (0..3) {
push @array, [splice(@list, 0, $matrix_width)];
}
return\@ array; } |
sub _set_min_max_score
{ my ($self) = @_;
my $transpose = $self->pdl_matrix->xchg(0,1);
$self->{min_score} = sum(minimum $transpose);
$self->{max_score} = sum(maximum $transpose); } |
sub length
{ my $self = shift;
return $self->pdl_matrix->getdim(0); } |
sub matrix
{ my ($self, $matrixdata) = @_;
$self->set_matrix($matrixdata) if $matrixdata;
return $self->{'matrix'}; } |
sub new
{ my $class = shift;
my %args = @_;
my $self = bless {}, ref($class) || $class;
if (defined $args{'-matrix'}) {
$self->set_matrix($args{'-matrix'});
}
elsif (defined $args{'-matrixstring'}) {
$self->set_matrix($args{'-matrixstring'});
}
elsif (defined $args{-matrixfile}) {
my $matrixstring;
open (FILE, $args{-matrixfile})
or $self->throw("Could not open $args{-matrixfile}");
{
local $/ = undef;
$matrixstring = <FILE>;
}
$self->set_matrix($matrixstring);
}
else {
$self->throw("No matrix or db object provided.");
}
$self->{'ID'} = ($args{-ID} or
$self->{ID} or
"Unknown");
$self->{'name'} = ($args{-name} or
$self->{name} or
"Unknown");
$self->{'class'} = ($args{-class} or
$self->{class} or
"Unknown");
$self->{'strand'} = ($args{-strand} or
$self->{strand} or
"+");
$self->{'bg_probabilities'} =
($args{'-bg_probabilities'} || {A => 0.25,
C => 0.25,
G => 0.25,
T => 0.25});
$self->{'tags'} = $args{-tags} ? ((ref($args{-tags}) eq "HASH") ? $args{-tags} : {} ) :{};
return $self; } |
sub pdl_matrix
{ pdl $_[0]->{'matrix'}; } |
sub prettyprint
{ my $self = shift;
my $pwmstring = sprintf ( $self->pdl_matrix );
$pwmstring =~ s/\[|\]//g; $pwmstring =~ s/\n /\n/g; my @pwmlines = split("\n", $pwmstring); @pwmlines = ("A [$pwmlines[2] ]",
"C [$pwmlines[3] ]",
"G [$pwmlines[4] ]",
"T [$pwmlines[5] ]");
$pwmstring = join ("\n", @pwmlines)."\n";
return $pwmstring; } |
sub randomize_columns
{ my $self = shift;
my $matrix = $self->{'matrix'};
my $i = 0;
map { ( undef, $$matrix[0][$i], $$matrix[1][$i], $$matrix[2][$i], $$matrix[3][$i] ) = @$_; $i++; }
sort { $a->[0] <=> $b->[0] }
map { [ rand(), $$matrix[0][$_], $$matrix[1][$_], $$matrix[2][$_], $$matrix[3][$_] ] } ( 0 .. ($self->length()-1) ); } |
sub rawprint
{ my $self = shift;
my $pwmstring = sprintf ( $self->pdl_matrix );
$pwmstring =~ s/\[|\]//g; $pwmstring =~ s/\n /\n/g; my @pwmlines = split("\n", $pwmstring); $pwmstring = join ("\n", @pwmlines[2..5])."\n";
return $pwmstring; } |
sub revcom
{ my ($self) = @_;
my $revcom_matrix =
$self->new(-matrix => $self->pdl_matrix->slice('-1:0,-1:0'),
-ID => ($self->{ID} or ""),
-name => ($self->{name} or ""),
-class => ($self->{class} or ""),
-strand => ($self->{strand} and $self->{strand} eq "-") ? "+" : "-",
-tags => ($self->{tags} or {}) );
return $revcom_matrix; } |
sub set_matrix
{ my ($self, $matrixdata) = @_;
if (ref($matrixdata) eq "ARRAY"
and ref($matrixdata->[0]) eq "ARRAY"
and scalar(@{$matrixdata}) == 4)
{
$self->{'matrix'} = $matrixdata;
}
elsif (ref($matrixdata) eq "PDL")
{
$self->{matrix} = _pdl_to_matrixref($matrixdata);
}
elsif (!ref($matrixdata))
{
if ($matrixdata =~ /^DE/) {
$self->{matrix} = $self->_matrix_from_STAMP_string($matrixdata);
} elsif ($matrixdata =~ /[ACGTURYMKWSBDHVNacgturymkwsbdhvn]{3,}/) {
$self->{matrix} = $self->_matrix_from_IUPAC_string($matrixdata);
} else {
$self->{matrix} = $self->_matrix_from_string($matrixdata);
}
}
else {
$self->throw("Wrong data type/format for -matrix.\n".
"Acceptable formats are Array of Arrays (4 rows),\n".
"PDL Array, (4 rows),\n".
"or plain string (4 lines).");
}
return 1; } |
General documentation
Please send bug reports and other comments to the author.
AUTHOR - Boris Lenhard | Top |
The rest of the documentation details each of the object
methods. Internal methods are preceded with an underscore.