TFBS Matrix
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Summary
TFBS::Matrix - base class for matrix patterns, containing methods common
to all
Package variables
Privates (from "my" definitions)
%dna = ("A" => [12,0,0,0], "C" => [0,12,0,0], "G" => [0,0,12,0], "T" => [0,0,0,12], "U" => [0,0,0,12], "R" => [6,0,6,0], "Y" => [0,6,0,6], "M" => [6,6,0,0], "K" => [0,0,6,6], "W" => [6,0,0,6], "S" => [0,6,6,0], "B" => [0,4,4,4], "D" => [4,0,4,4], "H" => [4,4,0,4], "V" => [4,4,4,0], "N" => [3,3,3,3])
Included modules
PDL
TFBS::PatternI
Inherit
TFBS::PatternI
Synopsis
No synopsis!
Description
TFBS::Matrix is a base class consisting of universal constructor called by
its subclasses (TFBS::Matrix::*), and matrix manipulation methods that are
independent of the matrix type. It is not meant to be instantiated itself.
Methods
DESTROY
No description
Code
STAMPprintDescriptionCode
_load
No description
Code
_matrix_from_IUPAC_string
No description
Code
_matrix_from_STAMP_string
No description
Code
_matrix_from_string
No description
Code
_pdl_to_matrixref
No description
Code
_set_min_max_score
No description
Code
lengthDescriptionCode
matrixDescriptionCode
new
No description
Code
pdl_matrixDescriptionCode
prettyprintDescriptionCode
randomize_columnsDescriptionCode
rawprintDescriptionCode
revcomDescriptionCode
set_matrix
No description
Code
Methods description
STAMPprintcode    nextTop
 Title   : STAMPprint
Usage : my $STAMPstring = $pfm->STAMPprint();
Function: Convert the matrix to STAMP readable format
Returns : A string of the matrix in a TRANSFAC like format for STAMP
Args : none
lengthcodeprevnextTop
 Title   : length
Usage : my $pattern_length = $pfm->length;
Function: gets the pattern length in nucleotides
(i.e. number of columns in the matrix)
Returns : an integer
Args : none
matrixcodeprevnextTop
 Title   : matrix
Usage : my $matrix = $pwm->matrix();
$pwm->matrix( [ [12, 3, 0, 0, 4, 0],
[ 0, 0, 0,11, 7, 0],
[ 0, 9,12, 0, 0, 0],
[ 0, 0, 0, 1, 1,12]
]);
Function: get/set for the matrix data Returns : a reference to 2D array of integers(PFM) or floats (ICM, PWM) Args : none for get; a four line string, reference to 2D array, or a 2D piddle for set
pdl_matrixcodeprevnextTop
 Title   : pdl_matrix
Usage : my $pdl = $pwm->pdl_matrix();
Function: access the PDL matrix used to store the actual
matrix data directly
Returns : a PDL object, aka a piddle
Args : none
prettyprintcodeprevnextTop
 Title   : prettyprint
Usage : my $prettystring = $pfm->prettyprint();
Function: convert matrix data to a human-readable string format
Returns : a four-line string with nucleotides and aligned numbers
Args : none
randomize_columnscodeprevnextTop
 Title   : randomize_columns
Usage : $pfm->randomize_columns();
Function: Randomizes the columns of a matrix (in place).
Returns : Nothing
Args : none
rawprintcodeprevnextTop
 Title   : rawprint
Usage : my $rawstring = $pfm->rawprint);
Function: convert matrix data to a simple tab-separated format
Returns : a four-line string of tab-separated integers or floats
Args : none
revcomcodeprevnextTop
 Title   : revcom
Usage : my $revcom_pfm = $pfm->revcom();
Function: create a matrix pattern object which is reverse complement
of the current one
Returns : a TFBS::Matrix::* object of the same type as the one
the method acted upon
Args : none
Methods code
DESTROYdescriptionprevnextTop
sub DESTROY {
    # nothing
} 1;
}
STAMPprintdescriptionprevnextTop
sub STAMPprint {
    my $self = shift;
    my $pwmstring = sprintf ( $self->pdl_matrix );
    $pwmstring =~ s/\[|\]//g;                # lose []
$pwmstring =~ s/\n\s+/\n/g; # lose leading spaces
my @pwmlines = split(/\n+/, $pwmstring); @pwmlines = map { [ split(/\s+/, $_) ] } @pwmlines; $pwmstring = "DE ".$self->{'ID'}."\t".$self->{'name'}."\t".$self->{'class'}."\n"; for my $row (0..$#{ $pwmlines[1] }) { $pwmstring .= sprintf "%02d\t", $row; $pwmstring .= $pwmlines[$_]->[$row]."\t" for (1..$#pwmlines); $pwmstring .= "\n"; } $pwmstring .= "XX\n"; return $pwmstring;
}
_loaddescriptionprevnextTop
sub _load {
    my ($self, $field, $value) = @_;
    if (substr(ref($self->{db}),0,5) eq "DBI::")  {
	# database retrieval
} elsif (-d $self->{dbh}) { # retrieval from .pwm files in a directory
$self->_lookup_in_matrixlist($field, $value) or do { warn ("Matrix with $field=>$value not found."); return undef; }; my $ID = $self->{ID}; my $DIR = $self->{dbh}; $self->set_matrix(scalar `cat $DIR/$ID.pwm`); # FIXME - temporary

}
else {
$self->throw("-dbh is not a valid database handle or a directory.");
}
}
_matrix_from_IUPAC_stringdescriptionprevnextTop
sub _matrix_from_IUPAC_string {
    my ($self, $matrixstring) = @_;
    $matrixstring =~ s/^\s+//;
    $matrixstring =~ s/\s+$//;
    my @str = split(//, $matrixstring);
    my @matrix;

    for my $ltr (0..3) {
	my @l = map {$dna{uc($_)}->[$ltr]} @str;
	push @matrix,\@ l; 
    }

    return\@ matrix;
}
_matrix_from_STAMP_stringdescriptionprevnextTop
sub _matrix_from_STAMP_string {
    my ($self, $matrixstring) = @_;
    my @lines = split("\n", $matrixstring);
    my (@a, @c, @g, @t);

    # Remove garbage
shift(@lines); pop(@lines); for (@lines) { my @l = split; push @a, $l[1]; push @c, $l[2]; push @g, $l[3]; push @t, $l[4]; } return [\@a,\@ c,\@ g,\@ t];
}
_matrix_from_stringdescriptionprevnextTop
sub _matrix_from_string {
    my ($self, $matrixstring) = @_;
    my @array = ();
    foreach ((split "\n", $matrixstring)[0..3])  {
	s/^\s+//;
	s/\s+$//;
	push @array, [split];
    }
    return\@  array;
}
_pdl_to_matrixrefdescriptionprevnextTop
sub _pdl_to_matrixref {
    my ($matrixdata) = @_;
    unless ($matrixdata->isa("PDL")) {
	die "A non-PDL object passed to _pdl_to_matrixref";
    }
    my @list = list $matrixdata;
    my @array;
    my $matrix_width = scalar(@list) / 4;
for (0..3) { push @array, [splice(@list, 0, $matrix_width)]; } return\@ array;
}
_set_min_max_scoredescriptionprevnextTop
sub _set_min_max_score {
    my ($self) = @_;
    my $transpose = $self->pdl_matrix->xchg(0,1);
    $self->{min_score} = sum(minimum $transpose);
    $self->{max_score} = sum(maximum $transpose);
}
lengthdescriptionprevnextTop
sub length {
    my $self = shift;
    return $self->pdl_matrix->getdim(0);
}
matrixdescriptionprevnextTop
sub matrix {
    my ($self, $matrixdata) = @_;
    $self->set_matrix($matrixdata) if $matrixdata;
    return $self->{'matrix'};
}
newdescriptionprevnextTop
sub new {
    my $class = shift;
    my %args = @_;
    my $self = bless {}, ref($class) || $class;

    # first figure out how it was called
# we need (-dbh and (-ID or -name) for fetching it from a database
# or -matrix for direct matrix input
if (defined $args{'-matrix'}) { $self->set_matrix($args{'-matrix'}); } elsif (defined $args{'-matrixstring'}) { $self->set_matrix($args{'-matrixstring'}); } elsif (defined $args{-matrixfile}) { my $matrixstring; open (FILE, $args{-matrixfile}) or $self->throw("Could not open $args{-matrixfile}"); { local $/ = undef; $matrixstring = <FILE>; } $self->set_matrix($matrixstring); } else { $self->throw("No matrix or db object provided."); } # Set the object data.
# Parameters specified in constructor call override those
# fetched from the database.
$self->{'ID'} = ($args{-ID} or $self->{ID} or "Unknown"); $self->{'name'} = ($args{-name} or $self->{name} or "Unknown"); $self->{'class'} = ($args{-class} or $self->{class} or "Unknown"); $self->{'strand'} = ($args{-strand} or $self->{strand} or "+"); $self->{'bg_probabilities'} = ($args{'-bg_probabilities'} || {A => 0.25, C => 0.25, G => 0.25, T => 0.25}); $self->{'tags'} = $args{-tags} ? ((ref($args{-tags}) eq "HASH") ? $args{-tags} : {} ) :{}; return $self;
}
pdl_matrixdescriptionprevnextTop
sub pdl_matrix {
    pdl $_[0]->{'matrix'};
}
prettyprintdescriptionprevnextTop
sub prettyprint {
    my $self = shift;
    my $pwmstring = sprintf ( $self->pdl_matrix );
    $pwmstring =~ s/\[|\]//g;                # lose []
$pwmstring =~ s/\n /\n/g; # lose leading spaces
my @pwmlines = split("\n", $pwmstring); #
@pwmlines = ("A [$pwmlines[2] ]", "C [$pwmlines[3] ]", "G [$pwmlines[4] ]", "T [$pwmlines[5] ]"); $pwmstring = join ("\n", @pwmlines)."\n"; return $pwmstring;
}
randomize_columnsdescriptionprevnextTop
sub randomize_columns {
    my $self = shift;
    my $matrix = $self->{'matrix'};
    my $i = 0;

    # Schwartzian Transform to get random permutation
map { ( undef, $$matrix[0][$i], $$matrix[1][$i], $$matrix[2][$i], $$matrix[3][$i] ) = @$_; $i++; } sort { $a->[0] <=> $b->[0] } map { [ rand(), $$matrix[0][$_], $$matrix[1][$_], $$matrix[2][$_], $$matrix[3][$_] ] } ( 0 .. ($self->length()-1) );
}
rawprintdescriptionprevnextTop
sub rawprint {
    my $self = shift;
    my $pwmstring = sprintf ( $self->pdl_matrix );
    $pwmstring =~ s/\[|\]//g;                # lose []
$pwmstring =~ s/\n /\n/g; # lose leading spaces
my @pwmlines = split("\n", $pwmstring); # f
$pwmstring = join ("\n", @pwmlines[2..5])."\n"; return $pwmstring;
}
revcomdescriptionprevnextTop
sub revcom {
    my ($self) = @_;
    my $revcom_matrix =
	$self->new(-matrix => $self->pdl_matrix->slice('-1:0,-1:0'),
		   # the above line rotates the original matrix 180 deg,
-ID => ($self->{ID} or ""), -name => ($self->{name} or ""), -class => ($self->{class} or ""), -strand => ($self->{strand} and $self->{strand} eq "-") ? "+" : "-", -tags => ($self->{tags} or {}) ); return $revcom_matrix;
}
set_matrixdescriptionprevnextTop
sub set_matrix {
    my ($self, $matrixdata) = @_;

    # The input matrix (specified as -array=> in the constructir call
# can either be
# * a 2D regular perl array with 4 rows,
# * a piddle (FIXME - check for 4 rows), or
# * a four-line string of numbers
# print STDERR "MATRIX>>>".$matrixdata;
if (ref($matrixdata) eq "ARRAY" and ref($matrixdata->[0]) eq "ARRAY" and scalar(@{$matrixdata}) == 4) { # it is a perl array
$self->{'matrix'} = $matrixdata; } elsif (ref($matrixdata) eq "PDL") { # it's a piddle
$self->{matrix} = _pdl_to_matrixref($matrixdata); } elsif (!ref($matrixdata)) #and (scalar split "\n",$matrixdata) == 4)
{ # it's a string then, but what string?
if ($matrixdata =~ /^DE/) { # STAMP string
$self->{matrix} = $self->_matrix_from_STAMP_string($matrixdata); } elsif ($matrixdata =~ /[ACGTURYMKWSBDHVNacgturymkwsbdhvn]{3,}/) { $self->{matrix} = $self->_matrix_from_IUPAC_string($matrixdata); } else { # Regular string
$self->{matrix} = $self->_matrix_from_string($matrixdata); } } else { $self->throw("Wrong data type/format for -matrix.\n". "Acceptable formats are Array of Arrays (4 rows),\n". "PDL Array, (4 rows),\n". "or plain string (4 lines)."); } # $self->_set_min_max_score();
# print STDERR $self->prettyprint();
return 1;
}
General documentation
FEEDBACKTop
Please send bug reports and other comments to the author.
AUTHOR - Boris LenhardTop
Boris Lenhard <Boris.Lenhard@cgb.ki.se>
Modified by Eivind Valen eivind.valen@gmail.com
APPENDIXTop
The rest of the documentation details each of the object
methods. Internal methods are preceded with an underscore.