Perl example code

From irefindex

These are code examples that accompany the MBV3070 Perl lectures.

Copy and paste the text into a text file called Exercise1.plx. Then, at the command prompt, type

perl Exercise1.plx

to run the example script.


Exercise1.plx

#!/usr/bin/perl
use strict;
use warnings;

#a first script: write a fixed greeting to the screen
my $greeting = "My first Perl program\n";
print $greeting;

#inside double quotes \n starts a new line and \t inserts a tab
#try single quotes instead and compare the output
my $two_lines = "First line\nsecond line and there is a tab\there\n";
print $two_lines;

Exercise2.plx

#!/usr/bin/perl
use strict;
use warnings;

#assign values to variables $x and $y in one list assignment and print them
my ($x, $y) = (4, 2);
print "x is $x and y is $y\n";

#example of arithmetic expression: ** (power) binds tighter than +
my $z = $y ** 3 + $x;
$x += 1;
print "x is $x and z is $z\n";

#evaluating arithmetic expression within print command
#inside a quoted string only the variables are replaced, nothing is calculated
print "add 3 to $z: $z + 3\n"; #did it work?
#outside the quotes the sum is calculated before it is printed
my $z_plus_three = $z + 3;
print "add 3 to $z:", $z_plus_three, "\n";

Exercise3.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Concatenate two given sequences, 
#find the length of the new sequence and
#print out the second codon of the sequence

#the two sequences to join
my $DNA = "GATTACACAT";
my $polyA = "AAAA";

#concatenate: start from a copy of $DNA and append $polyA with .=
my $modifiedDNA = $DNA;
$modifiedDNA .= $polyA;

#report the new sequence together with its length
my $DNAlength = length $modifiedDNA;
print "Modified DNA: $modifiedDNA has length $DNAlength\n";

#a codon is 3 bases long and substr counts positions from 0,
#so the second codon starts at offset 3
my $codon_size = 3;
my $second_codon_start = 1 * $codon_size;
my $codon = substr($modifiedDNA, $second_codon_start, $codon_size);
print "Second codon is $codon\n";

Exercise4.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Ask the user for their name and age
#and calculate their age in days

#get a string from the keyboard
print "Please enter your name\n";
my $name = <STDIN>;
#getting rid of the new line character
#try leaving this line out
chomp($name);
#prompt the user for their age
#get a number from the keyboard; <STDIN> is used here as well because
#the bare <> operator would read from any files named on the command
#line instead of from the keyboard
print "$name please enter your age\n";
my $age = <STDIN>;
chomp($age);
#calculate age in days, counting every year as 365 days
my $age_in_days = $age*365;
print "You are $age_in_days days old\n";

Exercise5.plx

#!/usr/bin/perl
use strict;
use warnings;

#initialize an array; qw() builds a list of quoted words
my @bases = qw(A C G T);

#print two elements of the array using an array slice
print @bases[0, 2], "\n";

#print the whole array
print @bases,"\n"; #try with double quotes

#print the number of elements in the array:
#an array assigned to a scalar gives its element count
my $number_of_bases = @bases;
print $number_of_bases, "\n";

Exercise6.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Count the frequency of base G
#in a given DNA sequence

my $DNA = "GATTACACAT";

#running total of G bases seen so far
my $countG = 0;

#visit every position in the sequence (positions start at 0)
#and add one to $countG whenever the base at that position is G
for my $position (0 .. length($DNA) - 1) {
	$countG++ if substr($DNA, $position, 1) eq "G";
}

#print out the number of Gs
print "There are $countG G bases\n";


Exercise7.plx

#!/usr/bin/perl
use strict;
use warnings;
my @array;
#fill the array with the 20 numbers 0,...19, adding one element per pass
for my $number (0 .. 19) {
	push @array, $number;
}

#print elements one-by-one; inside the loop $_ holds the current element
print "$_\n" for @array;

Exercise8.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: For a given DNA sequence find its RNA transcript,
#find its reverse complement and check if
#the reverse complement contains a start codon

my $DNA = "GATTACACAT";

#transcribe DNA to RNA - T changes to U
#work on a copy so that $DNA itself stays unchanged
my $RNA = $DNA;
$RNA =~ s/T/U/g;
print "RNA sequence is $RNA\n";

#find the reverse complement of $DNA using substitution operator
#first - reverse the sequence
my $rcDNA = reverse($DNA);

#then - try to complement it one base at a time
#NOTE: this first attempt is part of the exercise and is kept as it is;
#each substitution also rewrites the letters produced by the one before it
$rcDNA =~ s/T/A/g;
$rcDNA =~ s/A/T/g;
$rcDNA =~ s/G/C/g;
$rcDNA =~ s/C/G/g;

print "Reverse complement of $DNA is $rcDNA\n"; #did it work?

#find the reverse complement of $DNA using translation operator
#first - reverse the sequence
$rcDNA = reverse($DNA);
#then - complement the sequence
#tr replaces every letter in a single pass, so no letter is changed twice
$rcDNA =~ tr/ACGT/TGCA/;
#then - print the reverse complement
print "Reverse complement of $DNA is $rcDNA\n";

#look for a start codon (ATG) anywhere in the reverse complement
if($rcDNA =~ /ATG/){
	print "Start codon found\n";
}
else{
	print "Start codon not found\n";
}

Exercise9.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Make a subroutine that calculates
#the reverse
#complement of a DNA sequence and call it
#from the main program 

#main program: build the reverse complement by calling revcomp
#and print the result followed by a newline
my $DNA = "GATTACACAT";
my $rcDNA = revcomp($DNA);
print $rcDNA, "\n";
#stop here so that execution never runs on into the definition below
exit 0;
#definition of the function for reverse complement
#  takes:   one DNA sequence as a string
#  returns: the sequence reversed, with A<->T and C<->G exchanged
#upper-case and lower-case bases are both complemented and keep their case;
#any other character is left as it is
sub revcomp{
	my($DNAin) = @_;
	#assigning to a scalar makes reverse() reverse the characters of the string
	my $DNAout  = reverse($DNAin);
	#tr exchanges all bases in a single pass
	$DNAout =~ tr/ACGTacgt/TGCAtgca/;
	return $DNAout;
}


Exercise10.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read DNA sequences from the 'DNAseq.txt' input file -
#there is one sequence per line
#For each sequence find the reverse complement and
#print it to the 'DNAseqRC.txt' output file

#open input and output files
#the three-argument form keeps the mode ('<' read, '>' write) apart from
#the file name, and "or die" stops with the system error message ($!)
#when a file cannot be opened
my $input_name  = "DNAseq.txt";
my $output_name = "DNAseqRC.txt";
open(my $in,  '<', $input_name)  or die "Cannot open $input_name: $!\n";
open(my $out, '>', $output_name) or die "Cannot open $output_name: $!\n";

#read the input file line-by-line
#for each line find the reverse complement
#print it in the output file
while(my $sequence = <$in>){
	chomp($sequence);
	my $rcDNA = revcomp($sequence);
	print $out "$rcDNA\n";
}

#close input and output files
#a failed write may only be reported when the output file is closed
close($in);
close($out) or die "Cannot close $output_name: $!\n";
exit();


#definition of the function for reverse complement
#  takes:   one DNA sequence as a string
#  returns: the sequence reversed, with A<->T and C<->G exchanged
#upper-case and lower-case bases are both complemented and keep their case;
#any other character is left as it is
sub revcomp{
	my($DNAin) = @_;
	#assigning to a scalar makes reverse() reverse the characters of the string
	my $DNAout = reverse($DNAin);
	#tr exchanges all bases in a single pass
	$DNAout =~ tr/ACGTacgt/TGCAtgca/;
	return $DNAout;
}

DNAseq.txt

ACGACTAGCATCAGCAT
AAAAATGATCGACTATATAGCATA
AAAGGTGCATCAGCATGG


Exercise11.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#their names contain the word "exercise" (in upper or lower case)

print "List of programs we made today:\n";

#run the 'dir' command (use 'ls' on Unix-like systems) -
#everything the command prints goes into one string
my $listing = `dir`; #these are back quotes

#split the string at the line breaks to get the individual lines
my @files = split(/\n/,$listing);

#use foreach to step through the array
#if a line contains the word "exercise" print it out
#the i after the pattern ignores case, so Exercise1.plx is found as well

foreach my $file (@files){
	if($file =~ /exercise/i){
		print "$file\n";
	}
}

Exercise 11 - an alternative

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#their names contain the word "exercise" (in upper or lower case)

print "List of programs we made today:\n";

#run the 'dir' command (use 'ls' on Unix-like systems) and read what it
#prints through a pipe, as if it were a file; the '-|' mode means
#"read from this command", and "or die" stops with the system error
#message ($!) when the command cannot be started
open(my $listing, '-|', 'dir') or die "Cannot run dir: $!\n";

#while we can read a line from the command...
#if the line contains the word "exercise" print it out
#the i after the pattern ignores case, so Exercise1.plx is found as well

while (my $file = <$listing>) {
	#remove the line break so that it is not printed twice
	chomp($file);
	if($file =~ /exercise/i){
		print "$file\n";
	}
}

close($listing);

Exercise12.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Write a program that has one function.
#Use a variable named “$some_variable” in this
#function and in the main body of the program.

#Prove that you can alter the value of 
#$some_variable in the function without
#changing the value of $some_variable in the 
#the main body of the program

#main routine
#each variable is declared with "my" on the line where it first gets a value
my $some_variable = 10;

print "i am in the main routine and some_variable is: $some_variable\n";

#the subroutine receives the value of $some_variable and hands back a result
my $some_variable_changed_by_subroutine = subroutine($some_variable);

print "i am in the main routine some_variable is now: $some_variable\n";
print "i am in the main routine some_variable_changed_by_subroutine is: $some_variable_changed_by_subroutine\n";



#a subroutine that uses a variable with the same name as a variable in the main routine
#  takes:   one value
#  returns: that value increased by one
#"my" makes $some_variable private to this subroutine
sub subroutine{
	#copy the first argument into the private variable
	my ($some_variable) = @_;

	print "i am in the subroutine and some_variable is: $some_variable\n";

	#only the private copy is increased
	$some_variable++;

	print "i am in the subroutine and some_variable is now: $some_variable\n";

	return $some_variable;
}


Exercise13.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read lines from input file – 
#print lines that match a regular expression

#open input and output files
#the three-argument form keeps the mode ('<' read, '>' write) apart from
#the file name, and "or die" stops with the system error message ($!)
#when a file cannot be opened
my $input_name  = "exercise13input.txt";
my $output_name = "exercise13output.txt";
open(my $in,  '<', $input_name)  or die "Cannot open $input_name: $!\n";
open(my $out, '>', $output_name) or die "Cannot open $output_name: $!\n";

#read the input file line-by-line
#for each line ask if it matches the regex
#print it in the output file and on the screen
while(my $line = <$in>){
	chomp($line);
	if ($line =~ /^ATG?C*[ATCG]+?A{3,10}$/) {
		print $out "$line\n";
		print "$line\n";
	}
}

#close input and output files
#a failed write may only be reported when the output file is closed
close($in);
close($out) or die "Cannot close $output_name: $!\n";
exit();

exercise13input.txt

This is the regex that we are looking for in the lines below:

 /^ATG?C*[ATCG]+?A{3,10}$/

And this is our input file:

ATGCCCAA
ATGCCCAAAA
ATGCCCAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD

Exercise 13 - an alternative

#!/usr/bin/perl
use strict;
use warnings;

#the pattern is compiled once and reused for every line
my $pattern = qr/^ATG?C*[ATCG]+?A{3,10}$/;

#read the input line-by-line; <> reads the files named on the
#command line, or the keyboard when no file is named
#print each line that matches the pattern
while (defined(my $line = <>)) {
	chomp $line;
	next unless $line =~ $pattern;
	print "$line\n";
}

exit();

Exercise14.plx

#!/usr/bin/perl
use strict;
use warnings;


#Open a web browser
#Go to http://search.cpan.org/
#Type in “bioperl Tools BLAST”
#Follow the link to Bio::Tools::Blast
#Browse through this page and the example code



#bioperl example code
#(strict and warnings are already switched on at the top of this script,
#so they are not repeated here)

#make the bioperl module (class) accessible to your program
#the script stops at this line with an error if BioPerl is not installed
use Bio::Seq;

#reaching this line means the module was loaded
print "ok - ready to use Bio::Seq\n";

Exercise 15 - install BioPerl

Useful references:

Using ActivePerl's Perl Package Manager:

  1. At the command line prompt type...
    ppm
  2. At the ppm prompt, type...
    search bioperl
  3. Then type...
    install bioperl

Exercise16.plx

#! /usr/local/bin/perl
# Create and run a program which creates a Seq object and manipulates it:
use strict;
use warnings;

use Bio::Seq;

# initiation of Seq object
# '-alphabet' is the BioPerl 1.0+ name for the option once called '-moltype'
my $seq = Bio::Seq->new('-seq' =>'CGGCGTCTGGAACTCTATTTTAAGAACCTCTCAAAACGAAACAAGC',
                        '-desc' => 'An example',
                        '-display_id' => 'NM_005476',
                        '-accession_number' => '6382074',
                        '-alphabet' => 'dna');

# sequence manipulations
my $molecule_type = $seq->alphabet();   # one of 'dna','rna','protein'
my $subsequence = $seq->subseq(5,10);   # part of the sequence as string

# revcom returns a new sequence object; seq() gives its sequence as a string
my $revcom_object = $seq->revcom;
my $revcom_string = $revcom_object->seq();

# translate returns a new sequence object holding the translation
my $protein_object = $seq->translate;
my $protein_string = $protein_object->seq();

# the third argument is the reading frame (0, 1 or 2); here frame 1 is used
my $frame1_object = $seq->translate(undef,undef,1);
my $frame1_string = $frame1_object->seq();

print "Molecule Type: $molecule_type\n";
print "Sequence from 5 to 10: $subsequence\n";
print "Reverse complemented sequence: $revcom_string\n";
print "Translated sequence: $protein_string\n";
print "Translated sequence (using frame 1): $frame1_string\n";

Exercise17.plx


Check out the code of several examples
using BioPerl at:


http://bip.weizmann.ac.il/course/prog2/perlBioinfo/


Another answer to exercise 12

#!/usr/bin/perl
use strict;
use warnings;

#TASK: demonstrate the use of "my" in setting the
#scope of a variable

#this $some_variable belongs to the main program
my $some_variable = 100;

#body of the main program with the function call
print "the value of some_variable is: ", $some_variable, "\n";
subroutine1();
print "but here, some_variable is still: ", $some_variable, "\n";

#subroutine using its own, separate $some_variable
#"my" inside the braces creates a new variable that exists only there
sub subroutine1{
	my $some_variable = 0;
	print "in subroutine1,some_variable  is: ", $some_variable, "\n";
}


#what happens if you comment out "use strict" and
#remove "my" from both lines that declare $some_variable?


Another answer to exercise 13

#!/usr/bin/perl
use strict;
use warnings;

#TASK: check your answers to the regex exercise

#open the input file
#the three-argument form keeps the mode ('<' read) apart from the file
#name, and "or die" stops with the system error message ($!) when the
#file cannot be opened
my $answers_name = "myanswers.txt";
open(my $in, '<', $answers_name) or die "Cannot open $answers_name: $!\n";


#read the input file line-by-line
#for each line test if it matches a regular expression
while(my $answer = <$in>){
	chomp($answer);
	my $is_correct = does_it_match($answer);
	if ($is_correct){
		print "$answer is a match\n";
	}
	else{
		print "$answer is NOT a match\n";
	}
}

#close input file and exit
close($in);
exit();


#does it match
#  takes:   one line of text
#  returns: 1 when the whole line matches the exercise pattern, 0 otherwise
sub does_it_match{
	my($answer) = @_;
	return 1 if $answer =~ m/^ATG?C*[ATCG]+?A{3,10}$/;
	return 0;
}