Difference between revisions of "Perl example code"

From irefindex
(Fixed descriptions of exercise 11.)
 
(6 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 
These are code examples that accompany the [[MBV3070]] Perl lectures.
 
These are code examples that accompany the [[MBV3070]] Perl lectures.
  
Copy and paste the text to a text file called Excercise1.plx
+
Copy and paste the text to a text file called Exercise1.plx
 
And then, in the command prompt, type
 
And then, in the command prompt, type
  
 
<pre>
 
<pre>
perl Excercise1.plx
+
perl Exercise1.plx
 
</pre>
 
</pre>
  
Line 12: Line 12:
  
  
Excercise1.plx
+
Exercise1.plx
  
 
<pre>
 
<pre>
Line 24: Line 24:
 
</pre>
 
</pre>
  
Excercise2.plx
+
Exercise2.plx
  
 
<pre>
 
<pre>
Line 46: Line 46:
 
</pre>
 
</pre>
  
Excercise3.plx
+
Exercise3.plx
  
 
<pre>
 
<pre>
Line 74: Line 74:
 
</pre>
 
</pre>
  
Excercise4.plx
+
Exercise4.plx
  
 
<pre>
 
<pre>
Line 99: Line 99:
 
</pre>
 
</pre>
  
Excercise5.plx
+
Exercise5.plx
  
 
<pre>
 
<pre>
Line 119: Line 119:
 
</pre>
 
</pre>
  
Excercise6.plx
+
Exercise6.plx
  
 
<pre>
 
<pre>
Line 154: Line 154:
  
  
Excercise7.plx
+
Exercise7.plx
  
 
<pre>
 
<pre>
Line 172: Line 172:
 
</pre>
 
</pre>
  
Excercise8.plx
+
Exercise8.plx
  
 
<pre>
 
<pre>
Line 218: Line 218:
 
</pre>
 
</pre>
  
Excercise9.plx
+
Exercise9.plx
  
 
<pre>
 
<pre>
Line 246: Line 246:
  
  
Excercise10.plx
+
Exercise10.plx
  
 
<pre>
 
<pre>
Line 295: Line 295:
  
  
Excercise11.plx
+
Exercise11.plx
  
 
<pre>
 
<pre>
Line 304: Line 304:
 
#TASK: Print a list of all Perl programs we did so far.
 
#TASK: Print a list of all Perl programs we did so far.
 
#These files can be found in your current directory and  
 
#These files can be found in your current directory and  
#they start with the word ‘program’
+
#they contain the word "exercise"
  
 
print "List of programs we made today:\n";
 
print "List of programs we made today:\n";
Line 315: Line 315:
  
 
#use foreach to step through the array
 
#use foreach to step through the array
#if a file contains word 'program' print it out
+
#if a file contains word "exercise" print it out
  
 
foreach my $file (@files){
 
foreach my $file (@files){
if($file =~ /excercise/){
+
if($file =~ /exercise/){
 
print "$file\n";
 
print "$file\n";
 
}
 
}
Line 324: Line 324:
 
</pre>
 
</pre>
  
 +
Exercise 11 - an alternative
  
 +
<pre>
 +
#!/usr/bin/perl
 +
use strict;
 +
use warnings;
 +
 +
#TASK: Print a list of all Perl programs we did so far.
 +
#These files can be found in your current directory and
 +
#they contain the word "exercise"
 +
 +
print "List of programs we made today:\n";
 +
 +
#system call for 'ls' function - the result goes into a string
 +
open(LISTING, "dir|");
 +
 +
#while we can read a line from the file...
 +
#if a file contains word "exercise" print it out
 +
 +
while (<LISTING>) {
 +
        my $file = $_;
 +
if($file =~ /exercise/){
 +
print "$file\n";
 +
}
 +
}
  
 +
close(LISTING);
 +
</pre>
  
Excercise12.plx
+
Exercise12.plx
  
 
<pre>
 
<pre>
Line 374: Line 400:
  
  
Excercise13.plx
+
Exercise13.plx
  
 
<pre>
 
<pre>
Line 385: Line 411:
  
 
#open input and output files
 
#open input and output files
open(IN,"excercise13input.txt");
+
open(IN,"exercise13input.txt");
open(OUT,">excercise13output.txt");
+
open(OUT,">exercise13output.txt");
  
 
#read the input file line-by-line
 
#read the input file line-by-line
Line 405: Line 431:
 
</pre>
 
</pre>
  
excercise13input.txt
+
exercise13input.txt
<pre>
+
 
#this is the regex that we are looking for in the lines below
+
This is the regex that we are looking for in the lines below:
#/^ATG?C*[ATCG]+?A{3,10}$/
 
  
 +
  /^ATG?C*[ATCG]+?A{3,10}$/
  
 +
And this is our input file:
  
 +
<pre>
 
ATGCCCAA
 
ATGCCCAA
 
ATGCCCAAAA
 
ATGCCCAAAA
Line 419: Line 447:
 
</pre>
 
</pre>
  
 +
Exercise 13 - an alternative
 +
 +
<pre>
 +
#!/usr/bin/perl
 +
use strict;
 +
use warnings;
 +
 +
my $line;
 +
 +
#read the input file line-by-line
 +
#for each line ask if it matches the regex
 +
#print it if it matches
 +
while($line = <>){
 +
        chomp $line;
 +
        if ($line =~ /^ATG?C*[ATCG]+?A{3,10}$/) {
 +
                print "$line\n";
 +
        }
 +
}
 +
 +
exit();
 +
</pre>
 +
 +
Exercise14.plx
 +
 +
<pre>
 +
#!/usr/bin/perl
 +
use strict;
 +
use warnings;
 +
 +
 +
#Open a web browser
 +
#Go to http://search.cpan.org/
 +
#Type in “bioperl Tools BLAST”
 +
#Follow the link to Bio::Tools::Blast
 +
#Browse through this page and the example code
 +
 +
 +
 +
#bioperl example code
 +
use strict;
 +
use warnings;
 +
 +
#make the bioperl module (class) accessible to your program
 +
use Bio::Seq;
 +
 +
print"ok - ready to use Bio::Seq";
 +
</pre>
 +
 +
Exercise 15 - install BioPerl
 +
 +
Useful references:
 +
 +
* http://www.bioperl.org/wiki/Installing_BioPerl
 +
* http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix#INSTALLING_BIOPERL_THE_EASY_WAY_USING_CPAN
 +
 +
Using ActivePerl's Perl Package Manager:
 +
 +
<ol>
 +
<li>At the command line prompt type...
 +
<pre>ppm</pre></li>
 +
<li>At the <tt>ppm</tt> prompt, type...
 +
<pre>search bioperl</pre></li>
 +
<li>Then type...
 +
<pre>install bioperl</pre></li>
 +
</ol>
 +
 +
Exercise16.plx
  
 
<pre>
 
<pre>
 +
#! /usr/local/bin/perl
 +
# Create and run a program which creates a Seq object and manipulates it:
 +
 +
use Bio::Seq;
 +
 +
# initiation of Seq object
 +
$seq = Bio::Seq->new('-seq' =>'CGGCGTCTGGAACTCTATTTTAAGAACCTCTCAAAACGAAACAAGC',
 +
                    '-desc' => 'An example',
 +
                    '-display_id' => 'NM_005476',
 +
                    '-accession_number' => '6382074',
 +
                    '-moltype' => 'dna'); 
 +
 +
# sequence manipulations
 +
$aa = $seq -> moltype();                # one of 'dna','rna','protein'
 +
$ab = $seq -> subseq(5,10);            # part of the sequence as string
 +
 +
$ac = $seq -> revcom;                  # returns an object of the reverse complemented sequence
 +
$ac1 = $ac -> seq();
 +
 +
$ad = $seq -> translate;                # returns an object of the sequence translation
 +
$ad1 = $ad -> seq();
 +
 +
$ae = $seq -> translate(undef,undef,1); # returns an object of the sequence translation (using frame 1) (0,1,2 can be used)?
 +
$ae1 = $ae -> seq();
 +
 +
print "Molecule Type: $aa\n";
 +
print "Sequence from 5 to 10: $ab\n";
 +
print "Reverse complemented sequence: $ac1\n";
 +
print "Translated sequence: $ad1\n";
 +
print "Translated sequence (using frame 1): $ae1\n";
 +
</pre>
 +
 +
Exercise17.plx
 +
 +
<pre>
 +
 +
Check out the code of several examples
 +
using BioPerl at:
 +
 +
 +
http://bip.weizmann.ac.il/course/prog2/perlBioinfo/
 +
 +
 +
</pre>
 +
 +
Another answer to exercise 12
 +
 +
<pre>
 +
#!/usr/bin/perl
 +
use strict;
 +
use warnings;
 +
 +
#TASK: demonstrate the use of “my” in setting the
 +
#scope of a variable
 +
my $some_variable = 100;
 +
 +
#body of the main program with the function call
 +
print "the value of some_variable is: $some_variable\n";
 +
subroutine1();
 +
print "but here, some_variable is still: $some_variable\n";
 +
 +
#subroutine using $some_variable
 +
sub subroutine1{
 +
my $some_variable = 0;
 +
print "in subroutine1,some_variable  is: $some_variable\n";
 +
}
 +
 +
 +
#what happens if you comment out "use strict" and
 +
#remove "my" from lines 7 and 16
 +
</pre>
 +
 +
 +
Another answer to exercise 13
 +
 +
<pre>
 +
#!/usr/bin/perl
 +
use strict;
 +
use warnings;
 +
 +
#TASK: check your answers to the regex exercise
 +
 +
#open input and output files
 +
open(IN,"myanswers.txt");
 +
 +
 +
#read the input file line-by-line
 +
#for each line test if it matches a regular expression
 +
while(<IN>){
 +
chomp;
 +
my $is_correct = does_it_match($_);
 +
if ($is_correct){
 +
print "$_ is a match\n";
 +
}
 +
else{
 +
print "$_ is NOT a match\n";
 +
}
 +
}
 +
 +
#close input file and exit
 +
close(IN);
 +
exit();
 +
 +
 +
#does it match
 +
sub does_it_match{
 +
my($answer) = @_;
 +
my $is_correct = 0;
 +
if ($answer =~ m/^ATG?C*[ATCG]+?A{3,10}$/){
 +
$is_correct = 1;
 +
}
 +
return $is_correct;
 +
}
 +
 
</pre>
 
</pre>

Latest revision as of 15:22, 22 November 2011

These are code examples that accompany the MBV3070 Perl lectures.

Copy and paste the text to a text file called Exercise1.plx. Then, in the command prompt, type

perl Exercise1.plx

to run the example script.


Exercise1.plx

#!/usr/bin/perl
use strict;
use warnings;

#Exercise 1: print text to the screen.
#Both strings below are double-quoted, so the escape sequences
#\n (newline) and \t (tab) inside them are turned into real
#whitespace when the text is printed.
print "My first Perl program\n";
#try single quotes
#(swap the double quotes for single quotes and compare the output)
print "First line\nsecond line and there is a tab\there\n";

Exercise2.plx

#!/usr/bin/perl
use strict;
use warnings;

#Exercise 2: scalar variables and arithmetic.

#assign values to variables $x and $y and print them out
my $x = 4;
my $y = 2;
print "x is $x and y is $y\n";

#example of arithmetic expression
#** is exponentiation, so $z becomes 4 + 2**3 = 12
my $z = $x + $y**3;
#++ adds one to $x, so $x is now 5
$x++;
print "x is $x and z is $z\n";

#evaluating arithmetic expression within print command
#inside a double-quoted string only the variable $z is replaced by its
#value; the text " + 3" is printed literally and no addition happens
print "add 3 to $z: $z + 3\n"; #did it work?
#outside the quotes $z + 3 is an ordinary expression, so the sum is
#calculated first and the result is printed
print "add 3 to $z:", $z + 3,"\n";

Exercise3.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Concatenate two given sequences,
#find the length of the new sequence and
#print out the second codon of the sequence

#the two sequences that will be joined
my ($DNA, $polyA) = ("GATTACACAT", "AAAA");

#glue the sequences together (join with an empty separator)
my $modifiedDNA = join('', $DNA, $polyA);

#report the joined sequence together with its length
my $DNAlength = length $modifiedDNA;
print "Modified DNA: $modifiedDNA has length $DNAlength\n";

#a codon is three bases long, so the second codon
#starts at offset 3 (offsets are counted from 0)
my $codon = substr $modifiedDNA, 3, 3;
print "Second codon is $codon\n";

Exercise4.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Ask the user for her name and age
#and calculate her age in days

#get a string from the keyboard
print "Please enter your name\n";
my $name = <STDIN>;
#getting rid of the new line character
#try leaving this line out
chomp($name);

#prompt the user for his/her age
#get a number from the keyboard
#STDIN is named explicitly here as well: the bare <> operator reads
#from files listed on the command line when there are any, and only
#falls back to the keyboard when there are none
print "$name please enter your age\n";
my $age = <STDIN>;
chomp($age);

#calculate age in days (every year is counted as 365 days)
my $age_in_days = $age*365;
print "You are $age_in_days days old\n";

Exercise5.plx

#!/usr/bin/perl
use strict;
use warnings;

#Exercise 5: creating an array and reading from it

#the four DNA bases, written as a quoted word list
my @bases = qw(A C G T);

#print the first and the third element (indices start at 0)
print "$bases[0]$bases[2]\n";

#print every element of the array, one after another
print @bases, "\n"; #try with double quotes

#an array assigned to a scalar gives its number of elements
my $number_of_bases = @bases;
print $number_of_bases, "\n";

Exercise6.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Count the frequency of base G
#in a given DNA sequence

my $DNA = "GATTACACAT";

#running total of G bases seen so far
my $countG = 0;

#visit every position of the sequence, from the first (0)
#to the last (length - 1), and look at the single base there
for my $position (0 .. length($DNA) - 1) {
    my $base = substr($DNA, $position, 1);
    $countG++ if $base eq "G";
}

#print out the number of Gs
print "There are $countG G bases\n";


Exercise7.plx

#!/usr/bin/perl
use strict;
use warnings;

#build a 20-element array holding the numbers 0,...19
#(the range operator .. produces the whole list in one go)
my @array = (0 .. 19);

#print the elements one per line;
#inside the loop the current element is available as $_
print "$_\n" for @array;

Exercise8.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: For a given DNA sequence find its RNA transcript,
#find its reverse complement and check if
#the reverse complement contains a start codon

my $DNA = "GATTACACAT";

#transcribe DNA to RNA - T changes to U
#work on a copy so that $DNA itself stays unchanged
my $RNA = $DNA;
$RNA =~ s/T/U/g;
print "RNA sequence is $RNA\n";

#find the reverse complement of $DNA using substitution operator
#first - reverse the sequence
my $rcDNA = reverse($DNA);

#then - try to complement the sequence one base at a time
#NOTE: this attempt is deliberately flawed. Each substitution works on
#the result of the previous one, so the A's created from T's by the
#first line are turned straight back into T's by the second line
#(and likewise for G and C).
$rcDNA =~ s/T/A/g;
$rcDNA =~ s/A/T/g;
$rcDNA =~ s/G/C/g;
$rcDNA =~ s/C/G/g;

print "Reverse complement of $DNA is $rcDNA\n"; #did it work?

#find the reverse complement of $DNA using translation operator
#first - reverse the sequence
$rcDNA = reverse($DNA);
#then - complement the sequence
#tr maps every character in a single pass (A->T, C->G, G->C, T->A),
#so a base that has been replaced is never replaced a second time
$rcDNA =~ tr/ACGT/TGCA/;
#then - print the reverse complement
print "Reverse complement of $DNA is $rcDNA\n";

#look for a start codon in the reverse sequence
#($rcDNA holds the reverse complement at this point)
if($rcDNA =~ /ATG/){
	print "Start codon found\n";
}
else{
	print "Start codon not found\n";
}

Exercise9.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Make a subroutine that calculates
#the reverse
#complement of a DNA sequence and call it
#from the main program

#main program: hand the sequence to the subroutine,
#keep what it returns and print it on a line of its own
my $DNA = "GATTACACAT";
my $rcDNA = revcomp($DNA);
print $rcDNA, "\n";

#stop here so that execution never runs on into the
#subroutine definition below
exit 0;
#revcomp: return the reverse complement of a DNA sequence
#  argument: the sequence as a string
#  returns:  the sequence read backwards with A<->T and C<->G swapped;
#            any other character (including lower case) is kept as it is
sub revcomp{
    my $DNAin = shift;

    #scalar reverse turns the string itself around
    my $DNAout = scalar reverse $DNAin;

    #tr swaps each base for its partner in a single pass
    $DNAout =~ tr/ACGT/TGCA/;

    return $DNAout;
}


Exercise10.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read DNA sequences from the 'DNAseq.txt' input file -
#there is one sequence per line
#For each sequence find the reverse complement and
#print it to the 'DNAseqRC.txt' output file

#open input and output files
#the three-argument form keeps the mode ('<' read, '>' write) separate
#from the file name, and "or die" stops the program with the reason
#($!) if a file cannot be opened instead of silently carrying on
open(my $in, '<', "DNAseq.txt")
    or die "Cannot open DNAseq.txt for reading: $!\n";
open(my $out, '>', "DNAseqRC.txt")
    or die "Cannot open DNAseqRC.txt for writing: $!\n";

#read the input file line-by-line
#for each line find the reverse complement
#print it in the output file
while(my $line = <$in>){
    chomp $line;
    my $rcDNA = revcomp($line);
    print {$out} "$rcDNA\n";
}

#close input and output files
#buffered output is only guaranteed to be on disk once close succeeds
close($in);
close($out) or die "Cannot finish writing DNAseqRC.txt: $!\n";
exit();


#revcomp: reverse complement of a DNA sequence
#  takes one string and returns a new string; the argument is not changed
sub revcomp{
    my($DNAin) = @_;

    #complement first: copy the input and swap A<->T and C<->G in the copy
    (my $complement = $DNAin) =~ tr/ACGT/TGCA/;

    #then turn the complemented string around
    #(the order of the two steps does not affect the result)
    return scalar reverse $complement;
}

DNAseq.txt

ACGACTAGCATCAGCAT
AAAAATGATCGACTATATAGCATA
AAAGGTGCATCAGCATGG


Exercise11.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#they contain the word "exercise"

print "List of programs we made today:\n";

#run the external 'dir' command - everything it prints
#goes into one string
my $listing = `dir`; #these are back quotes

#split the string to get the individual lines of the listing
my @files = split(/\n/,$listing);

#use foreach to step through the array
#if a line contains the word "exercise" print it out
#the /i modifier ignores upper/lower case, so files saved as
#"Exercise1.plx" are found as well as "exercise1.plx"
foreach my $file (@files){
    if($file =~ /exercise/i){
        print "$file\n";
    }
}

Exercise 11 - an alternative

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#they contain the word "exercise"

print "List of programs we made today:\n";

#run the external 'dir' command and read what it prints through a
#filehandle, one line at a time (mode '-|' means "read from a command")
#stop with an explanation if the command cannot be started
open(my $listing, '-|', 'dir') or die "Cannot run 'dir': $!\n";

#while we can read a line from the command...
#if the line contains the word "exercise" print it out
while (my $file = <$listing>) {
    #each line still ends with a newline; remove it, otherwise the
    #print below would add a second one and leave blank lines
    chomp $file;

    #/i ignores upper/lower case so that "Exercise1.plx" matches too
    if($file =~ /exercise/i){
        print "$file\n";
    }
}

#closing a command pipe also reports whether the command failed
close($listing)
    or warn "Problem closing the 'dir' pipe: " . ($! || "exit status $?") . "\n";

Exercise12.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Write a program that has one function.
#Use a variable named "$some_variable" in this
#function and in the main body of the program.

#Prove that you can alter the value of
#$some_variable in the function without
#changing the value of $some_variable in
#the main body of the program

#main routine
#this $some_variable belongs to the main program
my $some_variable = 10;

print "i am in the main routine and some_variable is: $some_variable\n";

#the subroutine receives a copy of the value and hands back its own result
my $some_variable_changed_by_subroutine = subroutine($some_variable);

print "i am in the main routine some_variable is now: $some_variable\n";
print "i am in the main routine some_variable_changed_by_subroutine is: $some_variable_changed_by_subroutine\n";


#a subroutine that uses a variable with the same name as a variable in the main routine
#  argument: one value
#  returns:  that value increased by one
sub subroutine{
    #"my" creates a new, separate $some_variable that exists only
    #inside this subroutine; it starts as a copy of the first argument
    my ($some_variable) = @_;

    print "i am in the subroutine and some_variable is: $some_variable\n";

    #only the subroutine's own copy is changed here
    $some_variable++;

    print "i am in the subroutine and some_variable is now: $some_variable\n";

    return $some_variable;
}


Exercise13.plx

#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read lines from input file -
#print lines that match a regular expression

#open input and output files
#three-argument open keeps the mode apart from the file name;
#"or die" stops the program with the reason ($!) when a file cannot
#be opened, instead of quietly producing no output
open(my $in, '<', "exercise13input.txt")
    or die "Cannot open exercise13input.txt for reading: $!\n";
open(my $out, '>', "exercise13output.txt")
    or die "Cannot open exercise13output.txt for writing: $!\n";

#read the input file line-by-line
#for each line ask if it matches the regex
#print it in the output file and also on the screen
while(my $line = <$in>){
    chomp $line;
    if ($line =~ /^ATG?C*[ATCG]+?A{3,10}$/) {
        print {$out} "$line\n";
        print "$line\n";
    }
}

#close input and output files
#a failed close on the output file means some lines may not have been written
close($in);
close($out) or die "Cannot finish writing exercise13output.txt: $!\n";
exit();

exercise13input.txt

This is the regex that we are looking for in the lines below:

 /^ATG?C*[ATCG]+?A{3,10}$/

And this is our input file:

ATGCCCAA
ATGCCCAAAA
ATGCCCAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD

Exercise 13 - an alternative

#!/usr/bin/perl
use strict;
use warnings;

#read the input line-by-line with the <> operator
#skip every line that does not match the regex
#print the lines that are left
while(defined(my $line = <>)){
    chomp $line;
    next unless $line =~ /^ATG?C*[ATCG]+?A{3,10}$/;
    print "$line\n";
}

exit();

Exercise14.plx

#!/usr/bin/perl
use strict;
use warnings;


#Open a web browser
#Go to http://search.cpan.org/
#Type in “bioperl Tools BLAST”
#Follow the link to Bio::Tools::Blast
#Browse through this page and the example code



#bioperl example code
#("use strict" and "use warnings" are already switched on at the top
#of the file; they only need to appear once)

#make the bioperl module (class) accessible to your program
#if BioPerl is not installed the program stops at this line
use Bio::Seq;

#end the message with a newline so that the command prompt
#starts on a fresh line afterwards
print "ok - ready to use Bio::Seq\n";

Exercise 15 - install BioPerl

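Useful references:

  * http://www.bioperl.org/wiki/Installing_BioPerl
  * http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix#INSTALLING_BIOPERL_THE_EASY_WAY_USING_CPAN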

Using ActivePerl's Perl Package Manager:

  1. At the command line prompt type...
    ppm
  2. At the ppm prompt, type...
    search bioperl
  3. Then type...
    install bioperl

Exercise16.plx

#! /usr/local/bin/perl
# Create and run a program which creates a Seq object and manipulates it:
use strict;
use warnings;

use Bio::Seq;

# initiation of Seq object
# NOTE(review): '-alphabet' / alphabet() replace the older '-moltype' /
# moltype() names, which current BioPerl releases no longer provide -
# confirm against the BioPerl version installed for the course
my $seq = Bio::Seq->new('-seq' =>'CGGCGTCTGGAACTCTATTTTAAGAACCTCTCAAAACGAAACAAGC',
                        '-desc' => 'An example',
                        '-display_id' => 'NM_005476',
                        '-accession_number' => '6382074',
                        '-alphabet' => 'dna');

# sequence manipulations
my $alphabet    = $seq->alphabet();     # one of 'dna','rna','protein'
my $subsequence = $seq->subseq(5,10);   # part of the sequence as string

# revcom returns a new sequence object; seq() gets its sequence string
my $revcom_obj = $seq->revcom;
my $revcom_seq = $revcom_obj->seq();

# translate returns a new (protein) sequence object
my $protein_obj = $seq->translate;
my $protein_seq = $protein_obj->seq();

# the third positional argument is the reading frame: 0, 1 or 2
my $frame1_obj = $seq->translate(undef,undef,1);
my $frame1_seq = $frame1_obj->seq();

print "Molecule Type: $alphabet\n";
print "Sequence from 5 to 10: $subsequence\n";
print "Reverse complemented sequence: $revcom_seq\n";
print "Translated sequence: $protein_seq\n";
print "Translated sequence (using frame 1): $frame1_seq\n";

Exercise17.plx


Check out the code of several examples
using BioPerl at:


http://bip.weizmann.ac.il/course/prog2/perlBioinfo/


Another answer to exercise 12

#!/usr/bin/perl
use strict;
use warnings;

#TASK: demonstrate the use of “my” in setting the
#scope of a variable
#this $some_variable belongs to the main program
my $some_variable = 100;

#body of the main program with the function call
print "the value of some_variable is: $some_variable\n";
subroutine1();
#the subroutine set its own $some_variable to 0, but the one that
#belongs to the main program is untouched
print "but here, some_variable is still: $some_variable\n";

#subroutine using $some_variable
#takes no arguments; prints the value of its own private variable
sub subroutine1{
	#"my" creates a second, separate $some_variable that only
	#exists inside this subroutine
	my $some_variable = 0;
	print "in subroutine1,some_variable  is: $some_variable\n";
}


#what happens if you comment out "use strict" and
#remove "my" from both declarations of $some_variable
#(the one in the main program and the one inside subroutine1)?


Another answer to exercise 13

#!/usr/bin/perl
use strict;
use warnings;

#TASK: check your answers to the regex exercise

#open the input file (this program writes its results to the screen,
#so there is no output file)
#"or die" stops the program with the reason ($!) if the file is missing
open(my $in, '<', "myanswers.txt")
    or die "Cannot open myanswers.txt for reading: $!\n";


#read the input file line-by-line
#for each line test if it matches a regular expression
while(my $answer = <$in>){
    chomp $answer;
    my $is_correct = does_it_match($answer);
    if ($is_correct){
        print "$answer is a match\n";
    }
    else{
        print "$answer is NOT a match\n";
    }
}

#close input file and exit
#(exit also keeps execution from running on into the subroutine below)
close($in);
exit();


#does it match
#  argument: one string (a candidate answer)
#  returns:  1 if the string matches the exercise regex, 0 if it does not
sub does_it_match{
    my $answer = shift;

    #the match itself is the condition; turn its outcome into 1 or 0
    return ($answer =~ m/^ATG?C*[ATCG]+?A{3,10}$/) ? 1 : 0;
}