#!/usr/bin/perl
################################################
# rep_pos_hash.pl
# Version 1
# Robert D. Cormia
# UCSC Programming for Bioinformatics III
# November 7, 2003
# Substantial help from Neil Cormia and Son Nguyen
# on this exercise, mostly in creating reports 1 & 2.
#
# Welcome to Intro. to Prog. for Bioinformatics III
# Class 3, Exercise 1
#
# 1. In this exercise we will work with the concept of counting aminoacids and
#    noting down the respective positions. Write a Perl program called 'rep_pos'
#    to count the aminoacids in a data file and note the positions as well. The
#    data file looks like this:
#
#       DFDKABLEGXKLRQDXVVMHMGQBA
#       XCGGAYABSMKERDXGLQYASHDELHFFEET
#       DKLZAZANXMKTIIDSVLGQTYTYQMETHERXXG
#
#    Generate reports in the following fashion:
#
#    Report I:
#
#       Aminoacids: A
#          Pos 5: => 3
#          Pos 7: => 1
#          Pos 20: => 1
#          Pos 25: => 1
#       Aminoacids: B
#          Pos 6: => 1
#          Pos 8: => 1
#          Pos 24: => 1
#       ...
#
#    Report II:
#
#       At position: 1
#          D: => 2
#          P: => 1
#          X: => 1
#       At position: 2
#          C: => 1
#          F: => 1
#          K: => 1
#       At position: 3
#          D: => 1
#          G: => 1
#          L: => 1
#######################################################
# Overview:
#   - read each sequence line and split it into single residues
#   - fill two nested hashes in one pass over the residues:
#       %hash_report_1 : residue  -> position -> count
#       %hash_report_2 : position -> residue  -> count
#   - print two reports driven by those hashes
#       report I  is ordered by residue letter, then by position
#       report II is ordered by position, then by residue letter
#   - blank lines are printed liberally to separate the sections
# Everything is written both to STDOUT and to the output file.
#######################################################

use strict;
use warnings;

# Fixed input / output file names (relative to the current directory).
my $count_aa  = 'count_aa.txt';
my $aa_output = 'aa_output.txt';

# residue -> zero-based position -> number of occurrences
my %hash_report_1;

# zero-based position -> residue -> number of occurrences
my %hash_report_2;

# Open input file to read in sequences of amino acids.
open( my $in_fh, '<', $count_aa )
  or die("open() failed for '$count_aa'\nReason: $!\n\n");

# Open output file that receives a copy of everything printed.
# NOTE: $out_fh is a file-scoped lexical that emit() below writes to.
open( my $out_fh, '>', $aa_output )
  or die("open() failed for '$aa_output'\nReason: $!\n\n");

# Print sequences as read in.
emit("Sequences: \n\n");

while ( my $line = <$in_fh> ) {

    # Strip the line terminator (LF or CRLF) so that a stray "\r" is never
    # counted as a residue, then skip blank lines.
    $line =~ s/[\r\n]+\z//;
    next if $line eq '';

    emit("$line \n");

    # One array element per residue; the index is the zero-based position.
    my @aa_array = split //, $line;

    for my $i ( 0 .. $#aa_array ) {

        # Both reports are accumulated in the same pass.
        $hash_report_1{ $aa_array[$i] }{$i} += 1;
        $hash_report_2{$i}{ $aa_array[$i] } += 1;
    }
}

close($in_fh)
  or warn("close() failed for '$count_aa'\nReason: $!\n\n");

# Create report 1 (by residue) and report 2 (by position).
print_report_1( \%hash_report_1 );
print_report_2( \%hash_report_2 );

# Buffered write errors only surface at close, so check it.
close($out_fh)
  or die("close() failed for '$aa_output'\nReason: $!\n\n");

#
# Print reports using subroutines originally written by Son Nguyen
#

############## Subroutine emit ##############
# Write the given text to STDOUT and to the output file.
#   @_ : list of strings to print, unmodified
sub emit {
    my @text = @_;

    print @text;
    print {$out_fh} @text;

    return;
}

############## Subroutine Report 1 ##############
# Print Report I: for every residue (sorted alphabetically) list each
# position at which it occurs (sorted numerically) with its count.
#   $_[0] : reference to a hash of residue -> zero-based position -> count
sub print_report_1 {
    my ($hash_ref) = @_;

    # Blank line and Report I header.
    emit("\n");
    emit("Report I:\n\n");

    foreach my $amino_acid ( sort keys %{$hash_ref} ) {
        emit("\n");
        emit("Amino Acid: $amino_acid\n");

        # Positions are numbers, so compare them with <=>; the default
        # string sort would place 10 before 2.
        foreach my $pos ( sort { $a <=> $b }
            keys %{ $hash_ref->{$amino_acid} } )
        {

            # Positions are stored zero-based but reported starting from 1.
            my $pos_count = $pos + 1;
            emit("Pos $pos_count :=> $hash_ref->{$amino_acid}{$pos}\n");
        }
    }

    return;
}

############## Subroutine Report 2 ##############
# Print Report II: for every position (sorted numerically) list each
# residue found there (sorted alphabetically) with its count.
#   $_[0] : reference to a hash of zero-based position -> residue -> count
sub print_report_2 {
    my ($hash_ref) = @_;

    # Blank line and Report II header.
    emit("\n");
    emit("Report II:\n\n");

    # Positions are numbers, so compare them with <=>.
    foreach my $pos ( sort { $a <=> $b } keys %{$hash_ref} ) {
        emit("\n");

        # Positions are stored zero-based but reported starting from 1.
        my $pos_count = $pos + 1;
        emit("At position: $pos_count\n");

        # Sort residues alphabetically so the output is deterministic;
        # unsorted hash keys come back in an arbitrary order.
        foreach my $amino_acid ( sort keys %{ $hash_ref->{$pos} } ) {
            emit("$amino_acid :=> $hash_ref->{$pos}{$amino_acid}\n");
        }
    }

    return;
}

__END__