#!/usr/bin/perl
################################################
# extract_matches.pl
# Version 1
# Robert D. Cormia
# UCSC Programming for Bioinformatics II
# October 10, 2003
################################################
# Problem 3-1 extract_matches
# Write a Perl program called 'extract_matches' to do the following:
#   a. Read a file called 'strings_data' line by line
#   b. In each line, look for the presence of the pattern in the
#      following string and fetch the offsets where the pattern appears
#      in the string
#   c. Create a hash called 'match_offsets' where the key is colon separated
#      pattern and string and the value is colon separated offsets!
#   d. Print the key-value pairs of this hash into a file called 'matches'
#
# Pseudocode
#   Open file
#   Read each line
#   Extract from each line the pattern (first part) and string (second part)
#   Use index() repeatedly to collect every offset of the pattern
#   Store "pattern:string" => "offset:offset:..." in a hash
#   Print the key-value pairs from the hash into the output file
#
# Input  : strings_data.txt, one "pattern string" pair per line,
#          separated by whitespace.
# Output : matches.txt, one "pattern:string => offsets" record per line;
#          the same records are echoed to STDOUT.
################################################

use strict;
use warnings;

# File names used by main().
my $fname     = 'strings_data.txt';
my $out_fname = 'matches.txt';

# find_offsets($pat, $str)
# Returns the list of 0-based offsets at which $pat occurs in $str.
# Overlapping occurrences are reported, because each search restarts one
# character past the previous hit.  Returns an empty list when either
# argument is undefined or the pattern is empty.
sub find_offsets {
    my ( $pat, $str ) = @_;

    # Guard: index() with an empty pattern never returns -1 (the start
    # position is clamped to the end of the string), so the search loop
    # below would never terminate.
    return if !defined $pat || !defined $str || $pat eq '';

    my @offsets;
    my $pos = index( $str, $pat );
    while ( $pos != -1 ) {
        push @offsets, $pos;

        # Start the next search just past the last found pattern.
        $pos = index( $str, $pat, $pos + 1 );
    }
    return @offsets;
}

# collect_matches(@lines)
# Takes raw input lines and returns a reference to a hash whose keys are
# "pattern:string" and whose values are the colon-separated offsets of
# the pattern in the string.  Blank lines, whitespace-only lines, lines
# without a second field and pairs with no match produce no entry.
sub collect_matches {
    my @lines = @_;

    my %match_offsets;
    for my $line (@lines) {

        # split ' ' discards leading whitespace and splits on runs of
        # whitespace, so the trailing newline never ends up in $str.
        # Anything after the second field is ignored.
        my ( $pat, $str ) = split ' ', $line;

        my @offsets = find_offsets( $pat, $str );
        next if !@offsets;

        $match_offsets{"$pat:$str"} = join ':', @offsets;
    }
    return \%match_offsets;
}

# main()
# Reads the input file, computes the match offsets and writes the result
# to the output file and to STDOUT.  Dies if a file cannot be opened;
# $! holds the system error message.
sub main {
    open( my $in, '<', $fname ) or die "Cannot open file: $!";

    # In list context the read operator returns all lines of the file.
    my @lines = <$in>;
    close $in;

    my $match_offsets = collect_matches(@lines);

    open( my $out, '>', $out_fname ) or die "Cannot open file: $!";

    # Keys are sorted so that the output order is the same on every run.
    for my $key ( sort keys %{$match_offsets} ) {
        my $value = $match_offsets->{$key};
        print "$key => $value\n";
        print {$out} "$key => $value\n";
    }
    print "\n";

    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Cannot close file: $!";
    return;
}

# Run only when executed as a script, so the subroutines above can be
# loaded (e.g. by a test) without touching any files.
main() unless caller;

1;
__END__