#!/usr/bin/perl
# Module to OCR Facebook e-mail imaging.
#
# Usage: ocr2.pl <image-directory> <domain>
#
# Runs gocr over every plain file in <image-directory>, lowercases the
# recognised text, and reports which results end in <domain>: first the exact
# ("hard") matches, then the approximate ("fuzzy") matches as decided by
# String::Approx::amatch.

use strict;
use warnings;

use File::Spec;
use Shell qw(gocr);    # kept from the original; gocr is now run via a list-form pipe
use String::Approx 'amatch';

# Location of the gocr character database passed with -p.
my $GOCR_DB = '/Users/joe/Documents/SecWork/tmpdb/';

# Characters gocr is allowed to recognise (passed with -C).
# NOTE(review): this set has no 'r', 's', 't' or 'u' -- looks like a typo,
# but it is left untouched here; confirm before widening it.
my $GOCR_CHARSET = '1234567890abcdefghijklmnopqvwxyz@._-';

# Run gocr on one image and return the recognised text, lowercased.
# Returns the empty string when gocr cannot be started or prints nothing.
sub ocr_image {
    my ($path) = @_;

    # List-form pipe open: the arguments never pass through a shell, so file
    # names containing spaces or shell metacharacters are handled safely.
    my @command = (
        'gocr', '-i', $path,
        '-m', '4', '-m', '2',
        '-a', '98',
        '-p', $GOCR_DB,
        '-C', $GOCR_CHARSET,
    );

    my $pid = open(my $pipe, '-|', @command);
    if (!$pid) {
        warn "can't run gocr on $path: $!\n";
        return '';
    }

    my $output = do { local $/; <$pipe> };
    close($pipe)
        or warn "gocr reported a failure on $path (status " . ($? >> 8) . ")\n";

    return defined $output ? lc $output : '';
}

# Return everything after the last '@' in $address with one trailing newline
# removed. When there is no '@', rindex yields -1 and the whole string is
# returned, so text whose '@' was misread by the OCR still takes part in the
# comparison.
sub domain_part {
    my ($address) = @_;

    my $domain = substr($address, rindex($address, '@') + 1);
    chomp $domain;

    return $domain;
}

if (@ARGV < 2 || $ARGV[0] eq '' || $ARGV[1] eq '') {
    die "Usage: ocr2.pl <image-directory> <domain>\n";
}

my ($dirname, $domain) = @ARGV;

# The OCR text is lowercased, so the domain must be too or it can never match.
$domain = lc $domain;

opendir(my $dir, $dirname) or die "can't opendir $dirname: $!";

# Keep plain files only ('.', '..' and subdirectories are not images) and
# build each path portably, so the directory need not end in a slash.
my @images = sort
    grep { -f $_ }
    map  { File::Spec->catfile($dirname, $_) } readdir($dir);
closedir($dir);

my @text;
for my $image (@images) {
    my $found = ocr_image($image);
    next if $found eq '';
    push @text, $found;
}

# Exact matches: the part after the last '@' equals the requested domain.
my @hardmatched;
for my $found (@text) {
    print "Found: $found\n";
    push @hardmatched, $found if domain_part($found) eq $domain;
}

print "Hard Matches:\n";
print @hardmatched;

# Fuzzy matches: tolerate OCR misreads by approximate matching.
my @matched;
for my $found (@text) {
    push @matched, $found if amatch($domain, domain_part($found));
}

print "Fuzzy Matches:\n";
print @matched;