Perl performance optimization


# Multipling two arrays

# Tested 100000 times, against:
my @x = (4 .. 44);
my @y = (99 .. 139);

# Results:
my @z;
my $i = 0;
foreach (@x) { push(@z, $_ * $y[$i]); ++$i }
# faster (46,082/sec) (2.17 CPU)
my (@z) = map({$x[$_] * $y[$_]} 0 .. $#x);
# slower (35,842/sec) (2.79 CPU)
my @z;
my $i = 0;
while ($i <= $#x) { push(@z, ($x[$i] * $y[$i])); ++$i }
# slower (33,333/sec) (3.00 CPU)



# Read entire file into a string

# Tested 50000 times, against:
my $file = '/etc/pacman.conf'; # 80 lines

# Results:
open(my $fh, '<', $file);
my $string;
sysread($fh, $string, -s $file);
# faster (60,975/sec) (0.82 CPU)
open(my $fh, '<', $file);
my $string = do { local ($/); <$fh> };
# slower (42,372/sec) (1.18 CPU)
open(my $fh, '<', $file);
my $string = join('', <$fh>);
# slower (11,286/sec) (4.43 CPU)



# Adding up all numbers from a string

my $string = 'ea19963c4cc35d517076137fedacd379';
$string =~ tr/0-9//dc;
my $result = 0;
$result += $_ foreach split(//, $string);
# faster (77,371/sec) (5.17 CPU)
my $string = 'ea19963c4cc35d517076137fedacd379';
my $result = 0;
$result += $_ foreach $string =~ /\d/g;

# slower (44,594/sec) (8.97 CPU)
my $string = 'ea19963c4cc35d517076137fedacd379';
my $result = 0;
$result += $1 while $string =~ /(\d)/g;
# slower (35,875/sec) (11.15 CPU)



# Capturing

# Tested 700000 times, against:
my $z = '=>abc -- xyz<=';

# Results:
my ($x, $y) = ($z =~ /(\w+) -- (\w+)/);
# faster (489,510/sec) (1.43 CPU)
$z =~ /(\w+) -- (\w+)/;
my $x = $1;
my $y = $2;
# slower (466,666/sec) (1.50 CPU)



# Sorting and removing duplicates

# Tested 7000 times, against:
my @array = ('a'..'z','A'..'Z',1..26);
push(@array,$array[rand $#array]) for 0..1000;

# Results:
my %seen; @seen{@array} = (); my @uniq = sort keys %seen;
# faster (5,223/sec) (1.34 CPU)
my %seen; my @uniq = sort grep((!$seen{$_}++),@array);
# slower (2,341/sec) (2.99 CPU)



# Removing duplicate items without sorting the list

# Tested 1000 times, against:
my @array = ('a' .. 'z', 'A' .. 'Z');
push(@array, $array[rand $#array]) for 0 .. 1000;

# Results:
# keeps first duplicated item
my @data = @array;
my (@uniq) = do{my %h; grep{!$h{$_}++}@data};
return;
# faster (1,162/sec) (0.86 CPU)
# keeps last duplicated item
my @data = @array;
my @uniq;
while (@data) {
    my $line = shift @data;
    push @uniq, $line unless $line ~~ \@data;
}
return;
# slower (87/sec) (11.38 CPU)



# (if, else) vs. (?, :)

my $x = 0;
my $y = 1;
my $string = $x ? $y ? 'x,y' : 'x' : $y ? 'y' : undef;
# faster (1,764,705/sec) (1.70 CPU)
my $x = 0;
my $y = 1;
my $string;
if ($x) {
    if ($y) { $string = 'x,y'; }
    else { $string = 'x'; }
}
else {
    if ($y) { $string = 'y'; }
    else { $string = undef; }
}
# slower (1,282,051/sec) (2.34 CPU)



# Get the first character of a string

# Tested 10000000 times, against:
my $string = 'This is a string!';

# Results:
my $first_letter = chr ord $string;
# faster (3,636,363/sec) (2.75 CPU)
my $first_letter = substr($string,0,1);
# slower (3,496,503/sec) (2.86 CPU)
my ($first_letter) = $string =~ /^(.)/s;
# slower (848,896/sec) (11.78 CPU)



# Check if string is an element in @array

# Tested 1000000 times, against:
my @array = qw(one two three four five six seven eigth nine ten);

# Results:
1 if 'six' ~~ \@array;
# faster (934,579/sec) (1.07 CPU)
1 if grep(('six' eq $_),@array);
# slower (456,621/sec) (2.19 CPU)



# Make a large integer with thousands separators

# Tested 300000 times, against:
sub get_number {
    int rand(12312183214129);
}

# Results:
my $n = get_number();
length($n) > 3 or return $n;
my $l = length($n) - 3;
my $i = ($l - 1) % 3 + 1;
my $x = substr($n, 0, $i) . ',';
while ($i < $l) {
    $x .= substr($n, $i, 3) . ',';
    $i += 3;
}
$n = $x . substr($n, $i);
# faster (123,966/sec) (2.42 CPU)
my $num = get_number();
$num = reverse $num;
$num =~ s/(\d{3})/$1,/g;
$num = reverse $num;
$num =~ s/^\,//;
# slower (71,599/sec) (4.19 CPU)
my $number = get_number();
$number =~ s/(?<=\d)(?=(?:\d\d\d)+\b)/,/g;
# slower (49,261/sec) (6.09 CPU)



# Removing the first character of a string

my $string = '>This is a string!';
substr $string, 0, 1, '';
# faster (2,049,180/sec) (2.44 CPU)
my $string = '>This is a string!';
$string = unpack "xA*", $string;
# slower (1,111,111/sec) (4.50 CPU)
my $string = '>This is a string!';
my $rstring = reverse($string);
chop $rstring;
$string = reverse($rstring);
# slower (1,057,082/sec) (4.73 CPU)



# Removing the first and last character of a string

# Tested 2000000 times, against:
my $string = "+Just another Perl hacker,\n";

# Results:
my $str = $string;
chop $str;
substr $str, 0, 1, '';
# faster (1,639,344/sec) (1.22 CPU)
my $str = $string;
chop $str;
$str = reverse $str;
chop $str;
$str = reverse $str;
# slower (943,396/sec) (2.12 CPU)
my $str = $string;
$str =~ s/^.//s;
$str =~ s/.\z//s;
# slower (649,350/sec) (3.08 CPU)



# map VS. for

my @punct;
push(@punct, chr($_)) for (33 .. 47, 58 .. 64, 91 .. 96, 123 .. 126);
# faster (52,910/sec) (1.89 CPU)
my (@punct) = map((chr($_)), 33 .. 47, 58 .. 64, 91 .. 96, 123 .. 126);
# slower (42,016/sec) (2.38 CPU)



# End of string anchors

# Tested 4000000 times, against:
my $string = 'This is a string!' x 100;

# Results:
$string =~ /string!$/;
# faster (3,603,603/sec) (1.11 CPU)
$string =~ /string!\z/;
# slower (1,418,439/sec) (2.82 CPU)
$string =~ /string!\Z/;
# slower (1,408,450/sec) (2.84 CPU)



# If string begins with...

# Tested 4000000 times, against:
my $string = 'This is a string!';

# Results:
1 unless (index($string, 'This'));
# faster (4,255,319/sec) (0.94 CPU)
1 if (substr($string, 0, 4) eq 'This');
# slower (3,703,703/sec) (1.08 CPU)
1 if ($string =~ /^This/);
# slower (1,659,751/sec) (2.41 CPU)



# Globing some type of files

# Tested 10000 times, against:
my $dir = '/usr/share/applications';

# Results:
my @files;
opendir(my $dir_h, $dir);
/\.desktop$/ and push @files, "$dir/$_" while readdir $dir_h;
# faster (4,629/sec) (2.16 CPU)
opendir(my $dir_h, $dir);
my @files = grep((/\.desktop$/ and $_ = "$dir/$_"), readdir($dir_h));
# slower (4,566/sec) (2.19 CPU)
my @files = glob("$dir/*.desktop");
# slower (1,872/sec) (5.34 CPU)



# Split a string by one character

# Tested 3000 times, against:
my $string = "ABD\$Okd<-s9\n23|!/_+#23/2\@3" x 100;

# Results:
my @x = split(//, $string);
# faster (937/sec) (3.20 CPU)
my @x = unpack('(a1)*', $string);
# slower (763/sec) (3.93 CPU)
my $i = -1;
my @y;
while (1) { push(@y, substr($string, $i += 1, 1) || last) }
# slower (549/sec) (5.46 CPU)



# Split a string by $n characters

# Tested 10000 times, against:
my $string = join('', "x\ny", 'A'..'Z', 'a'..'z', 1 .. 1000);
my $n = int (rand 10) + 1;

# Results:
my @array = unpack("(a$n)*", $string);
# faster (2,109/sec) (4.74 CPU)
my @array;
my $i = -$n;
my $l = length($string);
while (1) { last if ($i + $n) > $l; push(@array, substr($string, $i += $n, $n)) }
# slower (1,200/sec) (8.33 CPU)
my @array = $string =~ /.{$n}/sg;
# slower (1,058/sec) (9.45 CPU)



# Strip non-alphanumeric chars

# Tested 1000000 times, against:
my $string = 'U)n@~r)@=/,.3-?{]a[|})d\\a~b|^l~"e';

# Results:
my $str = $string;
$str =~ tr/A-Za-z0-9//dc;
# faster (1,587,301/sec) (0.63 CPU)
my $str = $string;
$str =~ s/\W+//g;
# slower (173,611/sec) (5.76 CPU)
my $str = $string;
$str =~ s/[[:^alnum:]]+//g;
# slower (163,132/sec) (6.13 CPU)



# Date formating

# Tested 1000000 times, against:
my $date = '2011-11-26';

# Results:
my $new_date = join('.', ((split(/-/, $date, 3))[2, 1, 0]));
# faster (606,060/sec) (1.65 CPU)
my $new_date;
foreach my $d ($date =~ /^(\d\d\d\d)-(\d\d)-(\d\d)$/) {
    $new_date = $d . do { ".$new_date" if defined $new_date };
}
# slower (246,305/sec) (4.06 CPU)
my $new_date = $date;
$new_date =~ s/^(\d\d\d\d)-(\d\d)-(\d\d)$/$3.$2.$1/;
# slower (229,357/sec) (4.36 CPU)



# Adding up all numbers

# Tested 40000 times, against:
my @array = 0..1000;

# Results:
my $sum = 0;
$sum += $_ foreach(@array);
# faster (5,398/sec) (7.41 CPU)
my $sum = 0;
map(($sum += $_),@array);
# slower (5,063/sec) (7.90 CPU)
my $sum = 0;
grep(($sum += $_),@array);
# slower (4,895/sec) (8.17 CPU)



# Stripping Trailing Whitespace

# Tested 22255 times, against:
my $string = 'A string with 10 spaces at the end. ' x 100;

# Results:
my $str = $string;
$str = unpack('A*', $str);
return;
# faster (278,187/sec) (0.08 CPU)
my $str = $string;
chop $str while substr($str, -1) =~ /\s/;
return;
# slower (71,790/sec) (0.31 CPU)
my $str = $string;
$str =~ s/\s+\z//;
return;
# slower (3,206/sec) (6.94 CPU)



# Stripping Leading and Trailing Whitespace

# Tested 122222 times, against:
my $string = q{ }. q{a v sv s / df s d-g } x 200 . q{ };

# Results:
my $str = $string;
$str = unpack('A*', $str);
$str =~ s/^\s+//;
return;
# faster (179,738/sec) (0.68 CPU)
# From Mastering Regular Expressions
my $str = $string;
$str =~ s/^\s+((?:.+\S)?)\s+$/$1/s;
return;
# slower (83,713/sec) (1.46 CPU)
my $str = $string;
$str = reverse unpack('A*', reverse unpack('A*', $str));
return;
# slower (50,504/sec) (2.42 CPU)



# Replacing two or more spaces with a single space

# Tested 1000000 times, against:
my $string = '  this   is a   string with    spaces to     remove   ';

# Results:
my $str = $string;
$str = join(' ', split(' ', $str));
# faster (261,780/sec) (3.82 CPU)
my $str = $string;
$str =~ s/  +/ /g;
# slower (186,915/sec) (5.35 CPU)
my $str = $string;
$str =~ s/ +/ /g;
# slower (150,829/sec) (6.63 CPU)



# Getting the exact match of a string

# Tested 2000000 times, against:
my $string = 'Just another Perl hacker,';
my $match = 'peRL';

# Results:
${^MATCH} if $string =~ /$match/i;
# faster (1,020,408/sec) (1.96 CPU)
$& if $string =~ /$match/i;
# slower (985,221/sec) (2.03 CPU)
$1 if $string =~ /($match)/i;
# slower (677,966/sec) (2.95 CPU)



# Capturing delimited text

# Tested 222552 times, against:
my $string = 'capture =THIS= from string!' x 100;

# Results:
my $i = index($string, '=');
my $cap = substr($string, $i+1, index($string, '=', $i+1)-$i-1);
return;
# faster (585,663/sec) (0.38 CPU)
my ($cap) = $string =~ /=(.*?)=/;
return;
# slower (289,028/sec) (0.77 CPU)

*** the above codes has been tested with Benchmark module.
*** the script which I used to test the above codes, is this.