Laziness — The quality that makes you go to great effort to reduce overall energy expenditure. Write a program others can use so you don't have to answer their questions. Write functions so you don't repeat yourself.
Impatience — The anger you feel when the computer is being lazy. This makes you write programs that anticipate your needs, not just react to them.
Hubris — Excessive pride, the kind that makes you write (and maintain) programs that other people won't want to say bad things about.
# Perl = sed + awk + sh + C + "kitchen sink" # From sed: s/pattern/replacement/flags # d, p, q, a, i, y (transliterate) # line ranges: 1,5 /start/,/end/ # -n (suppress output) -i (in-place) # From awk: field splitting ($1 $2 $NF) # pattern { action } blocks # BEGIN { } and END { } # associative arrays # printf, getline # -F field separator # From sh: backtick execution `cmd` # string interpolation "hello $name" # here-docs <<EOF # file test operators -f -d -e -r -w # From C: control flow (if for while) # operators ++ -- += *= ? : # stdio concepts # Perl's own: context (list vs scalar) # references and complex data # tied variables, overloading # BEGIN/END/UNITCHECK/CHECK/INIT # source filters, XS extension # AUTOLOAD, DESTROY, tie # formats (a whole report language)
$_ when given no argument: print, chomp, chop, length, uc, lc, split, tr///, s///, m//. This is what makes Perl one-liners so compact — you never type the variable name unless you have to.
undef → slurp whole file. use English qw(-no_match_vars); gives every special variable a human name: $/ → $INPUT_RECORD_SEPARATOR, $. → $NR (like awk's NR), $! → $OS_ERROR. The -no_match_vars avoids the catastrophic performance penalty that $`, $&, $' impose on ALL regex matches in a program.
-e evaluates code, -n wraps in a line-reading loop (like sed -n or awk's default), -p adds an automatic print, -i edits files in-place, -F sets the field separator. Master these five and you retire your sed and awk scripts.# -e 'code' execute code (multiple -e allowed) perl -e 'print "Hello\n"' # -n wrap in: while(<>) { ...code... } # reads stdin or files, does NOT print by default perl -ne 'print if /error/i' app.log # -p wrap in: while(<>) { ...code...; print } # like sed: always prints $_ after code perl -pe 's/foo/bar/g' file.txt # -i[ext] in-place edit (backup with extension) # -i.bak makes .bak backup # -i alone: no backup (dangerous but useful) perl -i.bak -pe 's/localhost/prod.db/g' config.ini # -F'sep' field separator (implies -an) # sets $, and splits $_ into @F perl -F',' -ane 'print $F[0], "\n"' data.csv # -a autosplit mode: splits $_ into @F # (used with -n or -p) perl -ane 'print $F[-1], "\n"' # last field # -l[oct] chomp each input line, set $\ to "\n" # so print automatically adds newline perl -lne 'print uc' # uppercase every line, chomp+print # -0[oct] input record separator as octal # -0777 = slurp (set $/ to undef) # -00 = paragraph mode (blank line = RS) perl -0777 -ne 's/\n\n+/\n/g; print' # collapse blank lines # -s enable $opt_x from -x command line flags # -M module use module (like -MData::Dumper) perl -MData::Dumper -e 'print Dumper \%ENV' perl -MPOSIX -e 'printf "%d\n", POSIX::floor(3.7)' perl -MList::Util=sum,max -e 'print sum(1..100)' # → 5050 # -w / use warnings warnings # -d debugger # -c compile-check only (no execute) perl -c script.pl
# BEGIN{} runs before the first line is read # END{} runs after all input is consumed # These work identically to awk's BEGIN/END # Count lines (like wc -l): perl -ne 'END{print "$.\n"}' # Sum a column (like awk '{sum+=$1} END{print sum}'): perl -ane '$s+=$F[0]; END{print "$s\n"}' # Print header and footer around output: perl -ne ' BEGIN { print "=== ERRORS ===\n" } print if /ERROR/ END { print "=== DONE ===\n" } ' app.log # Accumulate then process (can't do this in sed): perl -ne ' push @lines, $_ if /relevant/; END { my @sorted = sort { length($a) <=> length($b) } @lines; print for @sorted; } ' data.txt # Set $/ to slurp the whole file in BEGIN: perl -ne 'BEGIN{$/=undef} s/\n/ /g; print' file.txt # Better with -0777: perl -0777pe 's/\n/ /g' file.txt # Multiple -e flags — readable one-liners: perl -ne '' \ -e 'next unless /^ERROR/' \ -e 'chomp; push @e, $_' \ -e 'END { printf "%d errors\n%s\n", scalar @e, join "\n", @e }'
# -a splits $_ on whitespace into @F (like awk $1 $2 $NF) # -F'sep' sets the separator (regex or string) # Print 2nd and 5th field (like awk '{print $2,$5}'): perl -ane 'print "$F[1] $F[4]\n"' # Note: @F is 0-indexed; awk's $1 = $F[0] in Perl # Last field (like awk '{print $NF}'): perl -ane 'print "$F[-1]\n"' # CSV parsing with -F',': perl -F',' -ane 'print "$F[0],$F[2]\n" if $. > 1' data.csv # $. > 1 skips header line # Tab-separated (like TSV): perl -F'\t' -ane 'print join(",", @F)' # Multi-char or regex separator: perl -F'\s*:\s*' -ane 'print "$F[0]\n"' /etc/passwd # Rebuild line with modified fields: perl -F',' -ane ' $F[2] *= 1.1; # bump 3rd column by 10% print join(",", @F) '
# Delete blank lines: perl -ne 'print unless /^\s*$/' # Number lines (like nl / cat -n): perl -ne 'printf "%4d %s", $., $_' # Print lines 5–10 (like sed -n '5,10p'): perl -ne 'print if 5..10' # .. is the range operator — evaluates to true between matches! # Print lines between patterns (like sed '/START/,/END/p'): perl -ne 'print if /START/../END/' # Reverse lines of a file (like tac): perl -e 'print reverse <>' # Unique lines without sorting (like awk '!seen[$0]++'): perl -ne 'print unless $seen{$_}++' # Sort and unique simultaneously: perl -e 'my %s; $s{$_}++ for <>; print sort keys %s' # Print every Nth line (every 3rd line): perl -ne 'print if $. % 3 == 0' # Sum all numbers found anywhere in file: perl -nle '$s += $_ for /(\d+\.?\d*)/g; END{print $s}' # Word frequency count: perl -ne '$c{lc $_}++ for /(\w+)/g; END { printf "%6d %s\n", $c{$_}, $_ for sort {$c{$b}<=>$c{$a}} keys %c }' # ROT13 (classic): perl -pe 'y/A-Za-z/N-ZA-Mn-za-m/' # Base64 encode a file: perl -MMIME::Base64 -0777 -ne 'print encode_base64($_)' # URL decode a string: perl -MURI::Escape -le 'print uri_unescape($ARGV[0])' # Extract all URLs from HTML: perl -ne 'print "$1\n" while /href="([^"]+)"/gi' # JSON one-liners (with JSON::PP which is core): perl -MJSON::PP -0777ne ' my $d = decode_json($_); print "$_->{name}\n" for @{$d->{users}} '
# The Schwartzian Transform — decorate/sort/undecorate # Sort files by modification time (Perl's fastest way): my @sorted = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [$_, (stat $_)[9]] } <*.log>; # Orcish Maneuver — memoize inside sort with ||= sort { ($cache{$a} ||= expensive($a)) <=> ($cache{$b} ||= expensive($b)) } @items; # wantarray — different behavior in list vs scalar context: sub context_aware { return wantarray ? (1,2,3) : 42; } my @list = context_aware(); # (1,2,3) my $scalar = context_aware(); # 42 # Local $/ for block-scoped slurp: my $content = do { local $/; # undef $/ in this scope only open my $fh, '<', $file or die; <$fh>; # slurps entire file }; # Typeglob aliasing — the dark art: *alias = \&original; # alias a sub *STDOUT = *STDERR; # redirect stdout to stderr local *ARGV; # localize the diamond operator # String repetition operator x: print "-" x 72, "\n"; # 72 dashes my @zeros = (0) x 100; # 100 zeros # Hash slice — extract multiple keys at once: my @vals = @hash{qw(name age email)}; @hash{qw(x y z)} = (1,2,3); # set multiple keys # Grep and map as real functions (not just filters): my @evens = grep { $_ % 2 == 0 } 1..20; my @sq = map { $_ ** 2 } 1..10; # sprintf for right-align, zero-pad, format: printf "%*d\n", 10, $n; # right-align in field width 10 printf "%-20s %5.2f\n", $name, $price;
\K keep operator, and the ability to execute arbitrary Perl code inside a regex match. We're going full depth.# /i case-insensitive # /g global (find all matches) # /m multiline (^ and $ match line boundaries) # /s single-line (. matches \n too) # /x extended (whitespace and # comments ignored) # /e evaluate replacement as Perl code (s/// only) # /r return modified copy, don't modify in-place (5.14+) # /a ASCII mode (restrict \w \d \s to ASCII) # /u Unicode semantics # /l locale semantics # /p preserve $`, $&, $' (slower) # /n no captures ($1..$9 not set, faster) # /xx double-extended: even spaces in char class ignored # /x is life-changing for complex patterns: if ($email =~ / \A # start of string ( # capture local part [^@\s]+ # one or more non-@ non-space ) @ # literal @ ( # capture domain [^@\s]+ # domain part \. # literal dot [a-z]{2,} # TLD ) \z # end of string /xi) { print "user=$1 domain=$2\n"; } # /e: substitute with code evaluation: $text =~ s/(\d+)/sprintf "%05d", $1/ge; # Zero-pads every number in $text to 5 digits $text =~ s/\bUC:(\w+)\b/uc($1)/ge; # "UC:hello" → "HELLO" # /ee: double-eval (evaluate result as Perl, then evaluate again) $tmpl =~ s/\{\{(\w+)\}\}/$vars{$1}/ge; # Simple template engine in one line # /r: non-destructive substitution (returns copy): my $clean = $dirty =~ s/[^\w\s]//gr; my @clean = map { (my $c = $_) =~ s/^\s+|\s+$//gr } @raw; # trim every element without destroying originals
# ── LOOKAHEAD ───────────────────────────────────────── (?=pattern) # positive lookahead: what follows matches (?!pattern) # negative lookahead: what follows does NOT match # Find "foo" only if followed by "bar": $s =~ /foo(?=bar)/ # Number not followed by a dot (integer detection): $s =~ /\d+(?!\.)/ # ── LOOKBEHIND ──────────────────────────────────────── (?<=pattern) # positive lookbehind: what precedes matches (?<!pattern) # negative lookbehind: what precedes does NOT match # Extract value after "Price: ": $s =~ /(?<=Price:\s)\d+\.\d+/ # ── \K: KEEP — the lookbehind shortcut ─────────────── # \K resets start of match — everything before \K is # matched but NOT included in $& or the capture # Extract version number after "version ": $s =~ /version\s+\K[\d.]+/i # cleaner than: /version\s+([\d.]+)/ with $1 # Delete everything AFTER a semicolon (keep the part before, including the ;): $s =~ s/;\K.*$//s # ── NAMED CAPTURES ──────────────────────────────────── $s =~ /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/ print "$+{year}/$+{month}/$+{day}\n"; # %+ is the named captures hash # %- is the named captures hash (allows multiple same-name) # ── NON-CAPTURING GROUP ─────────────────────────────── (?:pattern) # group without capturing to $1 etc. $s =~ /(?:foo|bar)baz/ # match foobaz or barbaz, no capture # ── ATOMIC GROUP (possessive) ───────────────────────── (?>pattern) # no backtracking inside (atomic) # Prevents catastrophic backtracking on complex patterns $s =~ /(?>\w+):/ # grab all word chars, don't give back # ── CODE ASSERTIONS: (?{}) and (??{}) ───────────────── # (?{ code }): execute code during match, set $^R $str =~ /(\d+)(?{ $total += $1 })/g; # Accumulates into $total DURING the match # (??{ code }): interpolate result as regex pattern # Match balanced parentheses (classic example): my $balanced = qr/ \( (?: [^()]+ # non-parens | (??{ $balanced }) # recursive! )* \) /x; $str =~ /$balanced/; # matches ((a+(b*c))+(d))
# /g in LIST context: return all matches at once my @nums = $s =~ /(\d+)/g; # /g in SCALAR context: iterate (like Python re.finditer) while ($s =~ /(\w+)=(\w+)/g) { print "$1 => $2\n"; # pos($s) holds current position in string } # pos() and \G anchor: # \G matches at pos() — where last /g match left off while ($s =~ /\G\s*(\w+)/g) { print "token: $1 at pos=", pos($s), "\n"; } # Captures with /g in list context: my %pairs = $s =~ /(\w+)=(\w+)/g; # Captures alternate as key,value pairs → direct into hash # tr/// — transliterate (not a regex but critical): my $count = ($s =~ tr/aeiou//); # count vowels (tr returns the match count) $s =~ tr/a-z/A-Z/; # uppercase (like y///) my $newlines = ($s =~ tr/\n//); # count newlines $s =~ tr/ //s; # squeeze runs of spaces $s =~ tr/a-zA-Z0-9//cd; # delete non-alphanumeric (c=complement, d=delete)
# Possessive quantifiers (Perl 5.10+): never backtrack *+ # possessive * (zero or more, no backtrack) ?+ # possessive ? (zero or one, no backtrack) ++ # possessive + (one or more, no backtrack) {2,5}+ # possessive counted (no backtrack) # ".*+" consumes everything, never gives back # Prevents catastrophic backtracking on long strings: $s =~ /^(?:[^"]++|"[^"]*+")*$/ # fast CSV-like check # Conditional pattern (?(cond)yes|no): $s =~ /(")?(?(1)[^"]+"|[^,]+)/ # If a " was captured: match quoted field # Otherwise: match unquoted field # Backreference in conditional: $s =~ /^(\[)?(?(1)[^\]]+\]|[^[\]]+)$/ # Named backreference: $s =~ /(?<q>["'])(?<body>.+?)\k<q>/ # \k<name> backreference to named capture # Matches either "quoted" or 'quoted' correctly # Embedded code for complex validation: $s =~ /(\d{1,3}(?:\.\d{1,3}){3})(?{ my @parts = split /\./, $1; die "not IPv4" if grep { $_ > 255 } @parts; })/x; # Regex stored in variables (qr// — compiled regex object): my $ipv4 = qr/(?:\d{1,3}\.){3}\d{1,3}/; my $date = qr/\d{4}-\d{2}-\d{2}/; $log =~ /$date\s+$ipv4/; # compose them # split() is regex-powered: my @f = split /\s*,\s*/, $s; # split on comma + optional spaces my @f = split /(?<=\d)(?=[A-Z])/, $s; # split between digit and uppercase my @f = split ' ', $s; # magic: like awk (trim + split on whitespace) my @f = split /,/, $s, 5; # max 5 fields
# sed 'address command' file # address: line number, /regex/, or range # command: s (sub), d (delete), p (print), # q (quit), a (append), i (insert), # y (transliterate), = (print line no) # Substitute: sed 's/foo/bar/' # first occurrence per line sed 's/foo/bar/g' # global (all occurrences) sed 's/foo/bar/i' # case-insensitive (GNU sed) sed 's/foo/bar/2' # 2nd occurrence only sed 's/foo/bar/gp' # replace + print (with -n) sed 's/\(foo\)/[\1]/' # backreference (BRE syntax) sed -E 's/(foo)/[\1]/' # ERE: no escaping needed # Delete: sed '/^#/d' # delete comment lines sed '/^$/d' # delete blank lines sed '5d' # delete line 5 sed '2,5d' # delete lines 2-5 sed '/START/,/END/d' # delete between patterns # Print (with -n to suppress auto-print): sed -n '5p' # print only line 5 sed -n '5,10p' # print lines 5-10 sed -n '/pattern/p' # print matching lines (like grep) sed -n '/START/,/END/p' # print between patterns # In-place editing: sed -i 's/old/new/g' f # GNU sed (no backup) sed -i.bak 's/old/new/g' f # BSD/macOS (with backup) # Append / Insert: sed '/pattern/a\new line' # append after matching line sed '/pattern/i\new line' # insert before matching line sed '$a\last line' # append at end of file # Transliterate: sed 'y/abc/ABC/' # like tr, char-by-char # Multiple commands: sed -e 's/foo/bar/' -e 's/baz/qux/' sed '/foo/{s/foo/bar/; s/x/y/}' # grouped # Quit after N lines (fast head -n): sed '5q' # print first 5 then quit
# sed 's/foo/bar/' → perl -pe 's/foo/bar/' # sed 's/foo/bar/g' → perl -pe 's/foo/bar/g' # sed 's/foo/bar/i' → perl -pe 's/foo/bar/gi' # sed -n '5p' → perl -ne 'print if $. == 5' # sed -n '5,10p' → perl -ne 'print if 5..10' # sed '/^#/d' → perl -ne 'print unless /^#/' # sed '/^$/d' → perl -ne 'print if /\S/' # sed -n '/START/,/END/p' → perl -ne 'print if /START/../END/' # sed '/START/,/END/d' → perl -ne 'print unless /START/../END/' # sed -i.bak 's/old/new/g' file → perl -i.bak -pe 's/old/new/g' file # sed 'y/abc/ABC/' → perl -pe 'tr/abc/ABC/' # sed '5q' → perl -pe 'exit if $. > 5' # sed '/pattern/a\text' → perl -pe 'print "text\n" if /pattern/' # sed '=' file (print line numbers) → perl -ne 'print "$.\n$_"' # WHERE PERL WINS DECISIVELY: # sed can't do: backreference math perl -pe 's/(\d+)/$1*2/e' # double every number # sed can't do: conditional replacement perl -pe 's/(\w+)/length($1)>5 ? uc($1) : $1/ge' # sed can't do: stateful processing perl -ne ' $n++ if /BEGIN/; $n-- if /END/; print if $n > 0; ' # sed can't do: lookbehind in replacement perl -pe 's/(?<=\d{4}-\d{2}-)\d{2}/XX/g'
$1 is Perl's $F[0] with -a.# awk 'pattern { action }' file # No pattern: always execute # No action: print the line # Built-in variables: $0 # entire current line $1..$NF # fields (NF = number of fields) FS # field separator (default: whitespace) OFS # output field separator RS # record separator (default: \n) ORS # output record separator NR # current record number (like Perl's $.) NF # number of fields in current record FILENAME # current filename FNR # record number within current file ARGC/ARGV # argument count/array # Basic examples: awk '{print $2}' # 2nd field awk '{print $NF}' # last field awk '{print $(NF-1)}' # second to last awk '{print NR": "$0}' # line numbers awk 'NR==5' # print line 5 awk 'NR>=5 && NR<=10' # lines 5-10 awk '/pattern/' # print matching lines awk '!/pattern/' # print non-matching awk '$3 > 100' # numeric comparison awk '$1 ~ /foo/' # field matches regex awk '-F,' '{print $1}' # CSV first field awk 'BEGIN{FS=","} {print $2}' # Aggregation: awk '{sum+=$1} END{print sum}' awk '{if($1>max)max=$1} END{print max}' # Associative arrays — awk's superpower: awk '{count[$1]++} END{for(k in count) print k, count[k]}' awk '{sum[$2]+=$3} END{for(k in sum) printf "%s: %.2f\n",k,sum[k]}' # Two-file join: awk 'NR==FNR{a[$1]=$2;next} $1 in a{print $0,a[$1]}' f1 f2 # Multi-char field separator (gawk): awk -v FS='::' '{print $1}' # printf formatting: awk '{printf "%-20s %8.2f\n", $1, $2}' # Modify and reprint a field: awk '{$2=$2*1.1; print}' # bump 2nd column, reprint awk 'BEGIN{OFS=","} {$1=$1; print}' # rebuild with new OFS
# awk '{print $2}' → perl -ane 'print "$F[1]\n"' # awk '{print $NF}' → perl -ane 'print "$F[-1]\n"' # awk '{print NR": "$0}' → perl -ne 'print "$.: $_"' # awk 'NR==5' → perl -ne 'print if $. == 5' # awk '$3 > 100' → perl -ane 'print if $F[2] > 100' # awk '$1 ~ /foo/' → perl -ane 'print if $F[0] =~ /foo/' # awk -F, '{print $1}' → perl -F',' -ane 'print "$F[0]\n"' # awk '{sum+=$1} END{print sum}' → perl -ane '$s+=$F[0]; END{print "$s\n"}' # awk '{count[$1]++} END{for(k in count)print k,count[k]}' → perl -ane '$c{$F[0]}++; END { printf "%s %d\n", $_, $c{$_} for keys %c }' # awk '{sum[$2]+=$3} END{...}' → perl -ane '$s{$F[1]}+=$F[2]; END { printf "%s: %.2f\n", $_, $s{$_} for sort keys %s }' # awk two-file join (NR==FNR trick) → perl -ane ' if ($ARGV eq "f1") { $h{$F[0]}=$F[1]; next } print "$_ $h{$F[0]}\n" if exists $h{$F[0]} ' f1 f2 # awk 'BEGIN{OFS=","} {$1=$1; print}' (rebuild with OFS) → perl -F'\s+' -ane 'print join(",", @F)' # WHERE PERL DEMOLISHES AWK: # awk can't do: complex data structures perl -ane 'push @{$g{$F[0]}}, $F[1]; END { for my $k (keys %g) { printf "%s: %s\n", $k, join(", ", @{$g{$k}}) }}' # awk can't do: regex-powered split with captures perl -ne 'my @f = split /\s*(?:,|;|\|)\s*/; ...' # awk can't do: module ecosystem perl -MCSV::PP -e '...' # proper CSV parsing perl -MNet::IP -e '...' # IP math
grep 'pattern' file # basic match grep -i 'pat' file # case-insensitive grep -v 'pat' file # invert (non-matching lines) grep -c 'pat' file # count matching lines grep -l 'pat' *.log # filenames with matches grep -L 'pat' *.log # filenames WITHOUT matches grep -n 'pat' file # with line numbers grep -r 'pat' dir/ # recursive grep -rl 'pat' dir/ # recursive, filenames only grep -A 3 'pat' # 3 lines After match grep -B 3 'pat' # 3 lines Before match grep -C 3 'pat' # 3 lines Context (both) grep -P '(?<=foo)bar' # PCRE (Perl-compatible!) grep -E 'a+|b+' # Extended regex grep -F 'literal.str' # Fixed string (no regex) grep -w 'word' # whole word match grep -x 'whole line' # whole line match grep -o 'pat' # only the match (not whole line) grep -m 5 'pat' # stop after 5 matches grep --include='*.py' -r 'pat' # filter by filename # Perl equivalents: perl -ne 'print if /pat/' # basic perl -ne 'print if /pat/i' # -i perl -ne 'print unless /pat/' # -v perl -ne '$c++ if /pat/; END{print "$c\n"}' # -c perl -ne 'print "$ARGV\n" if /pat/ and !$seen{$ARGV}++' # -l perl -ne 'print "$.: $_" if /pat/' # -n perl -ne 'print "$1\n" while /(\w+@\w+\.\w+)/g' # -o analog
# find equivalents using File::Find (core module) use File::Find; # find . -name '*.pl' → find(sub { print "$File::Find::name\n" if /\.pl$/ }, '.'); # find . -name '*.log' -mtime -7 → find(sub { return unless /\.log$/; my $age = (-M $_); # -M = days since modification print "$File::Find::name\n" if $age < 7; }, '.'); # find . -size +1M -type f → find(sub { return unless -f $_; print "$File::Find::name\n" if -s $_ > 1_048_576; }, '.'); # find . -name '*.conf' -exec grep -l 'debug' {} \; → find(sub { return unless /\.conf$/; open my $fh, '<', $_ or return; print "$File::Find::name\n" if grep {/debug/} <$fh>; }, '.'); # Path::Tiny — modern file operations (non-core but common): use Path::Tiny; my @files = path('.')->iterator({recurse=>1}); # File test operators — the -X operators: -f $_ # is a plain file -d $_ # is a directory -e $_ # exists -r $_ # readable -w $_ # writable -x $_ # executable -s $_ # file size in bytes -z $_ # zero size (empty) -M $_ # days since modification -A $_ # days since access -C $_ # days since inode change -T $_ # text file heuristic -B $_ # binary file heuristic -l $_ # is a symlink
#!/usr/bin/perl
# Mini-ack: recursive grep with line numbers and optional trailing context.
# Demonstrates File::Find + regex in production code.
#
# Usage: mini-ack [-i] [-C n] pattern [dir ...]
#   -i    case-insensitive match
#   -C n  print n lines of context after each hit
use strict;
use warnings;
use File::Find;
use Getopt::Long;

# Parse options FIRST: GetOptions removes flags from @ARGV, so the
# pattern must be taken afterwards.  (The original read $ARGV[0]
# before GetOptions, so "mini-ack -i foo" used "-i" as the pattern.)
my ($ignore_case, $context) = (0, 0);
GetOptions('i' => \$ignore_case, 'C=i' => \$context);
my $pattern = shift @ARGV;
die "Usage: $0 [-i] [-C n] pattern [dir ...]\n" unless defined $pattern;

my $re = $ignore_case ? qr/$pattern/i : qr/$pattern/;

# Remaining args are the search roots; default to the current dir.
# NOTE: the original used «@ARGV[1..$#ARGV] || '.'» — but || forces
# scalar context on the slice, which never yields a usable dir list.
my @dirs = @ARGV ? @ARGV : ('.');

find({
    wanted => sub {
        # With no_chdir => 1, $_ is the full path (same as
        # $File::Find::name), so file tests and matches see the path.
        return unless -f && -T;          # plain text files only
        return if /\.git\//;             # skip VCS internals
        open my $fh, '<', $_ or return;  # unreadable files: skip silently
        my @lines = <$fh>;
        close $fh;
        for my $i (0 .. $#lines) {
            next unless $lines[$i] =~ $re;
            # ANSI colors: magenta filename, green line number.
            printf "\033[35m%s\033[0m:\033[32m%d\033[0m: %s",
                $File::Find::name, $i + 1, $lines[$i];
            # Honor -C: the original parsed $context but never used it.
            if ($context > 0) {
                my $last = $i + $context;
                $last = $#lines if $last > $#lines;
                print "  $lines[$_]" for $i + 1 .. $last;
            }
        }
    },
    no_chdir => 1,
}, @dirs);
ack — written entirely in Perl by Andy Lester — was the first "grep for programmers." It knows about VCS directories, file types, and uses Perl regex natively. ripgrep took ack's ideas and implemented them in Rust at hardware speed. But for one-off tasks that need scripting — regex + transformation + output formatting — Perl still beats them both.
# grep -P gives you PCRE — lookbehind, named captures # BUT: grep -P can't transform what it finds. # Perl can extract AND transform in one pass. # grep -oP: extract matches only (the closest to Perl): grep -oP '(?<=version:)\s*\K[\d.]+' *.yaml # Perl equivalent — AND increment the version # (s{}{}e delimiters avoid escaping the / in split /\./): perl -i -pe 's{(?<=version:\s)([\d.]+)}{ my @p = split /\./, $1; $p[-1]++; join ".", @p }e' *.yaml # grep can't do: validate AND extract AND reformat perl -ne ' next unless /^(\d{4}-\d{2}-\d{2})\s+(\S+)\s+(.+)/; my ($date,$level,$msg) = ($1,$2,$3); next if $level eq "DEBUG"; printf "[%s] %-5s %s\n", $date, $level, substr($msg,0,80); ' app.log
# ── CREATING REFERENCES ─────────────────────────────── my $sref = \$scalar; # reference to scalar my $aref = \@array; # reference to array my $href = \%hash; # reference to hash my $cref = \⊂ # reference to subroutine # Anonymous constructors (the common pattern): my $aref = [1,2,3]; # anon array ref my $href = {a=>1,b=>2}; # anon hash ref my $cref = sub { $_[0] * 2 }; # anon code ref # ── DEREFERENCING ───────────────────────────────────── $$sref # deref scalar @$aref # deref array (all elements) %$href # deref hash $aref->[0] # element via arrow (preferred) $href->{key} # hash element via arrow $cref->(@args) # call coderef # ── COMPLEX STRUCTURES ──────────────────────────────── # Array of hashes (most common): my @users = ( { name => 'Alice', age => 32, roles => [qw(admin user)] }, { name => 'Bob', age => 28, roles => [qw(user)] }, ); print $users[0]{name}, "\n"; # Alice print $users[0]{roles}[0], "\n"; # admin # Hash of arrays: my %groups = ( admins => [qw(alice bob)], users => [qw(carol dave eve)], ); push @{$groups{admins}}, 'frank'; print scalar @{$groups{users}}; # 3 # Hash of hashes (two-level lookup): my %config = ( db => { host => 'localhost', port => 5432 }, app => { debug => 1, workers => 4 }, ); print $config{db}{host}, "\n"; $config{db}{port} = 5433;
# Closures — functions that capture lexical scope: sub make_counter { my $n = $_[0] || 0; return { inc => sub { $n++ }, dec => sub { $n-- }, reset => sub { $n = 0 }, value => sub { $n }, }; } my $c = make_counter(10); $c->{inc}->(); $c->{inc}->(); print $c->{value}->(), "\n"; # 12 # Dispatch table — replace if/elsif chains: my %dispatch = ( add => sub { $_[0] + $_[1] }, sub => sub { $_[0] - $_[1] }, mul => sub { $_[0] * $_[1] }, ); my $result = $dispatch{$op}->($a, $b) if exists $dispatch{$op}; # AUTOLOAD — catch calls to undefined methods: our $AUTOLOAD; sub AUTOLOAD { my $name = $AUTOLOAD; $name =~ s/.*:://; # strip package name return if $name eq 'DESTROY'; print "Called: $name with @_\n"; } # Chained string operations with intermediate variables # using the 'or die' idiom: open my $fh, '<', $file or die "Cannot open $file: $!"; # here-doc with interpolation: my $sql = <<SQL; SELECT $cols FROM $table WHERE $cond SQL # Indented heredoc (5.26+): my $html = <<~HTML; <div> <p>$content</p> </div> HTML
# ── HYPER-OPERATORS: apply to all elements ───────────── my @a = (1, 2, 3); my @b = (10, 20, 30); say @a »+« @b; # (11, 22, 33) — element-wise add say @a »*» 2; # (2, 4, 6) — scalar apply say @a »**» 2; # (1, 4, 9) — square all say ((@a »+« @b) »*» 2); # (22, 44, 66) # ── JUNCTIONS: superposition of values ──────────────── my $any_role = any("admin", "owner", "root"); if $user.role eq $any_role { say "has privilege" } # any(), all(), none(), one() — quantum logic my $all = all(1, 2, 3); say 2 == $all; # False (not all are 2) say 0 < $all; # True (all are > 0) # ── SMARTMATCH: context-sensitive comparison ────────── say "hello" ~~ /ell/; # regex match say 42 ~~ (1..100); # range membership say (1,2,3) ~~ (1,2,3); # list equality given $x { when 1 { say "one" } when 2..10 { say "small" } when /foo/ { say "has foo" } default { say "other" } } # ── LAZY LISTS ──────────────────────────────────────── my @fib := (0, 1, * + * ... Inf); # infinite Fibonacci! say @fib[^10]; # first 10 terms say @fib.first(* > 100); # first term > 100 my @evens = (2, 4 ... Inf); # infinite even numbers my @primes = (2..*).grep(&is-prime); # infinite prime stream say @primes[^20]; # first 20 primes # ── RANGES + SUBSCRIPT ADVERBS ──────────────────────── my @a = 1..100; say @a[^5]; # first 5: (1,2,3,4,5) say @a[*-5..*-1]; # last 5 say @a[0,2...^@a]; # every 2nd element # ── FEW MORE WILD OPERATORS ─────────────────────────── say [+] 1..100; # reduce with + → 5050 say [*] 1..10; # reduce with * → 3628800 (10!) say [max] 3,1,4,1,5,9; # reduce with max → 9 say [~] "a","b","c"; # reduce with ~ (concat) → "abc"
# Raku grammars: PEG-based parsers as first-class syntax. # This is what Larry Wall always wanted. # Parse a simple arithmetic expression grammar: grammar ArithExpr { rule TOP { <expr> } rule expr { <term> (['+'|'-'] <term>)* } rule term { <factor> (['*'|'/'] <factor>)* } token factor { '(' <expr> ')' | <number> } token number { \d+ ('.' \d+)? } } class ArithActions { method TOP($/) { make $<expr>.made } method expr($/) { my $v = $<term>[0].made; for 1..$<term>.end -> $i { $v += $<term>[$i].made; } make $v; } # etc. } my $result = ArithExpr.parse( "(3+4)*2", actions => ArithActions.new ); say $result.made; # 14 # Raku regex superpowers: $str ~~ /\d+ '-' \d+/; # whitespace ignored in regex! $str ~~ /:i hello/; # :i modifier inline $str ~~ /(\w+) '=' <[0..9]>+/; # character classes $str ~~ /$<key>=(\w+)/; # named capture say $/<key>; # access named capture
use v5.38 will outlive most of the systems it runs on. Raku is where you'd write a new system that needs to parse things.
use v5.36; # enables: strict, warnings, say, state, # fc, unicode_strings, and more # say — print with newline (finally core without use feature): say "Hello"; # state — variable initialized only once: sub counter { state $n = 0; return $n++; } # fc — case-folding (Unicode-correct case-insensitive compare): use feature 'fc'; if (fc($a) eq fc($b)) { ... } # Subroutine signatures (5.20+, stable in 5.36): sub greet ($name, $greeting = "Hello") { say "$greeting, $name!"; } greet("Alice"); greet("Bob", "Howdy"); # for/foreach with $_-free iteration (5.36): for my ($k, $v) (%hash) { # pairs directly! say "$k = $v"; } # Defer blocks (5.36) — like Go's defer: use feature 'defer'; open my $fh, '<', $file or die; defer { close $fh } # closes when scope exits # builtin:: namespace (5.36) — new builtins use builtin qw(true false is_bool weaken blessed reftype); say builtin::true == 1; # true (a proper boolean) say builtin::false == 0; # true say builtin::is_bool(true); # 1